mcpbrowser 0.3.45 → 0.3.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcpbrowser",
3
- "version": "0.3.45",
3
+ "version": "0.3.46",
4
4
  "mcpName": "io.github.cherchyk/mcpbrowser",
5
5
  "type": "module",
6
6
  "description": "MCP browser server - fetch web pages using real Chrome/Edge/Brave browser. Handles authentication, SSO, CAPTCHAs, and anti-bot protection. Browser automation for AI assistants.",
@@ -209,6 +209,24 @@ export async function executeJavascript({ url, script, timeoutMs = EXECUTION_TIM
209
209
  }
210
210
  const urlChanged = currentUrl !== beforeUrl;
211
211
 
212
+ // Detect CSP block or silent evaluation failure:
213
+ // When page.evaluate() is blocked by CSP, Puppeteer returns undefined (not an error).
214
+ // NOTE: an undefined/null result cannot be told apart from a script that intentionally returns nothing; both cases get the EvaluationEmpty response below.
215
+ if (evalResult === undefined || evalResult === null) {
216
+ return new ExecuteJavascriptResponse({
217
+ result: null,
218
+ type: 'undefined',
219
+ executionTimeMs,
220
+ truncated: false,
221
+ urlChanged,
222
+ currentUrl,
223
+ error: {
224
+ name: 'EvaluationEmpty',
225
+ message: 'Script evaluation returned no result. Possible causes: page Content Security Policy (CSP) blocked evaluation, the script has no return value, or the page context is sandboxed. Try browser_take_screenshot to verify the page is loaded, or use a simpler expression like "document.title" to test page accessibility.'
226
+ }
227
+ });
228
+ }
229
+
212
230
  if (evalResult?.error) {
213
231
  return new ExecuteJavascriptResponse({
214
232
  result: null,
@@ -77,6 +77,7 @@ export const FETCH_WEBPAGE_TOOL = {
77
77
  enum: ["", "chrome", "edge"]
78
78
  },
79
79
  removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
80
+ selector: { type: "string", description: "CSS selector to extract a specific DOM subtree instead of the full page. Use to scope extraction and reduce response size (e.g., 'main', '[role=\"main\"]', 'body > div:first-child'). If no elements match, falls back to full page with a note." },
80
81
  postLoadWait: { type: "number", description: "Additional milliseconds to wait after page load before extracting HTML. Use for pages that need extra time to render. Default: 0 (no extra wait, SPA detection handles most cases automatically).", default: 0 }
81
82
  },
82
83
  required: ["url"],
@@ -122,7 +123,7 @@ export const FETCH_WEBPAGE_TOOL = {
122
123
  * @param {number} [params.postLoadWait=0] - Additional milliseconds to wait after page load before extracting HTML
123
124
  * @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
124
125
  */
125
- export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = true, postLoadWait = 0 }) {
126
+ export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = true, selector = null, postLoadWait = 0 }) {
126
127
  logger.info(`browser_fetch_webpage called: url=${url}`);
127
128
 
128
129
  // Handle missing URL with environment variable fallback
@@ -150,7 +151,7 @@ export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = tru
150
151
 
151
152
  // Queue this request - processed sequentially, one at a time
152
153
  return queueRequest(async () => {
153
- return await doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait });
154
+ return await doFetchPage({ url, browser, removeUnnecessaryHTML, selector, postLoadWait });
154
155
  });
155
156
  }
156
157
 
@@ -158,7 +159,7 @@ export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = tru
158
159
  * Internal function that does the actual page fetching.
159
160
  * Called by the queue processor - only one runs at a time.
160
161
  */
161
- async function doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait }) {
162
+ async function doFetchPage({ url, browser, removeUnnecessaryHTML, selector, postLoadWait }) {
162
163
  const originalHostname = new URL(url).hostname;
163
164
 
164
165
  // Ensure browser connection
@@ -215,7 +216,7 @@ async function doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait }
215
216
  }
216
217
 
217
218
  // Extract and process HTML
218
- const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);
219
+ const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML, selector);
219
220
 
220
221
  logger.info(`browser_fetch_webpage completed: ${page.url()}`);
221
222
 
@@ -69,7 +69,8 @@ export const GET_CURRENT_HTML_TOOL = {
69
69
  type: "object",
70
70
  properties: {
71
71
  url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
72
- removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true }
72
+ removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
73
+ selector: { type: "string", description: "CSS selector to extract a specific DOM subtree instead of the full page. Use to scope extraction and reduce response size (e.g., 'main', '[role=\"main\"]', 'body > div:first-child'). If no elements match, falls back to full page with a note." }
73
74
  },
74
75
  required: ["url"],
75
76
  additionalProperties: false
@@ -107,9 +108,9 @@ export const GET_CURRENT_HTML_TOOL = {
107
108
  * @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML
108
109
  * @returns {Promise<Object>} Result object with current HTML
109
110
  */
110
- export async function getCurrentHtml({ url, removeUnnecessaryHTML = true }) {
111
+ export async function getCurrentHtml({ url, removeUnnecessaryHTML = true, selector = null }) {
111
112
  const startTime = Date.now();
112
- logger.info(`browser_get_current_html called: url=${url}`);
113
+ logger.info(`browser_get_current_html called: url=${url}${selector ? ` selector=${selector}` : ''}`);
113
114
 
114
115
  if (!url) {
115
116
  throw new Error("url parameter is required");
@@ -158,7 +159,22 @@ export async function getCurrentHtml({ url, removeUnnecessaryHTML = true }) {
158
159
 
159
160
  try {
160
161
  const currentUrl = page.url();
161
- const html = await extractAndProcessHtml(page, removeUnnecessaryHTML);
162
+ const html = await extractAndProcessHtml(page, removeUnnecessaryHTML, selector);
163
+
164
+ // Detect empty/near-empty HTML extraction (e.g., CSP blocking page.evaluate)
165
+ if (!html || html.trim().length < 100) {
166
+ logger.warn(`browser_get_current_html: HTML extraction returned empty/minimal content from ${currentUrl} (${html ? html.trim().length : 0} chars)`);
167
+ return new InformationalResponse(
168
+ `HTML extraction returned empty content from ${currentUrl}`,
169
+ 'The page may be blocking evaluation via Content Security Policy (CSP), the page has not fully rendered, or the page uses a sandboxed context that prevents DOM reading.',
170
+ [
171
+ "Use MCPBrowser's browser_take_screenshot to verify the page is visually loaded",
172
+ "Use MCPBrowser's browser_execute_javascript with a simple script like 'document.title' to test page accessibility",
173
+ "Try MCPBrowser's browser_fetch_webpage to reload the page",
174
+ "Wait and retry — the page may still be rendering"
175
+ ]
176
+ );
177
+ }
162
178
 
163
179
  logger.info(`browser_get_current_html completed: got HTML from ${currentUrl}`);
164
180
 
package/src/core/html.js CHANGED
@@ -70,8 +70,9 @@ export function cleanHtml(html) {
70
70
  // Remove event handler attributes (onclick, onload, etc.)
71
71
  cleaned = cleaned.replace(/\s+on[a-z]+\s*=\s*["'][^"']*["']/gi, '');
72
72
 
73
- // Remove role attributes
74
- cleaned = cleaned.replace(/\s+role=["'][^"']*["']/gi, '');
73
+ // Keep role attributes — they're semantically valuable for LLM understanding
74
+ // and enable stable selectors like [role="main"], [role="navigation"]
75
+ // cleaned = cleaned.replace(/\s+role=["'][^"']*["']/gi, '');
75
76
 
76
77
  // Remove aria-* attributes
77
78
  cleaned = cleaned.replace(/\s+aria-[a-z0-9-]+=["'][^"']*["']/gi, '');
package/src/core/page.js CHANGED
@@ -475,23 +475,52 @@ async function waitForNavigationToSettle(page) {
475
475
  * settle and retries once.
476
476
  * @param {Page} page - The Puppeteer page instance
477
477
  * @param {boolean} removeUnnecessaryHTML - Whether to clean the HTML
478
+ * @param {string|null} [selector=null] - CSS selector to extract a DOM subtree instead of full page
478
479
  * @returns {Promise<string>} The processed HTML
479
480
  */
480
- export async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
481
+ export async function extractAndProcessHtml(page, removeUnnecessaryHTML, selector = null) {
481
482
  let html;
483
+
484
+ const extractFn = selector
485
+ ? (sel) => {
486
+ const els = document.querySelectorAll(sel);
487
+ if (!els.length) return null;
488
+ return Array.from(els).map(el => el.outerHTML).join('\n');
489
+ }
490
+ : () => document.documentElement?.outerHTML || "";
491
+
492
+ const extractArg = selector || undefined;
493
+
482
494
  try {
483
- html = await page.evaluate(() => document.documentElement?.outerHTML || "");
495
+ html = await page.evaluate(extractFn, extractArg);
484
496
  } catch (err) {
485
497
  if (isNavigationError(err)) {
486
498
  logger.debug('Late navigation during HTML extraction, waiting for settle...');
487
499
  await waitForNavigationToSettle(page);
488
500
  // Re-run page readiness — the new page may be a SPA that needs rendering time
489
501
  await waitForPageReady(page);
490
- html = await page.evaluate(() => document.documentElement?.outerHTML || "");
502
+ html = await page.evaluate(extractFn, extractArg);
491
503
  } else {
492
504
  throw err;
493
505
  }
494
506
  }
507
+
508
+ // If selector matched nothing, fall back to full page with a note
509
+ if (selector && html === null) {
510
+ logger.debug(`Selector "${selector}" matched no elements, falling back to full page`);
511
+ try {
512
+ html = await page.evaluate(() => document.documentElement?.outerHTML || "");
513
+ } catch (err) {
514
+ if (isNavigationError(err)) {
515
+ await waitForNavigationToSettle(page);
516
+ await waitForPageReady(page);
517
+ html = await page.evaluate(() => document.documentElement?.outerHTML || "");
518
+ } else {
519
+ throw err;
520
+ }
521
+ }
522
+ html = `<!-- selector "${selector}" matched no elements; returning full page -->\n` + html;
523
+ }
495
524
 
496
525
  let processedHtml;
497
526
  if (removeUnnecessaryHTML) {
@@ -501,5 +530,12 @@ export async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
501
530
  processedHtml = enrichHtml(html, page.url());
502
531
  }
503
532
 
533
+ // Warn when response is very large — the agent should use the selector parameter
534
+ // to scope extraction to a DOM subtree instead of fetching the entire page.
535
+ const htmlByteLength = new TextEncoder().encode(processedHtml).length;
536
+ if (htmlByteLength > 500_000) {
537
+ logger.warn(`Large HTML response (${(htmlByteLength / 1024).toFixed(0)}KB). Consider using the "selector" parameter to extract a specific DOM subtree instead of the full page.`);
538
+ }
539
+
504
540
  return processedHtml;
505
541
  }