brave-real-browser-mcp-server 2.27.4 → 2.27.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2876,3 +2876,135 @@ export async function handleStreamExtractor(page, args) {
2876
2876
  : 'No direct URLs found',
2877
2877
  };
2878
2878
  }
2879
/**
 * JS Scrape - single-call extraction of JavaScript-rendered content.
 *
 * Runs, in order: navigation to `args.url`, an optional best-effort wait for
 * `args.waitForSelector`, an optional scroll pass to trigger lazy loading,
 * a short settle delay, and finally content extraction.
 *
 * Never throws: every failure is reported as `{ success: false, error }`.
 *
 * NOTE(review): `closeBrowserAfter` is not read anywhere in this function;
 * if callers may pass it, it has to be honoured by the caller - confirm.
 *
 * @param {object} page - Browser page exposing `goto`, `waitForSelector`,
 *   `evaluate`, `title` and `url` (presumably a Puppeteer `Page` - confirm).
 * @param {object} args - Tool arguments.
 * @param {string} args.url - URL to navigate to (required).
 * @param {string} [args.waitForSelector] - Selector to wait for (visible) before extracting.
 * @param {number} [args.waitForTimeout=10000] - Timeout in ms used for both navigation
 *   and the selector wait. Anything that is not a positive finite number falls back to 10000.
 * @param {string} [args.extractSelector] - When set, only matching elements are extracted;
 *   otherwise the whole page is returned.
 * @param {string[]} [args.extractAttributes] - Attribute names to collect per element.
 *   A trailing `*` is a prefix wildcard (`data-*`). When missing or empty, defaults to
 *   `href`, `src`, `alt`, `title`, `data-*`.
 * @param {string} [args.returnType='html'] - `'html'`, `'text'` or `'elements'`.
 *   `'elements'` is only produced together with `extractSelector`; for a full-page
 *   scrape every value other than `'html'` yields text.
 * @param {boolean} [args.scrollToLoad] - Scrolling is skipped only when this is exactly `false`.
 * @param {object} [timing] - Optional tuning knobs; the defaults reproduce the
 *   previously hard-coded values.
 * @param {number} [timing.scrollDelayMs=200] - Pause between scroll steps.
 * @param {number} [timing.lazyLoadSettleMs=1000] - Pause after the scroll pass.
 * @param {number} [timing.scriptSettleMs=500] - Pause before extraction.
 * @param {number} [timing.maxScrollPx=5000] - Upper bound on the scrolled distance.
 * @returns {Promise<object>} On success `{ success: true, url, finalUrl, title, html, text,
 *   elements, elementCount }` (only the field matching the return type is populated), plus
 *   `warnings` when a non-fatal step failed. On failure `{ success: false, url, finalUrl,
 *   title: '', elementCount: 0, error }`.
 */
export async function handleJsScrape(page, args, timing = {}) {
    const DEFAULT_TIMEOUT_MS = 10000;
    const DEFAULT_ATTRIBUTES = ['href', 'src', 'alt', 'title', 'data-*'];
    // Tolerate a missing/null args object so the failure is reported, not thrown.
    const options = args ?? {};
    const {
        scrollDelayMs = 200,
        lazyLoadSettleMs = 1000,
        scriptSettleMs = 500,
        maxScrollPx = 5000,
    } = timing ?? {};
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    // page.url() may itself throw (e.g. on a closed page); the error path must not.
    const currentUrl = () => {
        try {
            return page.url() || options.url;
        }
        catch {
            return options.url;
        }
    };
    const failure = (message) => ({
        success: false,
        url: options.url,
        finalUrl: currentUrl(),
        title: '',
        elementCount: 0,
        error: message,
    });
    if (typeof options.url !== 'string' || options.url.trim() === '') {
        return failure('A non-empty "url" string is required');
    }
    const requestedTimeout = options.waitForTimeout;
    const timeoutMs = typeof requestedTimeout === 'number' && Number.isFinite(requestedTimeout) && requestedTimeout > 0
        ? requestedTimeout
        : DEFAULT_TIMEOUT_MS;
    const returnType = options.returnType || 'html';
    const warnings = [];
    try {
        // Step 1: navigate
        await page.goto(options.url, {
            waitUntil: 'domcontentloaded',
            timeout: timeoutMs,
        });
        // Step 2: best-effort wait for the requested selector
        if (options.waitForSelector) {
            try {
                await page.waitForSelector(options.waitForSelector, {
                    timeout: timeoutMs,
                    visible: true,
                });
            }
            catch (selectorError) {
                // The page may still hold useful content, so keep going - but tell the caller.
                const reason = selectorError instanceof Error ? selectorError.message : String(selectorError);
                warnings.push(`waitForSelector "${options.waitForSelector}" did not resolve: ${reason}`);
            }
        }
        // Step 3: scroll down in viewport-sized steps to trigger lazy loading
        if (options.scrollToLoad !== false) {
            // The callback runs in the page context, so it must only use its own arguments.
            await page.evaluate(async (delayMs, maxPx) => {
                // A zero-height viewport would make the step 0 and the loop endless.
                const step = window.innerHeight > 0 ? window.innerHeight : 600;
                const pageHeight = document.body
                    ? document.body.scrollHeight
                    : (document.documentElement?.scrollHeight ?? 0);
                let scrolled = 0;
                while (scrolled < pageHeight && scrolled < maxPx) {
                    window.scrollBy(0, step);
                    scrolled += step;
                    await new Promise((resolve) => setTimeout(resolve, delayMs));
                }
                // Restore the original scroll position
                window.scrollTo(0, 0);
            }, scrollDelayMs, maxScrollPx);
            // Give lazy-loaded content time to arrive
            await sleep(lazyLoadSettleMs);
        }
        // Step 4: let remaining page scripts run
        await sleep(scriptSettleMs);
        // Step 5: extract
        const title = await page.title();
        const finalUrl = page.url();
        let html;
        let text;
        let elements;
        let elementCount = 0;
        if (options.extractSelector) {
            // Resolve the attribute list here: an empty array is truthy, so an
            // in-page `attrs || defaults` fallback would never apply.
            const requestedAttributes = Array.isArray(options.extractAttributes)
                ? options.extractAttributes.filter((name) => typeof name === 'string' && name !== '')
                : [];
            const attributeNames = requestedAttributes.length > 0 ? requestedAttributes : DEFAULT_ATTRIBUTES;
            const extracted = await page.evaluate((selector, attrs) => {
                return Array.from(document.querySelectorAll(selector), (el) => {
                    const attributes = {};
                    for (const attrName of attrs) {
                        if (attrName.endsWith('*')) {
                            // Wildcard such as data-*: copy every attribute sharing the prefix
                            const prefix = attrName.slice(0, -1);
                            for (const attr of Array.from(el.attributes)) {
                                if (attr.name.startsWith(prefix)) {
                                    attributes[attr.name] = attr.value;
                                }
                            }
                        }
                        else {
                            const value = el.getAttribute(attrName);
                            if (value) {
                                attributes[attrName] = value;
                            }
                        }
                    }
                    return {
                        tag: el.tagName.toLowerCase(),
                        // Truncated to keep the response size bounded
                        text: el.textContent?.trim()?.substring(0, 500) || '',
                        html: el.outerHTML.substring(0, 2000),
                        attributes,
                    };
                });
            }, options.extractSelector, attributeNames);
            elementCount = extracted.length;
            if (returnType === 'elements') {
                elements = extracted.map((item) => ({
                    tag: item.tag,
                    text: item.text,
                    attributes: item.attributes,
                }));
            }
            else if (returnType === 'html') {
                html = extracted.map((item) => item.html).join('\n');
            }
            else {
                text = extracted.map((item) => item.text).join('\n');
            }
        }
        else {
            // Full-page extraction
            if (returnType === 'html') {
                html = await page.evaluate(() => document.documentElement.outerHTML);
            }
            else {
                text = await page.evaluate(() => document.body?.textContent || '');
            }
            elementCount = 1;
        }
        const result = {
            success: true,
            url: options.url,
            finalUrl,
            title,
            html,
            text,
            elements,
            elementCount,
        };
        if (warnings.length > 0) {
            result.warnings = warnings;
        }
        return result;
    }
    catch (error) {
        return failure(error instanceof Error ? error.message : String(error));
    }
}
package/dist/index.js CHANGED
@@ -61,7 +61,7 @@ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent
61
61
  // Download tools
62
62
  handleFileDownloader,
63
63
  // Enhanced streaming/download tools
64
- handleIframeHandler, handleStreamExtractor, } from './handlers/advanced-tools.js';
64
+ handleIframeHandler, handleStreamExtractor, handleJsScrape, } from './handlers/advanced-tools.js';
65
65
  // State for video recording
66
66
  const recorderState = new Map();
67
67
  debug('All modules loaded successfully');
@@ -256,6 +256,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
256
256
  if (!page)
257
257
  throw new Error('Browser not initialized. Call browser_init first.');
258
258
  return { content: [{ type: 'text', text: JSON.stringify(await handleStreamExtractor(page, args)) }] };
259
+ case TOOL_NAMES.JS_SCRAPE:
260
+ if (!page)
261
+ throw new Error('Browser not initialized. Call browser_init first.');
262
+ return { content: [{ type: 'text', text: JSON.stringify(await handleJsScrape(page, args)) }] };
259
263
  default:
260
264
  throw new Error(`Unknown tool: ${name}`);
261
265
  }
@@ -761,6 +761,25 @@ export const TOOLS = [
761
761
  },
762
762
  },
763
763
  },
764
+ {
765
+ name: 'js_scrape',
766
+ description: 'Single-call JavaScript-rendered content extraction. Combines navigation, auto-wait, scrolling, and content extraction. Perfect for AJAX/dynamic pages that Jsoup cannot parse.',
767
+ inputSchema: {
768
+ type: 'object',
769
+ additionalProperties: false,
770
+ properties: {
771
+ url: { type: 'string', description: 'URL to scrape (required)' },
772
+ waitForSelector: { type: 'string', description: 'CSS selector to wait for before extracting content' },
773
+ waitForTimeout: { type: 'number', description: 'Maximum wait time in ms', default: 10000 },
774
+ extractSelector: { type: 'string', description: 'CSS selector for specific elements to extract (optional, extracts full page if not specified)' },
775
+ extractAttributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract from elements (e.g., href, src, data-*)' },
776
+ returnType: { type: 'string', enum: ['html', 'text', 'elements'], description: 'Return format', default: 'html' },
777
+ scrollToLoad: { type: 'boolean', description: 'Scroll page to trigger lazy loading', default: true },
778
+ closeBrowserAfter: { type: 'boolean', description: 'Close browser after scraping', default: false },
779
+ },
780
+ required: ['url'],
781
+ },
782
+ },
764
783
  ];
765
784
  // Tool name constants for type safety
766
785
  export const TOOL_NAMES = {
@@ -799,6 +818,7 @@ export const TOOL_NAMES = {
799
818
  // Enhanced tools
800
819
  IFRAME_HANDLER: 'iframe_handler',
801
820
  STREAM_EXTRACTOR: 'stream_extractor',
821
+ JS_SCRAPE: 'js_scrape',
802
822
  };
803
823
  // Tool categories for organization
804
824
  export const TOOL_CATEGORIES = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.27.4",
3
+ "version": "2.27.6",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.8.4",
53
+ "brave-real-browser": "^2.8.6",
54
54
  "puppeteer-core": "^24.35.0",
55
55
  "turndown": "latest",
56
56
  "vscode-languageserver": "^9.0.1",