brave-real-browser-mcp-server 2.27.3 → 2.27.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/advanced-tools.js +132 -0
- package/dist/index.js +5 -1
- package/dist/tool-definitions.js +20 -0
- package/package.json +2 -2
|
@@ -2876,3 +2876,135 @@ export async function handleStreamExtractor(page, args) {
|
|
|
2876
2876
|
: 'No direct URLs found',
|
|
2877
2877
|
};
|
|
2878
2878
|
}
|
|
2879
|
+
/**
 * JS Scrape - single-call extraction of JavaScript-rendered content.
 *
 * Runs the following steps, in order, against the supplied page:
 *   1. Navigate to `args.url`, waiting for `domcontentloaded`.
 *   2. If `args.waitForSelector` is set, wait for it to become visible
 *      (best effort: a timeout here does not abort the scrape).
 *   3. Unless `args.scrollToLoad === false`, scroll down one viewport at a
 *      time (capped at 5000px), then back to the top, to trigger lazy loading.
 *   4. Pause briefly so pending scripts can run.
 *   5. Extract either the elements matching `args.extractSelector` or the
 *      whole page, shaped according to `args.returnType`.
 *
 * @param {object} page - Page object exposing `goto`, `waitForSelector`,
 *   `evaluate`, `title` and `url` (presumably a Puppeteer Page - confirm
 *   against the caller).
 * @param {object} args - Tool arguments.
 * @param {string} args.url - URL to navigate to.
 * @param {string} [args.waitForSelector] - CSS selector to wait for before extracting.
 * @param {number} [args.waitForTimeout=10000] - Timeout in ms used for both the
 *   navigation and the selector wait. Falsy values (including 0) fall back to 10000.
 * @param {string} [args.extractSelector] - CSS selector of the elements to
 *   extract; when omitted the whole page is returned.
 * @param {string[]} [args.extractAttributes] - Attribute names to collect from
 *   each matched element. A trailing `*` (e.g. `data-*`) matches by prefix.
 *   When omitted, `href`, `src`, `alt`, `title` and `data-*` are collected;
 *   an explicit empty array collects nothing.
 * @param {'html'|'text'|'elements'} [args.returnType='html'] - Output format.
 *   `'elements'` only has an effect together with `extractSelector`; without
 *   a selector any value other than `'html'` yields the page text.
 * @param {boolean} [args.scrollToLoad] - Pass `false` to skip the scroll step.
 * @returns {Promise<object>} On success: `{ success: true, url, finalUrl, title,
 *   html, text, elements, elementCount }` where only the field selected by
 *   `returnType` is populated. On failure: `{ success: false, url, finalUrl,
 *   title: '', elementCount: 0, error }`. This function does not throw for
 *   errors raised inside the `try` block.
 *
 * Note: `closeBrowserAfter` is not read by this function.
 */
export async function handleJsScrape(page, args) {
    const waitForTimeout = args.waitForTimeout || 10000;
    const returnType = args.returnType || 'html';
    // Resolve the attribute list here rather than inside page.evaluate: the
    // previous `args.extractAttributes || []` was always truthy, so the
    // in-page default list could never be reached.
    const attributeNames = Array.isArray(args.extractAttributes)
        ? args.extractAttributes
        : ['href', 'src', 'alt', 'title', 'data-*'];
    try {
        // Step 1: Navigate to URL
        await page.goto(args.url, {
            waitUntil: 'domcontentloaded',
            timeout: waitForTimeout
        });
        // Step 2: Wait for specific selector if provided
        if (args.waitForSelector) {
            try {
                await page.waitForSelector(args.waitForSelector, {
                    timeout: waitForTimeout,
                    visible: true
                });
            }
            catch (e) {
                // Deliberate best effort: continue even if the selector never
                // appears - the page might still have useful content.
            }
        }
        // Step 3: Scroll to trigger lazy loading if requested
        if (args.scrollToLoad !== false) {
            await page.evaluate(async () => {
                const scrollStep = window.innerHeight;
                const scrollDelay = 200;
                let totalScrolled = 0;
                // document.body can be null (e.g. non-HTML documents); treat
                // that as "nothing to scroll" instead of failing the scrape.
                const maxScroll = document.body?.scrollHeight ?? 0;
                while (totalScrolled < maxScroll && totalScrolled < 5000) {
                    window.scrollBy(0, scrollStep);
                    totalScrolled += scrollStep;
                    await new Promise(r => setTimeout(r, scrollDelay));
                }
                // Scroll back to top
                window.scrollTo(0, 0);
            });
            // Wait for any lazy-loaded content
            await new Promise(r => setTimeout(r, 1000));
        }
        // Step 4: Wait additional time for JavaScript to execute
        await new Promise(r => setTimeout(r, 500));
        // Step 5: Extract content based on returnType
        const title = await page.title();
        const finalUrl = page.url();
        let html;
        let text;
        let elements;
        let elementCount = 0;
        if (args.extractSelector) {
            // Extract specific elements. The callback runs in the page context,
            // so everything it needs is passed in as arguments.
            const extractedData = await page.evaluate((selector, attrs) => {
                const result = [];
                document.querySelectorAll(selector).forEach((el) => {
                    const attrObj = {};
                    attrs.forEach((attrName) => {
                        if (attrName.endsWith('*')) {
                            // Handle wildcard attributes like data-*
                            const prefix = attrName.slice(0, -1);
                            Array.from(el.attributes).forEach((attr) => {
                                if (attr.name.startsWith(prefix)) {
                                    attrObj[attr.name] = attr.value;
                                }
                            });
                        }
                        else {
                            // Missing and empty-valued attributes are skipped.
                            const val = el.getAttribute(attrName);
                            if (val) {
                                attrObj[attrName] = val;
                            }
                        }
                    });
                    result.push({
                        tag: el.tagName.toLowerCase(),
                        // Truncated to keep the response size bounded.
                        text: el.textContent?.trim()?.substring(0, 500) || '',
                        html: el.outerHTML.substring(0, 2000),
                        attributes: attrObj
                    });
                });
                return result;
            }, args.extractSelector, attributeNames);
            elementCount = extractedData.length;
            if (returnType === 'elements') {
                elements = extractedData.map(e => ({
                    tag: e.tag,
                    text: e.text,
                    attributes: e.attributes
                }));
            }
            else if (returnType === 'html') {
                html = extractedData.map(e => e.html).join('\n');
            }
            else {
                text = extractedData.map(e => e.text).join('\n');
            }
        }
        else {
            // Extract full page content
            if (returnType === 'html') {
                html = await page.evaluate(() => document.documentElement.outerHTML);
            }
            else {
                text = await page.evaluate(() => document.body?.textContent || '');
            }
            elementCount = 1;
        }
        return {
            success: true,
            url: args.url,
            finalUrl,
            title,
            html,
            text,
            elements,
            elementCount
        };
    }
    catch (error) {
        return {
            success: false,
            url: args.url,
            finalUrl: page.url() || args.url,
            title: '',
            elementCount: 0,
            error: error instanceof Error ? error.message : String(error)
        };
    }
}
|
package/dist/index.js
CHANGED
|
@@ -61,7 +61,7 @@ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent
|
|
|
61
61
|
// Download tools
|
|
62
62
|
handleFileDownloader,
|
|
63
63
|
// Enhanced streaming/download tools
|
|
64
|
-
handleIframeHandler, handleStreamExtractor, } from './handlers/advanced-tools.js';
|
|
64
|
+
handleIframeHandler, handleStreamExtractor, handleJsScrape, } from './handlers/advanced-tools.js';
|
|
65
65
|
// State for video recording
|
|
66
66
|
const recorderState = new Map();
|
|
67
67
|
debug('All modules loaded successfully');
|
|
@@ -256,6 +256,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
256
256
|
if (!page)
|
|
257
257
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
258
258
|
return { content: [{ type: 'text', text: JSON.stringify(await handleStreamExtractor(page, args)) }] };
|
|
259
|
+
case TOOL_NAMES.JS_SCRAPE:
|
|
260
|
+
if (!page)
|
|
261
|
+
throw new Error('Browser not initialized. Call browser_init first.');
|
|
262
|
+
return { content: [{ type: 'text', text: JSON.stringify(await handleJsScrape(page, args)) }] };
|
|
259
263
|
default:
|
|
260
264
|
throw new Error(`Unknown tool: ${name}`);
|
|
261
265
|
}
|
package/dist/tool-definitions.js
CHANGED
|
@@ -761,6 +761,25 @@ export const TOOLS = [
|
|
|
761
761
|
},
|
|
762
762
|
},
|
|
763
763
|
},
|
|
764
|
+
{
|
|
765
|
+
name: 'js_scrape',
|
|
766
|
+
description: 'Single-call JavaScript-rendered content extraction. Combines navigation, auto-wait, scrolling, and content extraction. Perfect for AJAX/dynamic pages that Jsoup cannot parse.',
|
|
767
|
+
inputSchema: {
|
|
768
|
+
type: 'object',
|
|
769
|
+
additionalProperties: false,
|
|
770
|
+
properties: {
|
|
771
|
+
url: { type: 'string', description: 'URL to scrape (required)' },
|
|
772
|
+
waitForSelector: { type: 'string', description: 'CSS selector to wait for before extracting content' },
|
|
773
|
+
waitForTimeout: { type: 'number', description: 'Maximum wait time in ms', default: 10000 },
|
|
774
|
+
extractSelector: { type: 'string', description: 'CSS selector for specific elements to extract (optional, extracts full page if not specified)' },
|
|
775
|
+
extractAttributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract from elements (e.g., href, src, data-*)' },
|
|
776
|
+
returnType: { type: 'string', enum: ['html', 'text', 'elements'], description: 'Return format', default: 'html' },
|
|
777
|
+
scrollToLoad: { type: 'boolean', description: 'Scroll page to trigger lazy loading', default: true },
|
|
778
|
+
closeBrowserAfter: { type: 'boolean', description: 'Close browser after scraping', default: false },
|
|
779
|
+
},
|
|
780
|
+
required: ['url'],
|
|
781
|
+
},
|
|
782
|
+
},
|
|
764
783
|
];
|
|
765
784
|
// Tool name constants for type safety
|
|
766
785
|
export const TOOL_NAMES = {
|
|
@@ -799,6 +818,7 @@ export const TOOL_NAMES = {
|
|
|
799
818
|
// Enhanced tools
|
|
800
819
|
IFRAME_HANDLER: 'iframe_handler',
|
|
801
820
|
STREAM_EXTRACTOR: 'stream_extractor',
|
|
821
|
+
JS_SCRAPE: 'js_scrape',
|
|
802
822
|
};
|
|
803
823
|
// Tool categories for organization
|
|
804
824
|
export const TOOL_CATEGORIES = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.27.3",
|
|
3
|
+
"version": "2.27.5",
|
|
4
4
|
"description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "latest",
|
|
52
52
|
"@types/turndown": "latest",
|
|
53
|
-
"brave-real-browser": "^2.8.
|
|
53
|
+
"brave-real-browser": "^2.8.5",
|
|
54
54
|
"puppeteer-core": "^24.35.0",
|
|
55
55
|
"turndown": "latest",
|
|
56
56
|
"vscode-languageserver": "^9.0.1",
|