brave-real-browser-mcp-server 2.24.1 → 2.24.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/advanced-tools.js +128 -0
- package/dist/index.js +3 -12
- package/dist/tool-definitions.js +11 -36
- package/dist/unified-server.js +4 -9
- package/package.json +2 -2
|
@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
|
|
|
2053
2053
|
message: 'Frame accessed successfully',
|
|
2054
2054
|
};
|
|
2055
2055
|
}
|
|
2056
|
+
// NEW: deep_scrape action - HTTP-based recursive iframe crawling
|
|
2057
|
+
if (action === 'deep_scrape') {
|
|
2058
|
+
const timeout = args.timeout || 10000;
|
|
2059
|
+
const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
|
|
2060
|
+
const allIframes = [];
|
|
2061
|
+
const videoSources = [];
|
|
2062
|
+
const visited = new Set();
|
|
2063
|
+
// Helper: Fetch page content via HTTP
|
|
2064
|
+
const fetchPageContent = async (url) => {
|
|
2065
|
+
try {
|
|
2066
|
+
const https = await import('https');
|
|
2067
|
+
const http = await import('http');
|
|
2068
|
+
return new Promise((resolve) => {
|
|
2069
|
+
const protocol = url.startsWith('https') ? https : http;
|
|
2070
|
+
const req = protocol.get(url, { timeout }, (res) => {
|
|
2071
|
+
let data = '';
|
|
2072
|
+
res.on('data', (chunk) => data += chunk);
|
|
2073
|
+
res.on('end', () => resolve(data));
|
|
2074
|
+
});
|
|
2075
|
+
req.on('error', () => resolve(''));
|
|
2076
|
+
req.on('timeout', () => { req.destroy(); resolve(''); });
|
|
2077
|
+
});
|
|
2078
|
+
}
|
|
2079
|
+
catch {
|
|
2080
|
+
return '';
|
|
2081
|
+
}
|
|
2082
|
+
};
|
|
2083
|
+
// Helper: Extract iframes and video sources from HTML
|
|
2084
|
+
const extractFromHtml = (html, baseUrl) => {
|
|
2085
|
+
const iframes = [];
|
|
2086
|
+
const videos = [];
|
|
2087
|
+
// Extract iframes
|
|
2088
|
+
const iframeRegex = /<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi;
|
|
2089
|
+
let match;
|
|
2090
|
+
while ((match = iframeRegex.exec(html)) !== null) {
|
|
2091
|
+
let src = match[1];
|
|
2092
|
+
// Handle relative URLs
|
|
2093
|
+
if (src.startsWith('//'))
|
|
2094
|
+
src = 'https:' + src;
|
|
2095
|
+
else if (src.startsWith('/')) {
|
|
2096
|
+
const urlObj = new URL(baseUrl);
|
|
2097
|
+
src = urlObj.origin + src;
|
|
2098
|
+
}
|
|
2099
|
+
iframes.push(src);
|
|
2100
|
+
}
|
|
2101
|
+
// Extract video sources (m3u8, mp4, etc.)
|
|
2102
|
+
const videoPatterns = [
|
|
2103
|
+
/https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi,
|
|
2104
|
+
/https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi,
|
|
2105
|
+
/https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi,
|
|
2106
|
+
/file:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
|
|
2107
|
+
/source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
|
|
2108
|
+
];
|
|
2109
|
+
for (const pattern of videoPatterns) {
|
|
2110
|
+
let videoMatch;
|
|
2111
|
+
while ((videoMatch = pattern.exec(html)) !== null) {
|
|
2112
|
+
const url = videoMatch[1] || videoMatch[0];
|
|
2113
|
+
videos.push({ url, type: url.includes('.m3u8') ? 'hls' : 'mp4' });
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
// Try to unpack obfuscated JS (p,a,c,k,e,d)
|
|
2117
|
+
const packedMatch = html.match(/eval\(function\(p,a,c,k,e,[rd]\)[^{]+\{[^}]+\}[^)]+\('[^']+'/);
|
|
2118
|
+
if (packedMatch) {
|
|
2119
|
+
try {
|
|
2120
|
+
// Simple unpacking - extract strings
|
|
2121
|
+
const stringsMatch = html.match(/'([^']+)'\.split\('\|'\)/);
|
|
2122
|
+
if (stringsMatch) {
|
|
2123
|
+
const strings = stringsMatch[1].split('|');
|
|
2124
|
+
for (const s of strings) {
|
|
2125
|
+
if (s.includes('m3u8') || s.includes('master')) {
|
|
2126
|
+
// Find m3u8 URLs in unpacked content
|
|
2127
|
+
const m3u8Match = html.match(new RegExp(`https?://[^"'\\s]*${s}[^"'\\s]*`, 'i'));
|
|
2128
|
+
if (m3u8Match) {
|
|
2129
|
+
videos.push({ url: m3u8Match[0], type: 'hls', unpacked: true });
|
|
2130
|
+
}
|
|
2131
|
+
}
|
|
2132
|
+
}
|
|
2133
|
+
}
|
|
2134
|
+
}
|
|
2135
|
+
catch { /* ignore unpacking errors */ }
|
|
2136
|
+
}
|
|
2137
|
+
return { iframes, videos };
|
|
2138
|
+
};
|
|
2139
|
+
// Recursive crawler
|
|
2140
|
+
const crawlIframe = async (url, depth) => {
|
|
2141
|
+
if (depth >= maxDepth || visited.has(url))
|
|
2142
|
+
return;
|
|
2143
|
+
visited.add(url);
|
|
2144
|
+
// Apply filter if specified
|
|
2145
|
+
if (filterPattern && !filterPattern.test(url))
|
|
2146
|
+
return;
|
|
2147
|
+
const html = await fetchPageContent(url);
|
|
2148
|
+
if (!html)
|
|
2149
|
+
return;
|
|
2150
|
+
const { iframes, videos } = extractFromHtml(html, url);
|
|
2151
|
+
// Add this iframe to results
|
|
2152
|
+
allIframes.push({ depth, url, childCount: iframes.length });
|
|
2153
|
+
// Add video sources
|
|
2154
|
+
for (const video of videos) {
|
|
2155
|
+
if (!videoSources.some(v => v.url === video.url)) {
|
|
2156
|
+
videoSources.push({ ...video, foundAt: url, depth });
|
|
2157
|
+
}
|
|
2158
|
+
}
|
|
2159
|
+
// Recursively crawl child iframes
|
|
2160
|
+
for (const iframeSrc of iframes) {
|
|
2161
|
+
await crawlIframe(iframeSrc, depth + 1);
|
|
2162
|
+
}
|
|
2163
|
+
};
|
|
2164
|
+
// Start from current page URL
|
|
2165
|
+
const currentUrl = page.url();
|
|
2166
|
+
await crawlIframe(currentUrl, 0);
|
|
2167
|
+
// Also check browser frames
|
|
2168
|
+
for (const frame of page.frames()) {
|
|
2169
|
+
try {
|
|
2170
|
+
const frameUrl = frame.url();
|
|
2171
|
+
if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
|
|
2172
|
+
await crawlIframe(frameUrl, 1);
|
|
2173
|
+
}
|
|
2174
|
+
}
|
|
2175
|
+
catch { /* ignore inaccessible frames */ }
|
|
2176
|
+
}
|
|
2177
|
+
return {
|
|
2178
|
+
success: true,
|
|
2179
|
+
iframes: args.flatten !== false ? allIframes : allIframes,
|
|
2180
|
+
videoSources: args.extractVideoSources !== false ? videoSources : undefined,
|
|
2181
|
+
message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
|
|
2182
|
+
};
|
|
2183
|
+
}
|
|
2056
2184
|
return {
|
|
2057
2185
|
success: false,
|
|
2058
2186
|
iframes: [],
|
package/dist/index.js
CHANGED
|
@@ -57,9 +57,7 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
|
|
|
57
57
|
import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
|
|
58
58
|
import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
|
|
59
59
|
// Import advanced tools handlers
|
|
60
|
-
import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter,
|
|
61
|
-
// Streaming tools
|
|
62
|
-
handleM3u8Parser, handleCookieManager,
|
|
60
|
+
import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema, handleCookieManager,
|
|
63
61
|
// Download tools
|
|
64
62
|
handleFileDownloader,
|
|
65
63
|
// Enhanced streaming/download tools
|
|
@@ -220,10 +218,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
220
218
|
if (!page)
|
|
221
219
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
222
220
|
return { content: [{ type: 'text', text: JSON.stringify(await handleAjaxContentWaiter(page, args || {})) }] };
|
|
223
|
-
case TOOL_NAMES.MEDIA_EXTRACTOR:
|
|
224
|
-
if (!page)
|
|
225
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
226
|
-
return { content: [{ type: 'text', text: JSON.stringify(await handleMediaExtractor(page, args || {})) }] };
|
|
221
|
+
// MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
|
|
227
222
|
case TOOL_NAMES.ELEMENT_SCREENSHOT:
|
|
228
223
|
if (!page)
|
|
229
224
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
@@ -240,11 +235,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
240
235
|
if (!page)
|
|
241
236
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
242
237
|
return { content: [{ type: 'text', text: JSON.stringify(await handleExtractSchema(page, args || {})) }] };
|
|
243
|
-
//
|
|
244
|
-
case TOOL_NAMES.M3U8_PARSER:
|
|
245
|
-
if (!page)
|
|
246
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
247
|
-
return { content: [{ type: 'text', text: JSON.stringify(await handleM3u8Parser(page, args || {})) }] };
|
|
238
|
+
// M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
|
|
248
239
|
case TOOL_NAMES.COOKIE_MANAGER:
|
|
249
240
|
if (!page)
|
|
250
241
|
throw new Error('Browser not initialized. Call browser_init first.');
|
package/dist/tool-definitions.js
CHANGED
|
@@ -485,21 +485,7 @@ export const TOOLS = [
|
|
|
485
485
|
},
|
|
486
486
|
},
|
|
487
487
|
},
|
|
488
|
-
{
|
|
489
|
-
name: 'media_extractor',
|
|
490
|
-
description: 'Extract media (audio/video) from page with quality options and ad-bypass',
|
|
491
|
-
inputSchema: {
|
|
492
|
-
type: 'object',
|
|
493
|
-
additionalProperties: false,
|
|
494
|
-
properties: {
|
|
495
|
-
mediaType: { type: 'string', enum: ['video', 'audio', 'all'], description: 'Type of media to extract', default: 'all' },
|
|
496
|
-
includeEmbedded: { type: 'boolean', description: 'Include embedded iframes', default: true },
|
|
497
|
-
quality: { type: 'string', description: 'Preferred quality (highest, lowest, 1080p, 720p)' },
|
|
498
|
-
format: { type: 'string', description: 'Preferred format (mp4, webm, m3u8)' },
|
|
499
|
-
bypassAds: { type: 'boolean', description: 'Attempt to bypass video ads', default: false },
|
|
500
|
-
},
|
|
501
|
-
},
|
|
502
|
-
},
|
|
488
|
+
// media_extractor REMOVED - functionality merged into stream_extractor
|
|
503
489
|
{
|
|
504
490
|
name: 'element_screenshot',
|
|
505
491
|
description: 'Capture screenshot of a specific element',
|
|
@@ -554,23 +540,7 @@ export const TOOLS = [
|
|
|
554
540
|
},
|
|
555
541
|
},
|
|
556
542
|
},
|
|
557
|
-
//
|
|
558
|
-
// STREAMING & MEDIA TOOLS (3 new tools)
|
|
559
|
-
// ============================================================
|
|
560
|
-
{
|
|
561
|
-
name: 'm3u8_parser',
|
|
562
|
-
description: 'Parse and extract HLS/m3u8 streaming URLs with quality options',
|
|
563
|
-
inputSchema: {
|
|
564
|
-
type: 'object',
|
|
565
|
-
additionalProperties: false,
|
|
566
|
-
properties: {
|
|
567
|
-
url: { type: 'string', description: 'URL of the page or m3u8 file' },
|
|
568
|
-
extractAll: { type: 'boolean', description: 'Extract all quality variants', default: true },
|
|
569
|
-
preferQuality: { type: 'string', description: 'Preferred quality (1080p, 720p, 480p, best, worst)', default: 'best' },
|
|
570
|
-
includeAudio: { type: 'boolean', description: 'Include audio-only streams', default: true },
|
|
571
|
-
},
|
|
572
|
-
},
|
|
573
|
-
},
|
|
543
|
+
// m3u8_parser REMOVED - functionality merged into stream_extractor
|
|
574
544
|
{
|
|
575
545
|
name: 'cookie_manager',
|
|
576
546
|
description: 'Manage browser cookies for premium accounts and sessions',
|
|
@@ -613,16 +583,21 @@ export const TOOLS = [
|
|
|
613
583
|
// ============================================================
|
|
614
584
|
{
|
|
615
585
|
name: 'iframe_handler',
|
|
616
|
-
description: 'Extract content from nested iframes including embedded video players',
|
|
586
|
+
description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
|
|
617
587
|
inputSchema: {
|
|
618
588
|
type: 'object',
|
|
619
589
|
additionalProperties: false,
|
|
620
590
|
properties: {
|
|
621
|
-
action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform' },
|
|
591
|
+
action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
|
|
622
592
|
selector: { type: 'string', description: 'CSS selector of target iframe' },
|
|
623
593
|
frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
|
|
624
594
|
maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
|
|
625
595
|
extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
|
|
596
|
+
recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
|
|
597
|
+
flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
|
|
598
|
+
filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
|
|
599
|
+
extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
|
|
600
|
+
timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
|
|
626
601
|
},
|
|
627
602
|
},
|
|
628
603
|
},
|
|
@@ -673,12 +648,12 @@ export const TOOL_NAMES = {
|
|
|
673
648
|
NETWORK_RECORDER: 'network_recorder',
|
|
674
649
|
API_FINDER: 'api_finder',
|
|
675
650
|
AJAX_CONTENT_WAITER: 'ajax_content_waiter',
|
|
676
|
-
MEDIA_EXTRACTOR: 'media_extractor',
|
|
651
|
+
// MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
|
|
677
652
|
ELEMENT_SCREENSHOT: 'element_screenshot',
|
|
678
653
|
LINK_HARVESTER: 'link_harvester',
|
|
679
654
|
BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
|
|
680
655
|
EXTRACT_SCHEMA: 'extract_schema',
|
|
681
|
-
M3U8_PARSER: 'm3u8_parser',
|
|
656
|
+
// M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
|
|
682
657
|
COOKIE_MANAGER: 'cookie_manager',
|
|
683
658
|
FILE_DOWNLOADER: 'file_downloader',
|
|
684
659
|
// Enhanced tools
|
package/dist/unified-server.js
CHANGED
|
@@ -132,11 +132,7 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
132
132
|
throw new Error('Browser not initialized');
|
|
133
133
|
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchContent(page, args)) }] };
|
|
134
134
|
break;
|
|
135
|
-
case TOOL_NAMES.MEDIA_EXTRACTOR:
|
|
136
|
-
if (!page)
|
|
137
|
-
throw new Error('Browser not initialized');
|
|
138
|
-
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleMediaExtractor(page, args || {})) }] };
|
|
139
|
-
break;
|
|
135
|
+
// MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
|
|
140
136
|
case TOOL_NAMES.STREAM_EXTRACTOR:
|
|
141
137
|
if (!page)
|
|
142
138
|
throw new Error('Browser not initialized');
|
|
@@ -513,10 +509,9 @@ async function main() {
|
|
|
513
509
|
console.error(' 📜 random_scroll - Natural scrolling');
|
|
514
510
|
console.error(' 🤖 solve_captcha - Solve CAPTCHAs');
|
|
515
511
|
console.error('');
|
|
516
|
-
console.error(' Media
|
|
517
|
-
console.error(' 🎬
|
|
518
|
-
console.error('
|
|
519
|
-
console.error(' 🎥 stream_extractor - Master stream extraction');
|
|
512
|
+
console.error(' Media & Streaming:');
|
|
513
|
+
console.error(' 🎬 stream_extractor - Master: Extract video/audio/m3u8/mp4');
|
|
514
|
+
console.error(' 🖼️ iframe_handler - Handle nested iframes (deep_scrape)');
|
|
520
515
|
console.error('');
|
|
521
516
|
console.error(' Advanced Tools:');
|
|
522
517
|
console.error(' 🔎 search_content - Search patterns in page');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.24.1",
|
|
3
|
+
"version": "2.24.3",
|
|
4
4
|
"description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "latest",
|
|
52
52
|
"@types/turndown": "latest",
|
|
53
|
-
"brave-real-browser": "^2.5.
|
|
53
|
+
"brave-real-browser": "^2.5.3",
|
|
54
54
|
"turndown": "latest",
|
|
55
55
|
"vscode-languageserver": "^9.0.1",
|
|
56
56
|
"vscode-languageserver-textdocument": "^1.0.12"
|