brave-real-browser-mcp-server 2.24.1 → 2.24.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
|
|
|
2053
2053
|
message: 'Frame accessed successfully',
|
|
2054
2054
|
};
|
|
2055
2055
|
}
|
|
2056
|
+
// NEW: deep_scrape action - HTTP-based recursive iframe crawling
|
|
2057
|
+
if (action === 'deep_scrape') {
|
|
2058
|
+
const timeout = args.timeout || 10000;
|
|
2059
|
+
const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
|
|
2060
|
+
const allIframes = [];
|
|
2061
|
+
const videoSources = [];
|
|
2062
|
+
const visited = new Set();
|
|
2063
|
+
// Helper: Fetch page content via HTTP
|
|
2064
|
+
/**
 * Download the body of a page over plain HTTP(S), bypassing the browser.
 *
 * Best-effort helper: every failure mode (unparseable URL, non-HTTP scheme,
 * socket error, timeout, broken response stream) resolves to an empty
 * string. The returned promise never rejects, so a single bad URL cannot
 * abort the caller's crawl.
 *
 * @param {string} url - Absolute http:// or https:// URL to fetch.
 * @returns {Promise<string>} Response body decoded as UTF-8, or '' on failure.
 */
const fetchPageContent = async (url) => {
    // Validate up front: http.get()/https.get() throw synchronously on
    // relative paths and on schemes such as about:, data: or javascript:.
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return '';
    }
    if (target.protocol !== 'https:' && target.protocol !== 'http:') {
        return '';
    }
    try {
        const protocol = target.protocol === 'https:'
            ? await import('https')
            : await import('http');
        // `return await` keeps a rejection inside this try/catch; a bare
        // `return new Promise(...)` would let it escape to the caller.
        return await new Promise((resolve) => {
            const req = protocol.get(target, { timeout }, (res) => {
                // Decode in the stream so multi-byte characters that straddle
                // two chunks are not corrupted by per-chunk string coercion.
                res.setEncoding('utf8');
                let data = '';
                res.on('data', (chunk) => {
                    data += chunk;
                });
                res.on('end', () => resolve(data));
                res.on('error', () => resolve(''));
            });
            req.on('error', () => resolve(''));
            // The 'timeout' event only signals idleness; the request has to
            // be destroyed explicitly to release the socket.
            req.on('timeout', () => {
                req.destroy();
                resolve('');
            });
        });
    }
    catch {
        return '';
    }
};
|
|
2083
|
+
// Helper: Extract iframes and video sources from HTML
|
|
2084
|
+
/**
 * Scan raw HTML for nested iframe URLs and for direct video stream URLs.
 *
 * Iframe `src` values are turned into absolute http(s) URLs: protocol-relative
 * values get `https:`, any other relative form is resolved against `baseUrl`,
 * `&amp;` is decoded, and values that cannot be fetched over HTTP (about:,
 * javascript:, data:, unparseable) are dropped.
 *
 * Video URLs are collected from the page text and, when a
 * `eval(function(p,a,c,k,e,d)...)` packed script is present, from its
 * expanded payload as well. Each URL is reported once.
 *
 * @param {string} html - Page markup to scan.
 * @param {string} baseUrl - URL the markup was loaded from; used to resolve relative iframe sources.
 * @returns {{iframes: string[], videos: Array<{url: string, type: string, unpacked?: boolean}>}}
 *   `type` is 'hls', 'webm' or 'mp4'; `unpacked` is set on entries that came from a packed script.
 */
const extractFromHtml = (html, baseUrl) => {
    const iframes = [];
    const videos = [];
    if (typeof html !== 'string' || html === '') {
        return { iframes, videos };
    }
    const knownVideoUrls = new Set();

    // Label a stream by the extension that appears in its URL.
    const classifyVideo = (url) => {
        if (url.includes('.m3u8')) {
            return 'hls';
        }
        return /\.webm\b/i.test(url) ? 'webm' : 'mp4';
    };

    // Record a video URL once; `extra` may override `type` or add flags.
    const addVideo = (url, extra = {}) => {
        if (knownVideoUrls.has(url)) {
            return;
        }
        knownVideoUrls.add(url);
        videos.push({ url, type: classifyVideo(url), ...extra });
    };

    // Run every video URL pattern over `text` (m3u8, mp4, webm, player configs).
    const collectVideos = (text, extra) => {
        const videoPatterns = [
            /https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi,
            /https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi,
            /https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi,
            /file:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
            /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
        ];
        for (const pattern of videoPatterns) {
            for (const found of text.matchAll(pattern)) {
                addVideo(found[1] || found[0], extra);
            }
        }
    };

    // Turn an iframe src attribute into an absolute http(s) URL, or null.
    const resolveIframeSrc = (rawSrc) => {
        const src = rawSrc.trim().replace(/&amp;/gi, '&');
        if (/^https?:\/\//i.test(src)) {
            return src;
        }
        if (src.startsWith('//')) {
            return `https:${src}`;
        }
        try {
            const resolved = new URL(src, baseUrl);
            return resolved.protocol === 'http:' || resolved.protocol === 'https:'
                ? resolved.href
                : null;
        }
        catch {
            return null;
        }
    };

    // Expand a packed payload: each word token is an index (written in base
    // `radix` with digits 0-9, a-z, A-Z) into the `words` dictionary.
    const PACKER_DIGITS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    const unpackPayload = (payload, radix, words) => {
        if (radix < 2 || radix > PACKER_DIGITS.length) {
            return '';
        }
        const expanded = payload.replace(/\b\w+\b/g, (token) => {
            let index = 0;
            for (const ch of token) {
                const digit = PACKER_DIGITS.indexOf(ch);
                if (digit < 0 || digit >= radix) {
                    return token;
                }
                index = index * radix + digit;
            }
            // An empty dictionary slot means the token stands for itself.
            return words[index] || token;
        });
        // The payload is a single-quoted JS string; undo its quote escapes.
        return expanded.replace(/\\(['\\])/g, '$1');
    };

    // Extract iframes
    const iframeRegex = /<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi;
    for (const found of html.matchAll(iframeRegex)) {
        const resolved = resolveIframeSrc(found[1]);
        if (resolved !== null) {
            iframes.push(resolved);
        }
    }

    // Extract video sources that appear literally in the page
    collectVideos(html);

    // Packed scripts hide their URLs; look inside them too.
    if (/eval\(function\(p,a,c,k,e,[rd]\)/.test(html)) {
        try {
            // Matches the call arguments: }('payload',radix,count,'dictionary'.split('|')
            const packedCall = /\}\s*\(\s*'((?:\\.|[^'\\])*)'\s*,\s*(\d+)\s*,\s*\d+\s*,\s*'((?:\\.|[^'\\])*)'\s*\.split\('\|'\)/g;
            for (const call of html.matchAll(packedCall)) {
                const unpacked = unpackPayload(call[1], Number.parseInt(call[2], 10), call[3].split('|'));
                if (unpacked) {
                    collectVideos(unpacked, { unpacked: true });
                }
            }
            // Fallback heuristic: a dictionary word that mentions a playlist
            // may also occur inside a URL written out elsewhere in the page.
            const stringsMatch = html.match(/'([^']+)'\.split\('\|'\)/);
            if (stringsMatch) {
                for (const word of stringsMatch[1].split('|')) {
                    if (!word.includes('m3u8') && !word.includes('master')) {
                        continue;
                    }
                    // Escape the word so regex metacharacters in it are literal.
                    const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    const m3u8Match = html.match(new RegExp(`https?://[^"'\\s]*${escaped}[^"'\\s]*`, 'i'));
                    if (m3u8Match) {
                        addVideo(m3u8Match[0], { type: 'hls', unpacked: true });
                    }
                }
            }
        }
        catch { /* ignore unpacking errors */ }
    }
    return { iframes, videos };
};
|
|
2139
|
+
// Recursive crawler
|
|
2140
|
+
/**
 * Recursively fetch a URL over HTTP and record what it embeds.
 *
 * Appends one entry per successfully fetched page to `allIframes`, appends
 * newly seen video URLs to `videoSources`, and then descends into every
 * iframe found on the page. Recursion stops at `maxDepth` and never revisits
 * a URL already in `visited`.
 *
 * @param {string} url - Page or iframe URL to fetch.
 * @param {number} depth - Nesting level of `url`; the starting page is 0.
 * @returns {Promise<void>}
 */
const crawlIframe = async (url, depth) => {
    if (depth >= maxDepth || visited.has(url))
        return;
    visited.add(url);
    // The filter describes iframe URLs. The starting page (depth 0) is not an
    // iframe, so it is always fetched; otherwise a filter that names embed
    // hosts would reject the root and nothing would be crawled.
    if (depth > 0 && filterPattern && !filterPattern.test(url))
        return;
    let found;
    try {
        const html = await fetchPageContent(url);
        if (!html)
            return;
        found = extractFromHtml(html, url);
    }
    catch {
        // Treat any failure while fetching or parsing this URL as a dead end
        // so the remaining sibling iframes are still crawled.
        return;
    }
    const { iframes, videos } = found;
    // Add this iframe to results
    allIframes.push({ depth, url, childCount: iframes.length });
    // Add video sources not reported by an earlier page
    for (const video of videos) {
        if (!videoSources.some((known) => known.url === video.url)) {
            videoSources.push({ ...video, foundAt: url, depth });
        }
    }
    // Recursively crawl child iframes, one at a time
    for (const iframeSrc of iframes) {
        await crawlIframe(iframeSrc, depth + 1);
    }
};
|
|
2164
|
+
// Start from current page URL
|
|
2165
|
+
const currentUrl = page.url();
|
|
2166
|
+
await crawlIframe(currentUrl, 0);
|
|
2167
|
+
// Also check browser frames
|
|
2168
|
+
for (const frame of page.frames()) {
|
|
2169
|
+
try {
|
|
2170
|
+
const frameUrl = frame.url();
|
|
2171
|
+
if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
|
|
2172
|
+
await crawlIframe(frameUrl, 1);
|
|
2173
|
+
}
|
|
2174
|
+
}
|
|
2175
|
+
catch { /* ignore inaccessible frames */ }
|
|
2176
|
+
}
|
|
2177
|
+
return {
|
|
2178
|
+
success: true,
|
|
2179
|
+
iframes: args.flatten !== false ? allIframes : allIframes,
|
|
2180
|
+
videoSources: args.extractVideoSources !== false ? videoSources : undefined,
|
|
2181
|
+
message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
|
|
2182
|
+
};
|
|
2183
|
+
}
|
|
2056
2184
|
return {
|
|
2057
2185
|
success: false,
|
|
2058
2186
|
iframes: [],
|
package/dist/tool-definitions.js
CHANGED
|
@@ -613,16 +613,21 @@ export const TOOLS = [
|
|
|
613
613
|
// ============================================================
|
|
614
614
|
{
|
|
615
615
|
name: 'iframe_handler',
|
|
616
|
-
description: 'Extract content from nested iframes including embedded video players',
|
|
616
|
+
description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
|
|
617
617
|
inputSchema: {
|
|
618
618
|
type: 'object',
|
|
619
619
|
additionalProperties: false,
|
|
620
620
|
properties: {
|
|
621
|
-
action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform' },
|
|
621
|
+
action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
|
|
622
622
|
selector: { type: 'string', description: 'CSS selector of target iframe' },
|
|
623
623
|
frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
|
|
624
624
|
maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
|
|
625
625
|
extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
|
|
626
|
+
recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
|
|
627
|
+
flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
|
|
628
|
+
filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
|
|
629
|
+
extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
|
|
630
|
+
timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
|
|
626
631
|
},
|
|
627
632
|
},
|
|
628
633
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.24.1",
|
|
3
|
+
"version": "2.24.2",
|
|
4
4
|
"description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "latest",
|
|
52
52
|
"@types/turndown": "latest",
|
|
53
|
-
"brave-real-browser": "^2.5.
|
|
53
|
+
"brave-real-browser": "^2.5.2",
|
|
54
54
|
"turndown": "latest",
|
|
55
55
|
"vscode-languageserver": "^9.0.1",
|
|
56
56
|
"vscode-languageserver-textdocument": "^1.0.12"
|