brave-real-browser-mcp-server 2.24.0 → 2.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
2053
2053
  message: 'Frame accessed successfully',
2054
2054
  };
2055
2055
  }
2056
+ // NEW: deep_scrape action - HTTP-based recursive iframe crawling
2057
+ if (action === 'deep_scrape') {
2058
+ const timeout = args.timeout || 10000;
2059
+ const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
2060
+ const allIframes = [];
2061
+ const videoSources = [];
2062
+ const visited = new Set();
2063
+ // Helper: Fetch page content via HTTP
2064
/**
 * Fetch the body of `url` with a plain HTTP(S) GET and return it as text.
 *
 * This helper is best-effort: it never rejects. Any failure (malformed URL,
 * unsupported scheme, connection error, timeout, response aborted midway)
 * yields an empty string, which callers treat as "nothing to scrape".
 *
 * Redirects are not followed; a 3xx response resolves with whatever body
 * the server sent for it.
 *
 * @param {string} url - Absolute http:// or https:// URL to download.
 * @returns {Promise<string>} Response body decoded as UTF-8, or '' on failure.
 */
const fetchPageContent = async (url) => {
    let client;
    try {
        // Pick the client from the parsed scheme. Anything that is not
        // http(s) (about:blank, javascript:, relative paths, ...) would make
        // `get()` throw synchronously, so bail out before issuing a request.
        const { protocol: scheme } = new URL(url);
        if (scheme === 'https:') {
            client = await import('https');
        }
        else if (scheme === 'http:') {
            client = await import('http');
        }
        else {
            return '';
        }
    }
    catch {
        return '';
    }
    return new Promise((resolve) => {
        try {
            // `timeout` comes from the enclosing deep_scrape scope (milliseconds).
            const req = client.get(url, { timeout }, (res) => {
                let data = '';
                // Decode through a streaming decoder so multi-byte characters
                // that straddle two chunks are not corrupted.
                res.setEncoding('utf8');
                res.on('data', (chunk) => { data += chunk; });
                res.on('end', () => resolve(data));
                res.on('error', () => resolve(''));
                // 'close' without a prior 'end' means the response was cut
                // short; after 'end' this resolve is a harmless no-op.
                res.on('close', () => resolve(''));
            });
            req.on('error', () => resolve(''));
            req.on('timeout', () => { req.destroy(); resolve(''); });
        }
        catch {
            // Synchronous failures from get() must not escape as a rejection.
            resolve('');
        }
    });
};
2083
+ // Helper: Extract iframes and video sources from HTML
2084
/**
 * Scan raw HTML text for iframe targets and direct video URLs.
 *
 * Iframe handling:
 *  - absolute http(s) srcs are returned as written;
 *  - protocol-relative srcs (`//host/path`) are prefixed with `https:`;
 *  - every other src is resolved against `baseUrl`;
 *  - srcs that cannot be resolved, or that resolve to a non-http(s) scheme
 *    (about:, javascript:, data:, ...), are dropped because they cannot be
 *    fetched over HTTP.
 *
 * Video handling:
 *  - URLs ending in .m3u8 / .mp4 / .webm and `file:` / `source:` player
 *    options pointing at .m3u8 are collected, each URL at most once;
 *  - when a Dean-Edwards-style packed script is present, its keyword table
 *    is scanned for "m3u8"/"master" and any URL in the page containing such
 *    a keyword is reported with `unpacked: true`. The packed payload itself
 *    is NOT decoded; this is only a keyword heuristic.
 *
 * @param {string} html - Page markup to scan.
 * @param {string} baseUrl - URL the markup was fetched from; base for relative srcs.
 * @returns {{ iframes: string[], videos: Array<{ url: string, type: string, unpacked?: boolean }> }}
 */
const extractFromHtml = (html, baseUrl) => {
    const iframes = [];
    const videos = [];
    if (typeof html !== 'string' || html.length === 0) {
        return { iframes, videos };
    }

    const seenVideoUrls = new Set();
    const addVideo = (entry) => {
        if (seenVideoUrls.has(entry.url)) {
            return;
        }
        seenVideoUrls.add(entry.url);
        videos.push(entry);
    };

    // --- iframes -----------------------------------------------------------
    for (const [, rawSrc] of html.matchAll(/<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi)) {
        // Attribute values are HTML-escaped; undo the one entity that
        // routinely appears inside query strings.
        const src = rawSrc.replace(/&amp;/g, '&');
        if (/^https?:\/\//i.test(src)) {
            iframes.push(src);
            continue;
        }
        if (src.startsWith('//')) {
            iframes.push(`https:${src}`);
            continue;
        }
        let resolved;
        try {
            resolved = new URL(src, baseUrl);
        }
        catch {
            // Unresolvable src or invalid baseUrl: nothing fetchable here.
            continue;
        }
        if (resolved.protocol === 'http:' || resolved.protocol === 'https:') {
            iframes.push(resolved.href);
        }
    }

    // --- direct video sources ----------------------------------------------
    // `fallbackType` is used when the matched URL does not contain ".m3u8".
    const videoPatterns = [
        { regex: /https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi, fallbackType: 'hls' },
        { regex: /https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi, fallbackType: 'mp4' },
        { regex: /https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi, fallbackType: 'webm' },
        { regex: /file:\s*["']([^"']+\.m3u8[^"']*)["']/gi, fallbackType: 'hls' },
        { regex: /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi, fallbackType: 'hls' },
    ];
    for (const { regex, fallbackType } of videoPatterns) {
        for (const found of html.matchAll(regex)) {
            const url = found[1] || found[0];
            addVideo({ url, type: url.includes('.m3u8') ? 'hls' : fallbackType });
        }
    }

    // --- packed (p,a,c,k,e,d) scripts --------------------------------------
    // Only the packer prologue is required; minified output has no character
    // between the parameter list and the opening brace.
    if (/eval\(function\(p,a,c,k,e,[rd]\)/.test(html)) {
        const keywordTable = html.match(/'([^']+)'\.split\('\|'\)/);
        if (keywordTable) {
            for (const word of keywordTable[1].split('|')) {
                if (!word.includes('m3u8') && !word.includes('master')) {
                    continue;
                }
                // Keywords are arbitrary text: escape them before building a pattern.
                const literal = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                const hit = html.match(new RegExp(`https?://[^"'\\s]*${literal}[^"'\\s]*`, 'i'));
                if (hit) {
                    addVideo({ url: hit[0], type: 'hls', unpacked: true });
                }
            }
        }
    }

    return { iframes, videos };
};
2139
+ // Recursive crawler
2140
/**
 * Depth-first crawl of one URL and, recursively, of the iframes it embeds.
 *
 * Results are accumulated in the enclosing scope: one record per fetched
 * page is appended to `allIframes`, and every video URL not seen before is
 * appended to `videoSources`. `visited` prevents fetching a URL twice.
 *
 * NOTE(review): `maxDepth` is read from the enclosing scope and is not
 * declared in the visible part of this branch; confirm it is defined
 * earlier in the handler.
 *
 * @param {string} url - Page to fetch and scan.
 * @param {number} depth - Nesting level of `url` (0 for the starting page).
 * @returns {Promise<void>}
 */
const crawlIframe = async (url, depth) => {
    if (depth >= maxDepth) {
        return;
    }
    if (visited.has(url)) {
        return;
    }
    // Mark before filtering so a rejected URL is not re-examined later.
    visited.add(url);

    const passesFilter = !filterPattern || filterPattern.test(url);
    if (!passesFilter) {
        return;
    }

    const body = await fetchPageContent(url);
    if (!body) {
        return;
    }

    const found = extractFromHtml(body, url);
    allIframes.push({ depth, url, childCount: found.iframes.length });

    // Record each video URL once, tagged with where it was discovered.
    for (const candidate of found.videos) {
        const alreadyListed = videoSources.some((known) => known.url === candidate.url);
        if (alreadyListed) {
            continue;
        }
        videoSources.push({ ...candidate, foundAt: url, depth });
    }

    // Children are crawled one after another, one level deeper.
    for (const childUrl of found.iframes) {
        await crawlIframe(childUrl, depth + 1);
    }
};
2164
+ // Start from current page URL
2165
+ const currentUrl = page.url();
2166
+ await crawlIframe(currentUrl, 0);
2167
+ // Also check browser frames
2168
+ for (const frame of page.frames()) {
2169
+ try {
2170
+ const frameUrl = frame.url();
2171
+ if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
2172
+ await crawlIframe(frameUrl, 1);
2173
+ }
2174
+ }
2175
+ catch { /* ignore inaccessible frames */ }
2176
+ }
2177
+ return {
2178
+ success: true,
2179
+ iframes: args.flatten !== false ? allIframes : allIframes,
2180
+ videoSources: args.extractVideoSources !== false ? videoSources : undefined,
2181
+ message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
2182
+ };
2183
+ }
2056
2184
  return {
2057
2185
  success: false,
2058
2186
  iframes: [],
@@ -613,16 +613,21 @@ export const TOOLS = [
613
613
  // ============================================================
614
614
  {
615
615
  name: 'iframe_handler',
616
- description: 'Extract content from nested iframes including embedded video players',
616
+ description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
617
617
  inputSchema: {
618
618
  type: 'object',
619
619
  additionalProperties: false,
620
620
  properties: {
621
- action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform on iframes' },
621
+ action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
622
622
  selector: { type: 'string', description: 'CSS selector of target iframe' },
623
623
  frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
624
624
  maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
625
625
  extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
626
+ recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
627
+ flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
628
+ filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
629
+ extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
630
+ timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
626
631
  },
627
632
  },
628
633
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.24.0",
3
+ "version": "2.24.2",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.5.0",
53
+ "brave-real-browser": "^2.5.2",
54
54
  "turndown": "latest",
55
55
  "vscode-languageserver": "^9.0.1",
56
56
  "vscode-languageserver-textdocument": "^1.0.12"