npm - brave-real-browser-mcp-server - Versions diffs - 2.24.0 → 2.24.2 - Mend

brave-real-browser-mcp-server 2.24.0 → 2.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/handlers/advanced-tools.js +128 -0
package/dist/tool-definitions.js +7 -2
package/package.json +2 -2

package/dist/handlers/advanced-tools.js CHANGED Viewed

@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
             message: 'Frame accessed successfully',
         };
     }
+    // NEW: deep_scrape action - HTTP-based recursive iframe crawling
+    if (action === 'deep_scrape') {
+        const timeout = args.timeout || 10000;
+        const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
+        const allIframes = [];
+        const videoSources = [];
+        const visited = new Set();
+        // Helper: Fetch page content via HTTP
+        const fetchPageContent = async (url) => {
+            try {
+                const https = await import('https');
+                const http = await import('http');
+                return new Promise((resolve) => {
+                    const protocol = url.startsWith('https') ? https : http;
+                    const req = protocol.get(url, { timeout }, (res) => {
+                        let data = '';
+                        res.on('data', (chunk) => data += chunk);
+                        res.on('end', () => resolve(data));
+                    });
+                    req.on('error', () => resolve(''));
+                    req.on('timeout', () => { req.destroy(); resolve(''); });
+                });
+            }
+            catch {
+                return '';
+            }
+        };
+        // Helper: Extract iframes and video sources from HTML
+        const extractFromHtml = (html, baseUrl) => {
+            const iframes = [];
+            const videos = [];
+            // Extract iframes
+            const iframeRegex = /<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi;
+            let match;
+            while ((match = iframeRegex.exec(html)) !== null) {
+                let src = match[1];
+                // Handle relative URLs
+                if (src.startsWith('//'))
+                    src = 'https:' + src;
+                else if (src.startsWith('/')) {
+                    const urlObj = new URL(baseUrl);
+                    src = urlObj.origin + src;
+                }
+                iframes.push(src);
+            }
+            // Extract video sources (m3u8, mp4, etc.)
+            const videoPatterns = [
+                /https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi,
+                /https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi,
+                /https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi,
+                /file:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
+                /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
+            ];
+            for (const pattern of videoPatterns) {
+                let videoMatch;
+                while ((videoMatch = pattern.exec(html)) !== null) {
+                    const url = videoMatch[1] || videoMatch[0];
+                    videos.push({ url, type: url.includes('.m3u8') ? 'hls' : 'mp4' });
+                }
+            }
+            // Try to unpack obfuscated JS (p,a,c,k,e,d)
+            const packedMatch = html.match(/eval\(function\(p,a,c,k,e,[rd]\)[^{]+\{[^}]+\}[^)]+\('[^']+'/);
+            if (packedMatch) {
+                try {
+                    // Simple unpacking - extract strings
+                    const stringsMatch = html.match(/'([^']+)'\.split\('\|'\)/);
+                    if (stringsMatch) {
+                        const strings = stringsMatch[1].split('|');
+                        for (const s of strings) {
+                            if (s.includes('m3u8') || s.includes('master')) {
+                                // Find m3u8 URLs in unpacked content
+                                const m3u8Match = html.match(new RegExp(`https?://[^"'\\s]*${s}[^"'\\s]*`, 'i'));
+                                if (m3u8Match) {
+                                    videos.push({ url: m3u8Match[0], type: 'hls', unpacked: true });
+                                }
+                            }
+                        }
+                    }
+                }
+                catch { /* ignore unpacking errors */ }
+            }
+            return { iframes, videos };
+        };
+        // Recursive crawler
+        const crawlIframe = async (url, depth) => {
+            if (depth >= maxDepth || visited.has(url))
+                return;
+            visited.add(url);
+            // Apply filter if specified
+            if (filterPattern && !filterPattern.test(url))
+                return;
+            const html = await fetchPageContent(url);
+            if (!html)
+                return;
+            const { iframes, videos } = extractFromHtml(html, url);
+            // Add this iframe to results
+            allIframes.push({ depth, url, childCount: iframes.length });
+            // Add video sources
+            for (const video of videos) {
+                if (!videoSources.some(v => v.url === video.url)) {
+                    videoSources.push({ ...video, foundAt: url, depth });
+                }
+            }
+            // Recursively crawl child iframes
+            for (const iframeSrc of iframes) {
+                await crawlIframe(iframeSrc, depth + 1);
+            }
+        };
+        // Start from current page URL
+        const currentUrl = page.url();
+        await crawlIframe(currentUrl, 0);
+        // Also check browser frames
+        for (const frame of page.frames()) {
+            try {
+                const frameUrl = frame.url();
+                if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
+                    await crawlIframe(frameUrl, 1);
+                }
+            }
+            catch { /* ignore inaccessible frames */ }
+        }
+        return {
+            success: true,
+            iframes: args.flatten !== false ? allIframes : allIframes,
+            videoSources: args.extractVideoSources !== false ? videoSources : undefined,
+            message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
+        };
+    }
     return {
         success: false,
         iframes: [],

package/dist/tool-definitions.js CHANGED Viewed

@@ -613,16 +613,21 @@ export const TOOLS = [
     // ============================================================
     {
         name: 'iframe_handler',
-        description: 'Extract content from nested iframes including embedded video players',
+        description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
         inputSchema: {
             type: 'object',
             additionalProperties: false,
             properties: {
-                action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform on iframes' },
+                action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
                 selector: { type: 'string', description: 'CSS selector of target iframe' },
                 frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
                 maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
                 extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
+                recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
+                flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
+                filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
+                extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
+                timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
             },
         },
     },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "brave-real-browser-mcp-server",
-  "version": "2.24.0",
+  "version": "2.24.2",
   "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
   "type": "module",
   "main": "dist/index.js",
@@ -50,7 +50,7 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "latest",
     "@types/turndown": "latest",
-    "brave-real-browser": "^2.5.0",
+    "brave-real-browser": "^2.5.2",
     "turndown": "latest",
     "vscode-languageserver": "^9.0.1",
     "vscode-languageserver-textdocument": "^1.0.12"