npm - brave-real-browser-mcp-server - Versions diffs - 2.24.1 → 2.24.3 - Mend

brave-real-browser-mcp-server 2.24.1 → 2.24.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/handlers/advanced-tools.js +128 -0
package/dist/index.js +3 -12
package/dist/tool-definitions.js +11 -36
package/dist/unified-server.js +4 -9
package/package.json +2 -2

package/dist/handlers/advanced-tools.js CHANGED Viewed

@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
             message: 'Frame accessed successfully',
         };
     }
+    // NEW: deep_scrape action - HTTP-based recursive iframe crawling
+    if (action === 'deep_scrape') {
+        const timeout = args.timeout || 10000;
+        const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
+        const allIframes = [];
+        const videoSources = [];
+        const visited = new Set();
+        // Helper: Fetch page content via HTTP
+        const fetchPageContent = async (url) => {
+            try {
+                const https = await import('https');
+                const http = await import('http');
+                return new Promise((resolve) => {
+                    const protocol = url.startsWith('https') ? https : http;
+                    const req = protocol.get(url, { timeout }, (res) => {
+                        let data = '';
+                        res.on('data', (chunk) => data += chunk);
+                        res.on('end', () => resolve(data));
+                    });
+                    req.on('error', () => resolve(''));
+                    req.on('timeout', () => { req.destroy(); resolve(''); });
+                });
+            }
+            catch {
+                return '';
+            }
+        };
+        // Helper: Extract iframes and video sources from HTML
+        const extractFromHtml = (html, baseUrl) => {
+            const iframes = [];
+            const videos = [];
+            // Extract iframes
+            const iframeRegex = /<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi;
+            let match;
+            while ((match = iframeRegex.exec(html)) !== null) {
+                let src = match[1];
+                // Handle relative URLs
+                if (src.startsWith('//'))
+                    src = 'https:' + src;
+                else if (src.startsWith('/')) {
+                    const urlObj = new URL(baseUrl);
+                    src = urlObj.origin + src;
+                }
+                iframes.push(src);
+            }
+            // Extract video sources (m3u8, mp4, etc.)
+            const videoPatterns = [
+                /https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi,
+                /https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi,
+                /https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi,
+                /file:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
+                /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
+            ];
+            for (const pattern of videoPatterns) {
+                let videoMatch;
+                while ((videoMatch = pattern.exec(html)) !== null) {
+                    const url = videoMatch[1] || videoMatch[0];
+                    videos.push({ url, type: url.includes('.m3u8') ? 'hls' : 'mp4' });
+                }
+            }
+            // Try to unpack obfuscated JS (p,a,c,k,e,d)
+            const packedMatch = html.match(/eval\(function\(p,a,c,k,e,[rd]\)[^{]+\{[^}]+\}[^)]+\('[^']+'/);
+            if (packedMatch) {
+                try {
+                    // Simple unpacking - extract strings
+                    const stringsMatch = html.match(/'([^']+)'\.split\('\|'\)/);
+                    if (stringsMatch) {
+                        const strings = stringsMatch[1].split('|');
+                        for (const s of strings) {
+                            if (s.includes('m3u8') || s.includes('master')) {
+                                // Find m3u8 URLs in unpacked content
+                                const m3u8Match = html.match(new RegExp(`https?://[^"'\\s]*${s}[^"'\\s]*`, 'i'));
+                                if (m3u8Match) {
+                                    videos.push({ url: m3u8Match[0], type: 'hls', unpacked: true });
+                                }
+                            }
+                        }
+                    }
+                }
+                catch { /* ignore unpacking errors */ }
+            }
+            return { iframes, videos };
+        };
+        // Recursive crawler
+        const crawlIframe = async (url, depth) => {
+            if (depth >= maxDepth || visited.has(url))
+                return;
+            visited.add(url);
+            // Apply filter if specified
+            if (filterPattern && !filterPattern.test(url))
+                return;
+            const html = await fetchPageContent(url);
+            if (!html)
+                return;
+            const { iframes, videos } = extractFromHtml(html, url);
+            // Add this iframe to results
+            allIframes.push({ depth, url, childCount: iframes.length });
+            // Add video sources
+            for (const video of videos) {
+                if (!videoSources.some(v => v.url === video.url)) {
+                    videoSources.push({ ...video, foundAt: url, depth });
+                }
+            }
+            // Recursively crawl child iframes
+            for (const iframeSrc of iframes) {
+                await crawlIframe(iframeSrc, depth + 1);
+            }
+        };
+        // Start from current page URL
+        const currentUrl = page.url();
+        await crawlIframe(currentUrl, 0);
+        // Also check browser frames
+        for (const frame of page.frames()) {
+            try {
+                const frameUrl = frame.url();
+                if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
+                    await crawlIframe(frameUrl, 1);
+                }
+            }
+            catch { /* ignore inaccessible frames */ }
+        }
+        return {
+            success: true,
+            iframes: args.flatten !== false ? allIframes : allIframes,
+            videoSources: args.extractVideoSources !== false ? videoSources : undefined,
+            message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
+        };
+    }
     return {
         success: false,
         iframes: [],

package/dist/index.js CHANGED Viewed

@@ -57,9 +57,7 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
 import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
 import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
 // Import advanced tools handlers
-import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleMediaExtractor, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema,
-// Streaming tools
-handleM3u8Parser, handleCookieManager,
+import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema, handleCookieManager,
 // Download tools
 handleFileDownloader,
 // Enhanced streaming/download tools
@@ -220,10 +218,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 if (!page)
                     throw new Error('Browser not initialized. Call browser_init first.');
                 return { content: [{ type: 'text', text: JSON.stringify(await handleAjaxContentWaiter(page, args || {})) }] };
-            case TOOL_NAMES.MEDIA_EXTRACTOR:
-                if (!page)
-                    throw new Error('Browser not initialized. Call browser_init first.');
-                return { content: [{ type: 'text', text: JSON.stringify(await handleMediaExtractor(page, args || {})) }] };
+            // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
             case TOOL_NAMES.ELEMENT_SCREENSHOT:
                 if (!page)
                     throw new Error('Browser not initialized. Call browser_init first.');
@@ -240,11 +235,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 if (!page)
                     throw new Error('Browser not initialized. Call browser_init first.');
                 return { content: [{ type: 'text', text: JSON.stringify(await handleExtractSchema(page, args || {})) }] };
-            // Streaming & Media Tools
-            case TOOL_NAMES.M3U8_PARSER:
-                if (!page)
-                    throw new Error('Browser not initialized. Call browser_init first.');
-                return { content: [{ type: 'text', text: JSON.stringify(await handleM3u8Parser(page, args || {})) }] };
+            // M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
             case TOOL_NAMES.COOKIE_MANAGER:
                 if (!page)
                     throw new Error('Browser not initialized. Call browser_init first.');

package/dist/tool-definitions.js CHANGED Viewed

@@ -485,21 +485,7 @@ export const TOOLS = [
             },
         },
     },
-    {
-        name: 'media_extractor',
-        description: 'Extract media (audio/video) from page with quality options and ad-bypass',
-        inputSchema: {
-            type: 'object',
-            additionalProperties: false,
-            properties: {
-                mediaType: { type: 'string', enum: ['video', 'audio', 'all'], description: 'Type of media to extract', default: 'all' },
-                includeEmbedded: { type: 'boolean', description: 'Include embedded iframes', default: true },
-                quality: { type: 'string', description: 'Preferred quality (highest, lowest, 1080p, 720p)' },
-                format: { type: 'string', description: 'Preferred format (mp4, webm, m3u8)' },
-                bypassAds: { type: 'boolean', description: 'Attempt to bypass video ads', default: false },
-            },
-        },
-    },
+    // media_extractor REMOVED - functionality merged into stream_extractor
     {
         name: 'element_screenshot',
         description: 'Capture screenshot of a specific element',
@@ -554,23 +540,7 @@ export const TOOLS = [
             },
         },
     },
-    // ============================================================
-    // STREAMING & MEDIA TOOLS (3 new tools)
-    // ============================================================
-    {
-        name: 'm3u8_parser',
-        description: 'Parse and extract HLS/m3u8 streaming URLs with quality options',
-        inputSchema: {
-            type: 'object',
-            additionalProperties: false,
-            properties: {
-                url: { type: 'string', description: 'URL of the page or m3u8 file' },
-                extractAll: { type: 'boolean', description: 'Extract all quality variants', default: true },
-                preferQuality: { type: 'string', description: 'Preferred quality (1080p, 720p, 480p, best, worst)', default: 'best' },
-                includeAudio: { type: 'boolean', description: 'Include audio-only streams', default: true },
-            },
-        },
-    },
+    // m3u8_parser REMOVED - functionality merged into stream_extractor
     {
         name: 'cookie_manager',
         description: 'Manage browser cookies for premium accounts and sessions',
@@ -613,16 +583,21 @@ export const TOOLS = [
     // ============================================================
     {
         name: 'iframe_handler',
-        description: 'Extract content from nested iframes including embedded video players',
+        description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
         inputSchema: {
             type: 'object',
             additionalProperties: false,
             properties: {
-                action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform on iframes' },
+                action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
                 selector: { type: 'string', description: 'CSS selector of target iframe' },
                 frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
                 maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
                 extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
+                recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
+                flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
+                filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
+                extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
+                timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
             },
         },
     },
@@ -673,12 +648,12 @@ export const TOOL_NAMES = {
     NETWORK_RECORDER: 'network_recorder',
     API_FINDER: 'api_finder',
     AJAX_CONTENT_WAITER: 'ajax_content_waiter',
-    MEDIA_EXTRACTOR: 'media_extractor',
+    // MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
     ELEMENT_SCREENSHOT: 'element_screenshot',
     LINK_HARVESTER: 'link_harvester',
     BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
     EXTRACT_SCHEMA: 'extract_schema',
-    M3U8_PARSER: 'm3u8_parser',
+    // M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
     COOKIE_MANAGER: 'cookie_manager',
     FILE_DOWNLOADER: 'file_downloader',
     // Enhanced tools

package/dist/unified-server.js CHANGED Viewed

@@ -132,11 +132,7 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
                     throw new Error('Browser not initialized');
                 result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchContent(page, args)) }] };
                 break;
-            case TOOL_NAMES.MEDIA_EXTRACTOR:
-                if (!page)
-                    throw new Error('Browser not initialized');
-                result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleMediaExtractor(page, args || {})) }] };
-                break;
+            // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
             case TOOL_NAMES.STREAM_EXTRACTOR:
                 if (!page)
                     throw new Error('Browser not initialized');
@@ -513,10 +509,9 @@ async function main() {
             console.error('   📜 random_scroll         - Natural scrolling');
             console.error('   🤖 solve_captcha         - Solve CAPTCHAs');
             console.error('');
-            console.error('   Media Extraction:');
-            console.error('   🎬 media_extractor       - Extract video/audio');
-            console.error('   📺 m3u8_parser           - Parse HLS streams');
-            console.error('   🎥 stream_extractor      - Master stream extraction');
+            console.error('   Media & Streaming:');
+            console.error('   🎬 stream_extractor      - Master: Extract video/audio/m3u8/mp4');
+            console.error('   🖼️  iframe_handler        - Handle nested iframes (deep_scrape)');
             console.error('');
             console.error('   Advanced Tools:');
             console.error('   🔎 search_content        - Search patterns in page');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "brave-real-browser-mcp-server",
-  "version": "2.24.1",
+  "version": "2.24.3",
   "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
   "type": "module",
   "main": "dist/index.js",
@@ -50,7 +50,7 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "latest",
     "@types/turndown": "latest",
-    "brave-real-browser": "^2.5.1",
+    "brave-real-browser": "^2.5.3",
     "turndown": "latest",
     "vscode-languageserver": "^9.0.1",
     "vscode-languageserver-textdocument": "^1.0.12"