brave-real-browser-mcp-server 2.24.1 → 2.24.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2053,6 +2053,134 @@ export async function handleIframeHandler(page, args) {
2053
2053
  message: 'Frame accessed successfully',
2054
2054
  };
2055
2055
  }
2056
+ // NEW: deep_scrape action - HTTP-based recursive iframe crawling
2057
+ if (action === 'deep_scrape') {
2058
+ const timeout = args.timeout || 10000;
2059
+ const filterPattern = args.filterPattern ? new RegExp(args.filterPattern, 'i') : null;
2060
+ const allIframes = [];
2061
+ const videoSources = [];
2062
+ const visited = new Set();
2063
+ // Helper: Fetch page content via HTTP
2064
+ const fetchPageContent = async (url) => {
2065
+ try {
2066
+ const https = await import('https');
2067
+ const http = await import('http');
2068
+ return new Promise((resolve) => {
2069
+ const protocol = url.startsWith('https') ? https : http;
2070
+ const req = protocol.get(url, { timeout }, (res) => {
2071
+ let data = '';
2072
+ res.on('data', (chunk) => data += chunk);
2073
+ res.on('end', () => resolve(data));
2074
+ });
2075
+ req.on('error', () => resolve(''));
2076
+ req.on('timeout', () => { req.destroy(); resolve(''); });
2077
+ });
2078
+ }
2079
+ catch {
2080
+ return '';
2081
+ }
2082
+ };
2083
+ // Helper: Extract iframes and video sources from HTML
2084
+ const extractFromHtml = (html, baseUrl) => {
2085
+ const iframes = [];
2086
+ const videos = [];
2087
+ // Extract iframes
2088
+ const iframeRegex = /<iframe[^>]*src=["']([^"']+)["'][^>]*>/gi;
2089
+ let match;
2090
+ while ((match = iframeRegex.exec(html)) !== null) {
2091
+ let src = match[1];
2092
+ // Handle relative URLs
2093
+ if (src.startsWith('//'))
2094
+ src = 'https:' + src;
2095
+ else if (src.startsWith('/')) {
2096
+ const urlObj = new URL(baseUrl);
2097
+ src = urlObj.origin + src;
2098
+ }
2099
+ iframes.push(src);
2100
+ }
2101
+ // Extract video sources (m3u8, mp4, etc.)
2102
+ const videoPatterns = [
2103
+ /https?:\/\/[^"'\s]+\.m3u8[^"'\s]*/gi,
2104
+ /https?:\/\/[^"'\s]+\.mp4[^"'\s]*/gi,
2105
+ /https?:\/\/[^"'\s]+\.webm[^"'\s]*/gi,
2106
+ /file:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2107
+ /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2108
+ ];
2109
+ for (const pattern of videoPatterns) {
2110
+ let videoMatch;
2111
+ while ((videoMatch = pattern.exec(html)) !== null) {
2112
+ const url = videoMatch[1] || videoMatch[0];
2113
+ videos.push({ url, type: url.includes('.m3u8') ? 'hls' : 'mp4' });
2114
+ }
2115
+ }
2116
+ // Try to unpack obfuscated JS (p,a,c,k,e,d)
2117
+ const packedMatch = html.match(/eval\(function\(p,a,c,k,e,[rd]\)[^{]+\{[^}]+\}[^)]+\('[^']+'/);
2118
+ if (packedMatch) {
2119
+ try {
2120
+ // Simple unpacking - extract strings
2121
+ const stringsMatch = html.match(/'([^']+)'\.split\('\|'\)/);
2122
+ if (stringsMatch) {
2123
+ const strings = stringsMatch[1].split('|');
2124
+ for (const s of strings) {
2125
+ if (s.includes('m3u8') || s.includes('master')) {
2126
+ // Find m3u8 URLs in unpacked content
2127
+ const m3u8Match = html.match(new RegExp(`https?://[^"'\\s]*${s}[^"'\\s]*`, 'i'));
2128
+ if (m3u8Match) {
2129
+ videos.push({ url: m3u8Match[0], type: 'hls', unpacked: true });
2130
+ }
2131
+ }
2132
+ }
2133
+ }
2134
+ }
2135
+ catch { /* ignore unpacking errors */ }
2136
+ }
2137
+ return { iframes, videos };
2138
+ };
2139
+ // Recursive crawler
2140
+ const crawlIframe = async (url, depth) => {
2141
+ if (depth >= maxDepth || visited.has(url))
2142
+ return;
2143
+ visited.add(url);
2144
+ // Apply filter if specified
2145
+ if (filterPattern && !filterPattern.test(url))
2146
+ return;
2147
+ const html = await fetchPageContent(url);
2148
+ if (!html)
2149
+ return;
2150
+ const { iframes, videos } = extractFromHtml(html, url);
2151
+ // Add this iframe to results
2152
+ allIframes.push({ depth, url, childCount: iframes.length });
2153
+ // Add video sources
2154
+ for (const video of videos) {
2155
+ if (!videoSources.some(v => v.url === video.url)) {
2156
+ videoSources.push({ ...video, foundAt: url, depth });
2157
+ }
2158
+ }
2159
+ // Recursively crawl child iframes
2160
+ for (const iframeSrc of iframes) {
2161
+ await crawlIframe(iframeSrc, depth + 1);
2162
+ }
2163
+ };
2164
+ // Start from current page URL
2165
+ const currentUrl = page.url();
2166
+ await crawlIframe(currentUrl, 0);
2167
+ // Also check browser frames
2168
+ for (const frame of page.frames()) {
2169
+ try {
2170
+ const frameUrl = frame.url();
2171
+ if (frameUrl && frameUrl !== 'about:blank' && !visited.has(frameUrl)) {
2172
+ await crawlIframe(frameUrl, 1);
2173
+ }
2174
+ }
2175
+ catch { /* ignore inaccessible frames */ }
2176
+ }
2177
+ return {
2178
+ success: true,
2179
+ iframes: args.flatten !== false ? allIframes : allIframes,
2180
+ videoSources: args.extractVideoSources !== false ? videoSources : undefined,
2181
+ message: `Deep scraped ${allIframes.length} iframes, found ${videoSources.length} video sources`,
2182
+ };
2183
+ }
2056
2184
  return {
2057
2185
  success: false,
2058
2186
  iframes: [],
package/dist/index.js CHANGED
@@ -57,9 +57,7 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
57
57
  import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
58
58
  import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
59
59
  // Import advanced tools handlers
60
- import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleMediaExtractor, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema,
61
- // Streaming tools
62
- handleM3u8Parser, handleCookieManager,
60
+ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema, handleCookieManager,
63
61
  // Download tools
64
62
  handleFileDownloader,
65
63
  // Enhanced streaming/download tools
@@ -220,10 +218,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
220
218
  if (!page)
221
219
  throw new Error('Browser not initialized. Call browser_init first.');
222
220
  return { content: [{ type: 'text', text: JSON.stringify(await handleAjaxContentWaiter(page, args || {})) }] };
223
- case TOOL_NAMES.MEDIA_EXTRACTOR:
224
- if (!page)
225
- throw new Error('Browser not initialized. Call browser_init first.');
226
- return { content: [{ type: 'text', text: JSON.stringify(await handleMediaExtractor(page, args || {})) }] };
221
+ // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
227
222
  case TOOL_NAMES.ELEMENT_SCREENSHOT:
228
223
  if (!page)
229
224
  throw new Error('Browser not initialized. Call browser_init first.');
@@ -240,11 +235,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
240
235
  if (!page)
241
236
  throw new Error('Browser not initialized. Call browser_init first.');
242
237
  return { content: [{ type: 'text', text: JSON.stringify(await handleExtractSchema(page, args || {})) }] };
243
- // Streaming & Media Tools
244
- case TOOL_NAMES.M3U8_PARSER:
245
- if (!page)
246
- throw new Error('Browser not initialized. Call browser_init first.');
247
- return { content: [{ type: 'text', text: JSON.stringify(await handleM3u8Parser(page, args || {})) }] };
238
+ // M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
248
239
  case TOOL_NAMES.COOKIE_MANAGER:
249
240
  if (!page)
250
241
  throw new Error('Browser not initialized. Call browser_init first.');
@@ -485,21 +485,7 @@ export const TOOLS = [
485
485
  },
486
486
  },
487
487
  },
488
- {
489
- name: 'media_extractor',
490
- description: 'Extract media (audio/video) from page with quality options and ad-bypass',
491
- inputSchema: {
492
- type: 'object',
493
- additionalProperties: false,
494
- properties: {
495
- mediaType: { type: 'string', enum: ['video', 'audio', 'all'], description: 'Type of media to extract', default: 'all' },
496
- includeEmbedded: { type: 'boolean', description: 'Include embedded iframes', default: true },
497
- quality: { type: 'string', description: 'Preferred quality (highest, lowest, 1080p, 720p)' },
498
- format: { type: 'string', description: 'Preferred format (mp4, webm, m3u8)' },
499
- bypassAds: { type: 'boolean', description: 'Attempt to bypass video ads', default: false },
500
- },
501
- },
502
- },
488
+ // media_extractor REMOVED - functionality merged into stream_extractor
503
489
  {
504
490
  name: 'element_screenshot',
505
491
  description: 'Capture screenshot of a specific element',
@@ -554,23 +540,7 @@ export const TOOLS = [
554
540
  },
555
541
  },
556
542
  },
557
- // ============================================================
558
- // STREAMING & MEDIA TOOLS (3 new tools)
559
- // ============================================================
560
- {
561
- name: 'm3u8_parser',
562
- description: 'Parse and extract HLS/m3u8 streaming URLs with quality options',
563
- inputSchema: {
564
- type: 'object',
565
- additionalProperties: false,
566
- properties: {
567
- url: { type: 'string', description: 'URL of the page or m3u8 file' },
568
- extractAll: { type: 'boolean', description: 'Extract all quality variants', default: true },
569
- preferQuality: { type: 'string', description: 'Preferred quality (1080p, 720p, 480p, best, worst)', default: 'best' },
570
- includeAudio: { type: 'boolean', description: 'Include audio-only streams', default: true },
571
- },
572
- },
573
- },
543
+ // m3u8_parser REMOVED - functionality merged into stream_extractor
574
544
  {
575
545
  name: 'cookie_manager',
576
546
  description: 'Manage browser cookies for premium accounts and sessions',
@@ -613,16 +583,21 @@ export const TOOLS = [
613
583
  // ============================================================
614
584
  {
615
585
  name: 'iframe_handler',
616
- description: 'Extract content from nested iframes including embedded video players',
586
+ description: 'Extract content from nested iframes including embedded video players. Use action=deep_scrape for HTTP-based recursive crawling of complex streaming sites (5x faster than browser navigation)',
617
587
  inputSchema: {
618
588
  type: 'object',
619
589
  additionalProperties: false,
620
590
  properties: {
621
- action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll'], description: 'Action to perform on iframes' },
591
+ action: { type: 'string', enum: ['list', 'enter', 'extract', 'exitAll', 'deep_scrape'], description: 'Action to perform. deep_scrape: HTTP-based recursive iframe crawling for complex sites' },
622
592
  selector: { type: 'string', description: 'CSS selector of target iframe' },
623
593
  frameIndex: { type: 'number', description: 'Index of iframe to enter (0-based)' },
624
594
  maxDepth: { type: 'number', description: 'Maximum nesting depth to traverse', default: 3 },
625
595
  extractSelector: { type: 'string', description: 'Selector to extract content from within iframe' },
596
+ recursive: { type: 'boolean', description: 'Traverse nested iframes via HTTP (for deep_scrape)', default: true },
597
+ flatten: { type: 'boolean', description: 'Return flat list vs tree structure', default: true },
598
+ filterPattern: { type: 'string', description: 'Regex to filter iframe URLs (e.g., "multimoviesshg|streamhg")' },
599
+ extractVideoSources: { type: 'boolean', description: 'Auto-extract m3u8/mp4 video sources', default: true },
600
+ timeout: { type: 'number', description: 'HTTP request timeout in ms', default: 10000 },
626
601
  },
627
602
  },
628
603
  },
@@ -673,12 +648,12 @@ export const TOOL_NAMES = {
673
648
  NETWORK_RECORDER: 'network_recorder',
674
649
  API_FINDER: 'api_finder',
675
650
  AJAX_CONTENT_WAITER: 'ajax_content_waiter',
676
- MEDIA_EXTRACTOR: 'media_extractor',
651
+ // MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
677
652
  ELEMENT_SCREENSHOT: 'element_screenshot',
678
653
  LINK_HARVESTER: 'link_harvester',
679
654
  BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
680
655
  EXTRACT_SCHEMA: 'extract_schema',
681
- M3U8_PARSER: 'm3u8_parser',
656
+ // M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
682
657
  COOKIE_MANAGER: 'cookie_manager',
683
658
  FILE_DOWNLOADER: 'file_downloader',
684
659
  // Enhanced tools
@@ -132,11 +132,7 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
132
132
  throw new Error('Browser not initialized');
133
133
  result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchContent(page, args)) }] };
134
134
  break;
135
- case TOOL_NAMES.MEDIA_EXTRACTOR:
136
- if (!page)
137
- throw new Error('Browser not initialized');
138
- result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleMediaExtractor(page, args || {})) }] };
139
- break;
135
+ // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
140
136
  case TOOL_NAMES.STREAM_EXTRACTOR:
141
137
  if (!page)
142
138
  throw new Error('Browser not initialized');
@@ -513,10 +509,9 @@ async function main() {
513
509
  console.error(' 📜 random_scroll - Natural scrolling');
514
510
  console.error(' 🤖 solve_captcha - Solve CAPTCHAs');
515
511
  console.error('');
516
- console.error(' Media Extraction:');
517
- console.error(' 🎬 media_extractor - Extract video/audio');
518
- console.error(' 📺 m3u8_parser - Parse HLS streams');
519
- console.error(' 🎥 stream_extractor - Master stream extraction');
512
+ console.error(' Media & Streaming:');
513
+ console.error(' 🎬 stream_extractor - Master: Extract video/audio/m3u8/mp4');
514
+ console.error(' 🖼️ iframe_handler - Handle nested iframes (deep_scrape)');
520
515
  console.error('');
521
516
  console.error(' Advanced Tools:');
522
517
  console.error(' 🔎 search_content - Search patterns in page');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.24.1",
3
+ "version": "2.24.3",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.5.1",
53
+ "brave-real-browser": "^2.5.3",
54
54
  "turndown": "latest",
55
55
  "vscode-languageserver": "^9.0.1",
56
56
  "vscode-languageserver-textdocument": "^1.0.12"