brave-real-browser-mcp-server 2.24.2 → 2.24.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2367,18 +2367,19 @@ export async function handleCloudflareBypass(page, args) {
2367
2367
  }
2368
2368
  /**
2369
2369
  * Master tool: Extract direct stream/download URLs
2370
+ * ULTRA POWERFUL: Handles packed JS, JW Player, Video.js, HLS.js, obfuscated scripts
2370
2371
  */
2371
2372
  export async function handleStreamExtractor(page, args) {
2372
- const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm'];
2373
+ const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm', 'flv', 'avi'];
2373
2374
  const maxRedirects = args.maxRedirects || 10;
2374
2375
  const directUrls = [];
2375
2376
  // Navigate if URL provided
2376
2377
  if (args.url) {
2377
2378
  await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
2378
2379
  }
2379
- // Handle Cloudflare if enabled (inline logic)
2380
+ // Handle Cloudflare if enabled
2380
2381
  if (args.bypassCloudflare) {
2381
- const cfPatterns = ['Checking your browser', 'Just a moment', 'cf-browser-verification'];
2382
+ const cfPatterns = ['Checking your browser', 'Just a moment', 'cf-browser-verification', 'cf_chl_opt'];
2382
2383
  const isCloudflare = async () => {
2383
2384
  try {
2384
2385
  const content = await page.content();
@@ -2388,72 +2389,212 @@ export async function handleStreamExtractor(page, args) {
2388
2389
  return false;
2389
2390
  }
2390
2391
  };
2391
- // Wait up to 15 seconds for Cloudflare to pass
2392
2392
  const startCf = Date.now();
2393
- while (await isCloudflare() && Date.now() - startCf < 15000) {
2393
+ while (await isCloudflare() && Date.now() - startCf < 20000) {
2394
2394
  await new Promise(r => setTimeout(r, 1000));
2395
2395
  }
2396
2396
  }
2397
- // Handle countdown if enabled (inline logic)
2397
+ // Handle countdown if enabled
2398
2398
  if (args.waitForCountdown) {
2399
- const maxWait = 60;
2399
+ const maxWait = 120;
2400
2400
  const startTime = Date.now();
2401
2401
  while ((Date.now() - startTime) / 1000 < maxWait) {
2402
2402
  const hasCountdown = await page.evaluate(() => {
2403
2403
  const text = document.body?.innerText || '';
2404
- return /\d+\s*seconds?|wait\s*\d+|please\s*wait|countdown/gi.test(text);
2404
+ return /\d+\s*seconds?|wait\s*\d+|please\s*wait|countdown|getting link/gi.test(text);
2405
2405
  });
2406
2406
  if (!hasCountdown)
2407
2407
  break;
2408
2408
  await new Promise(r => setTimeout(r, 1000));
2409
2409
  }
2410
2410
  }
2411
- // Extract URLs from page
2412
- const extractedUrls = await page.evaluate((fmts) => {
2411
+ // ULTRA POWERFUL: Extract from all sources
2412
+ const extractedData = await page.evaluate((fmts) => {
2413
2413
  const urls = [];
2414
- const patterns = fmts.map(f => new RegExp(`https?://[^"'\\s]+\\.${f}([?#][^"'\\s]*)?`, 'gi'));
2415
- // Check page HTML
2416
2414
  const html = document.documentElement.innerHTML;
2417
- patterns.forEach(pattern => {
2418
- const matches = html.match(pattern);
2419
- if (matches)
2420
- urls.push(...matches);
2415
+ // ============================================================
2416
+ // 1. PACKED JS UNPACKING (p,a,c,k,e,d)
2417
+ // ============================================================
2418
+ const unpackPackedJS = (packed) => {
2419
+ try {
2420
+ // Find packed function pattern
2421
+ const match = packed.match(/eval\(function\(p,a,c,k,e,[rd]\)\{[^}]+\}[^)]+\('[^']+'/);
2422
+ if (!match)
2423
+ return '';
2424
+ // Extract the encoded string and dictionary
2425
+ const stringsMatch = packed.match(/'([^']+)'\.split\('\|'\)/);
2426
+ if (!stringsMatch)
2427
+ return '';
2428
+ const dict = stringsMatch[1].split('|');
2429
+ let result = packed;
2430
+ // Replace placeholders with actual values
2431
+ for (let i = 0; i < dict.length; i++) {
2432
+ if (dict[i]) {
2433
+ const base36 = i.toString(36);
2434
+ result = result.replace(new RegExp(`\\b${base36}\\b`, 'g'), dict[i]);
2435
+ }
2436
+ }
2437
+ return result;
2438
+ }
2439
+ catch {
2440
+ return '';
2441
+ }
2442
+ };
2443
+ // Find and unpack all packed scripts
2444
+ const scripts = document.querySelectorAll('script');
2445
+ scripts.forEach(script => {
2446
+ const content = script.textContent || '';
2447
+ if (content.includes('eval(function(p,a,c,k,e,')) {
2448
+ const unpacked = unpackPackedJS(content);
2449
+ // Extract URLs from unpacked content
2450
+ fmts.forEach(fmt => {
2451
+ const regex = new RegExp(`https?://[^"'\\s]+\\.${fmt}[^"'\\s]*`, 'gi');
2452
+ const matches = unpacked.match(regex);
2453
+ if (matches)
2454
+ matches.forEach(url => urls.push({ url, source: 'packed_js' }));
2455
+ });
2456
+ }
2421
2457
  });
2422
- // Check video/audio sources
2423
- document.querySelectorAll('video source, audio source, video, audio').forEach(el => {
2424
- const src = el.getAttribute('src');
2425
- if (src && fmts.some(f => src.includes(`.${f}`))) {
2426
- urls.push(src);
2458
+ // ============================================================
2459
+ // 2. JW PLAYER DETECTION
2460
+ // ============================================================
2461
+ if (window.jwplayer) {
2462
+ try {
2463
+ const player = window.jwplayer();
2464
+ if (player && player.getPlaylistItem) {
2465
+ const item = player.getPlaylistItem();
2466
+ if (item) {
2467
+ if (item.file)
2468
+ urls.push({ url: item.file, source: 'jwplayer' });
2469
+ if (item.sources) {
2470
+ item.sources.forEach((s) => {
2471
+ if (s.file)
2472
+ urls.push({ url: s.file, source: 'jwplayer' });
2473
+ });
2474
+ }
2475
+ }
2476
+ }
2477
+ }
2478
+ catch { /* ignore */ }
2479
+ }
2480
+ // JW Player setup patterns in scripts
2481
+ const jwPatterns = [
2482
+ /file:\s*["']([^"']+\.m3u8[^"']*?)["']/gi,
2483
+ /file:\s*["']([^"']+\.mp4[^"']*?)["']/gi,
2484
+ /sources:\s*\[\s*\{[^}]*file:\s*["']([^"']+)["']/gi,
2485
+ /setup\([^)]*file:\s*["']([^"']+)["']/gi,
2486
+ ];
2487
+ jwPatterns.forEach(pattern => {
2488
+ let match;
2489
+ while ((match = pattern.exec(html)) !== null) {
2490
+ urls.push({ url: match[1], source: 'jwplayer_setup' });
2491
+ }
2492
+ });
2493
+ // ============================================================
2494
+ // 3. VIDEO.JS DETECTION
2495
+ // ============================================================
2496
+ const videoJsPlayers = document.querySelectorAll('.video-js, [data-setup], video[id^="vjs"]');
2497
+ videoJsPlayers.forEach(player => {
2498
+ const video = player.querySelector('source') || player;
2499
+ const src = video.getAttribute('src') || player.src;
2500
+ if (src)
2501
+ urls.push({ url: src, source: 'videojs' });
2502
+ });
2503
+ // ============================================================
2504
+ // 4. HLS.JS DETECTION
2505
+ // ============================================================
2506
+ const hlsPatterns = [
2507
+ /hls\.loadSource\(["']([^"']+)["']\)/gi,
2508
+ /Hls\.loadSource\(["']([^"']+)["']\)/gi,
2509
+ /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2510
+ /src:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2511
+ ];
2512
+ hlsPatterns.forEach(pattern => {
2513
+ let match;
2514
+ while ((match = pattern.exec(html)) !== null) {
2515
+ urls.push({ url: match[1], source: 'hlsjs' });
2427
2516
  }
2428
2517
  });
2429
- // Check links
2518
+ // ============================================================
2519
+ // 5. PLYR DETECTION
2520
+ // ============================================================
2521
+ if (window.Plyr) {
2522
+ try {
2523
+ const plyrPlayer = window.player;
2524
+ if (plyrPlayer && plyrPlayer.source) {
2525
+ urls.push({ url: plyrPlayer.source, source: 'plyr' });
2526
+ }
2527
+ }
2528
+ catch { /* ignore */ }
2529
+ }
2530
+ // ============================================================
2531
+ // 6. DATA ATTRIBUTES
2532
+ // ============================================================
2533
+ document.querySelectorAll('[data-src], [data-video], [data-file], [data-stream]').forEach(el => {
2534
+ const attrs = ['data-src', 'data-video', 'data-file', 'data-stream', 'data-link'];
2535
+ attrs.forEach(attr => {
2536
+ const val = el.getAttribute(attr);
2537
+ if (val && fmts.some(f => val.includes(`.${f}`))) {
2538
+ urls.push({ url: val, source: 'data_attr' });
2539
+ }
2540
+ });
2541
+ });
2542
+ // ============================================================
2543
+ // 7. STANDARD VIDEO/AUDIO ELEMENTS
2544
+ // ============================================================
2545
+ document.querySelectorAll('video, audio, source').forEach(el => {
2546
+ const src = el.getAttribute('src');
2547
+ if (src)
2548
+ urls.push({ url: src, source: 'html_media' });
2549
+ });
2550
+ // ============================================================
2551
+ // 8. DIRECT LINKS
2552
+ // ============================================================
2430
2553
  document.querySelectorAll('a[href]').forEach(el => {
2431
2554
  const href = el.href;
2432
2555
  if (href && fmts.some(f => href.includes(`.${f}`))) {
2433
- urls.push(href);
2556
+ urls.push({ url: href, source: 'direct_link' });
2434
2557
  }
2435
2558
  });
2436
- // Check iframes for embedded players
2559
+ // ============================================================
2560
+ // 9. IFRAME PLAYERS
2561
+ // ============================================================
2437
2562
  document.querySelectorAll('iframe').forEach(iframe => {
2438
2563
  const src = iframe.src;
2439
- if (src && (src.includes('player') || src.includes('embed'))) {
2440
- urls.push(`iframe:${src}`);
2564
+ if (src && (src.includes('player') || src.includes('embed') || src.includes('video'))) {
2565
+ urls.push({ url: `iframe:${src}`, source: 'iframe' });
2441
2566
  }
2442
2567
  });
2443
- return [...new Set(urls)];
2568
+ // ============================================================
2569
+ // 10. REGEX SCAN OF ENTIRE HTML
2570
+ // ============================================================
2571
+ fmts.forEach(fmt => {
2572
+ const pattern = new RegExp(`https?://[^"'\\s<>]+\\.${fmt}[^"'\\s<>]*`, 'gi');
2573
+ const matches = html.match(pattern);
2574
+ if (matches)
2575
+ matches.forEach(url => urls.push({ url, source: 'regex_scan' }));
2576
+ });
2577
+ // Deduplicate
2578
+ const seen = new Set();
2579
+ return urls.filter(u => {
2580
+ if (seen.has(u.url))
2581
+ return false;
2582
+ seen.add(u.url);
2583
+ return true;
2584
+ });
2444
2585
  }, formats);
2445
2586
  // Process found URLs
2446
- for (const url of extractedUrls) {
2447
- const format = formats.find(f => url.includes(`.${f}`)) || 'unknown';
2587
+ for (const item of extractedData) {
2588
+ const format = formats.find(f => item.url.includes(`.${f}`)) || 'unknown';
2448
2589
  directUrls.push({
2449
- url,
2590
+ url: item.url,
2450
2591
  format,
2451
2592
  quality: args.quality || 'auto',
2593
+ source: item.source,
2452
2594
  });
2453
2595
  }
2454
2596
  // Check network requests for media URLs
2455
2597
  const networkUrls = await page.evaluate((fmts) => {
2456
- // Check performance entries for loaded resources
2457
2598
  const resources = performance.getEntriesByType('resource');
2458
2599
  return resources
2459
2600
  .filter(r => fmts.some(f => r.name.includes(`.${f}`)))
@@ -2462,14 +2603,14 @@ export async function handleStreamExtractor(page, args) {
2462
2603
  for (const url of networkUrls) {
2463
2604
  if (!directUrls.some(d => d.url === url)) {
2464
2605
  const format = formats.find(f => url.includes(`.${f}`)) || 'unknown';
2465
- directUrls.push({ url, format });
2606
+ directUrls.push({ url, format, source: 'network' });
2466
2607
  }
2467
2608
  }
2468
2609
  return {
2469
2610
  success: directUrls.length > 0,
2470
2611
  directUrls,
2471
2612
  message: directUrls.length > 0
2472
- ? `Found ${directUrls.length} direct URL(s)`
2613
+ ? `🎬 Found ${directUrls.length} direct URL(s) from ${new Set(directUrls.map(d => d.source)).size} sources`
2473
2614
  : 'No direct URLs found',
2474
2615
  };
2475
2616
  }
@@ -80,6 +80,40 @@ export async function handleFindSelector(args) {
80
80
  // Ensure elementType has a fallback value
81
81
  const elementType = args?.elementType || '*';
82
82
  tracker.setProgress(10, '🔧 Preparing search strategies...');
83
+ // ============================================================
84
+ // FUZZY MATCHING: Levenshtein distance for typo tolerance
85
+ // ============================================================
86
+ const fuzzyMatch = (str1, str2, threshold = 0.7) => {
87
+ const s1 = str1.toLowerCase();
88
+ const s2 = str2.toLowerCase();
89
+ // Exact match
90
+ if (s1 === s2)
91
+ return { match: true, score: 1 };
92
+ // Contains match
93
+ if (s1.includes(s2) || s2.includes(s1))
94
+ return { match: true, score: 0.9 };
95
+ // Levenshtein distance
96
+ const len1 = s1.length;
97
+ const len2 = s2.length;
98
+ const matrix = [];
99
+ for (let i = 0; i <= len1; i++)
100
+ matrix[i] = [i];
101
+ for (let j = 0; j <= len2; j++)
102
+ matrix[0][j] = j;
103
+ for (let i = 1; i <= len1; i++) {
104
+ for (let j = 1; j <= len2; j++) {
105
+ const cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
106
+ matrix[i][j] = Math.min(matrix[i - 1][j] + 1, // deletion
107
+ matrix[i][j - 1] + 1, // insertion
108
+ matrix[i - 1][j - 1] + cost // substitution
109
+ );
110
+ }
111
+ }
112
+ const distance = matrix[len1][len2];
113
+ const maxLen = Math.max(len1, len2);
114
+ const score = 1 - (distance / maxLen);
115
+ return { match: score >= threshold, score };
116
+ };
83
117
  // Helper: Search in Shadow DOM
84
118
  const searchInShadowDOM = async (sel) => {
85
119
  return await pageInstance.evaluate((selector) => {
package/dist/index.js CHANGED
@@ -57,9 +57,7 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
57
57
  import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
58
58
  import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
59
59
  // Import advanced tools handlers
60
- import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleMediaExtractor, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema,
61
- // Streaming tools
62
- handleM3u8Parser, handleCookieManager,
60
+ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleApiFinder, handleAjaxContentWaiter, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleExtractSchema, handleCookieManager,
63
61
  // Download tools
64
62
  handleFileDownloader,
65
63
  // Enhanced streaming/download tools
@@ -220,10 +218,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
220
218
  if (!page)
221
219
  throw new Error('Browser not initialized. Call browser_init first.');
222
220
  return { content: [{ type: 'text', text: JSON.stringify(await handleAjaxContentWaiter(page, args || {})) }] };
223
- case TOOL_NAMES.MEDIA_EXTRACTOR:
224
- if (!page)
225
- throw new Error('Browser not initialized. Call browser_init first.');
226
- return { content: [{ type: 'text', text: JSON.stringify(await handleMediaExtractor(page, args || {})) }] };
221
+ // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
227
222
  case TOOL_NAMES.ELEMENT_SCREENSHOT:
228
223
  if (!page)
229
224
  throw new Error('Browser not initialized. Call browser_init first.');
@@ -240,11 +235,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
240
235
  if (!page)
241
236
  throw new Error('Browser not initialized. Call browser_init first.');
242
237
  return { content: [{ type: 'text', text: JSON.stringify(await handleExtractSchema(page, args || {})) }] };
243
- // Streaming & Media Tools
244
- case TOOL_NAMES.M3U8_PARSER:
245
- if (!page)
246
- throw new Error('Browser not initialized. Call browser_init first.');
247
- return { content: [{ type: 'text', text: JSON.stringify(await handleM3u8Parser(page, args || {})) }] };
238
+ // M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
248
239
  case TOOL_NAMES.COOKIE_MANAGER:
249
240
  if (!page)
250
241
  throw new Error('Browser not initialized. Call browser_init first.');
@@ -485,21 +485,7 @@ export const TOOLS = [
485
485
  },
486
486
  },
487
487
  },
488
- {
489
- name: 'media_extractor',
490
- description: 'Extract media (audio/video) from page with quality options and ad-bypass',
491
- inputSchema: {
492
- type: 'object',
493
- additionalProperties: false,
494
- properties: {
495
- mediaType: { type: 'string', enum: ['video', 'audio', 'all'], description: 'Type of media to extract', default: 'all' },
496
- includeEmbedded: { type: 'boolean', description: 'Include embedded iframes', default: true },
497
- quality: { type: 'string', description: 'Preferred quality (highest, lowest, 1080p, 720p)' },
498
- format: { type: 'string', description: 'Preferred format (mp4, webm, m3u8)' },
499
- bypassAds: { type: 'boolean', description: 'Attempt to bypass video ads', default: false },
500
- },
501
- },
502
- },
488
+ // media_extractor REMOVED - functionality merged into stream_extractor
503
489
  {
504
490
  name: 'element_screenshot',
505
491
  description: 'Capture screenshot of a specific element',
@@ -554,23 +540,7 @@ export const TOOLS = [
554
540
  },
555
541
  },
556
542
  },
557
- // ============================================================
558
- // STREAMING & MEDIA TOOLS (3 new tools)
559
- // ============================================================
560
- {
561
- name: 'm3u8_parser',
562
- description: 'Parse and extract HLS/m3u8 streaming URLs with quality options',
563
- inputSchema: {
564
- type: 'object',
565
- additionalProperties: false,
566
- properties: {
567
- url: { type: 'string', description: 'URL of the page or m3u8 file' },
568
- extractAll: { type: 'boolean', description: 'Extract all quality variants', default: true },
569
- preferQuality: { type: 'string', description: 'Preferred quality (1080p, 720p, 480p, best, worst)', default: 'best' },
570
- includeAudio: { type: 'boolean', description: 'Include audio-only streams', default: true },
571
- },
572
- },
573
- },
543
+ // m3u8_parser REMOVED - functionality merged into stream_extractor
574
544
  {
575
545
  name: 'cookie_manager',
576
546
  description: 'Manage browser cookies for premium accounts and sessions',
@@ -678,12 +648,12 @@ export const TOOL_NAMES = {
678
648
  NETWORK_RECORDER: 'network_recorder',
679
649
  API_FINDER: 'api_finder',
680
650
  AJAX_CONTENT_WAITER: 'ajax_content_waiter',
681
- MEDIA_EXTRACTOR: 'media_extractor',
651
+ // MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
682
652
  ELEMENT_SCREENSHOT: 'element_screenshot',
683
653
  LINK_HARVESTER: 'link_harvester',
684
654
  BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
685
655
  EXTRACT_SCHEMA: 'extract_schema',
686
- M3U8_PARSER: 'm3u8_parser',
656
+ // M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
687
657
  COOKIE_MANAGER: 'cookie_manager',
688
658
  FILE_DOWNLOADER: 'file_downloader',
689
659
  // Enhanced tools
@@ -132,11 +132,7 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
132
132
  throw new Error('Browser not initialized');
133
133
  result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchContent(page, args)) }] };
134
134
  break;
135
- case TOOL_NAMES.MEDIA_EXTRACTOR:
136
- if (!page)
137
- throw new Error('Browser not initialized');
138
- result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleMediaExtractor(page, args || {})) }] };
139
- break;
135
+ // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
140
136
  case TOOL_NAMES.STREAM_EXTRACTOR:
141
137
  if (!page)
142
138
  throw new Error('Browser not initialized');
@@ -513,10 +509,9 @@ async function main() {
513
509
  console.error(' 📜 random_scroll - Natural scrolling');
514
510
  console.error(' 🤖 solve_captcha - Solve CAPTCHAs');
515
511
  console.error('');
516
- console.error(' Media Extraction:');
517
- console.error(' 🎬 media_extractor - Extract video/audio');
518
- console.error(' 📺 m3u8_parser - Parse HLS streams');
519
- console.error(' 🎥 stream_extractor - Master stream extraction');
512
+ console.error(' Media & Streaming:');
513
+ console.error(' 🎬 stream_extractor - Master: Extract video/audio/m3u8/mp4');
514
+ console.error(' 🖼️ iframe_handler - Handle nested iframes (deep_scrape)');
520
515
  console.error('');
521
516
  console.error(' Advanced Tools:');
522
517
  console.error(' 🔎 search_content - Search patterns in page');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.24.2",
3
+ "version": "2.24.4",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.5.2",
53
+ "brave-real-browser": "^2.5.4",
54
54
  "turndown": "latest",
55
55
  "vscode-languageserver": "^9.0.1",
56
56
  "vscode-languageserver-textdocument": "^1.0.12"