brave-real-browser-mcp-server 2.24.3 → 2.24.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -586,22 +586,78 @@ export async function handleDeepAnalysis(page, args) {
586
586
  }
587
587
  /**
588
588
  * Record full network traffic - Uses response events to avoid crashes
589
+ * ULTRA POWERFUL: API detection, media URLs, smart categorization
589
590
  */
590
591
  export async function handleNetworkRecorder(page, args) {
591
592
  const requests = [];
592
593
  const duration = args.duration || 10000;
593
594
  let totalSize = 0;
595
+ const categories = {};
596
+ const apis = [];
597
+ const mediaUrls = [];
598
+ const seen = new Set();
599
+ // ============================================================
600
+ // SMART CATEGORIZATION HELPER
601
+ // ============================================================
602
/**
 * Classify a recorded response into a coarse traffic category.
 *
 * Rules are evaluated in order and the first hit wins: API-looking URLs,
 * then media, images, scripts, styles, fonts, XHR/fetch, documents.
 *
 * File extensions only count when they terminate the URL or are followed
 * by a query string / fragment. Without that anchor, `.ts` would match
 * `app.tsx` or `cdn.tsite.com`, and `.ico` would match `img.icons8.com`.
 * URLs with unusual suffixes still fall back to the `resourceType` checks.
 *
 * @param {string} url - Absolute response URL.
 * @param {string} resourceType - Resource type reported for the request.
 * @returns {string} One of 'api', 'media', 'image', 'script', 'style',
 *   'font', 'xhr', 'document' or 'other'.
 */
const categorizeUrl = (url, resourceType) => {
    // API endpoints
    if (/\/api\/|\/v\d+\/|\.json(?:[?#]|$)|graphql/i.test(url)) {
        return 'api';
    }
    // Media
    if (/\.(?:mp4|webm|m3u8|ts|mp3|flac|ogg)(?:[?#]|$)/i.test(url)) {
        return 'media';
    }
    if (resourceType === 'media' || resourceType === 'video' || resourceType === 'audio') {
        return 'media';
    }
    // Images
    if (/\.(?:jpg|jpeg|png|gif|webp|svg|ico)(?:[?#]|$)/i.test(url) || resourceType === 'image') {
        return 'image';
    }
    // Scripts
    if (/\.js(?:[?#]|$)/i.test(url) || resourceType === 'script') {
        return 'script';
    }
    // Styles
    if (/\.css(?:[?#]|$)/i.test(url) || resourceType === 'stylesheet') {
        return 'style';
    }
    // Fonts
    if (/\.(?:woff2?|ttf|eot|otf)(?:[?#]|$)/i.test(url) || resourceType === 'font') {
        return 'font';
    }
    // XHR/Fetch
    if (resourceType === 'xhr' || resourceType === 'fetch') {
        return 'xhr';
    }
    // Documents
    if (resourceType === 'document') {
        return 'document';
    }
    return 'other';
};
594
632
  // Response handler - safer than request interception
595
633
  const responseHandler = (response) => {
596
634
  try {
597
635
  const url = response.url();
636
+ // Dedup
637
+ if (seen.has(url))
638
+ return;
639
+ seen.add(url);
598
640
  if (args.filterUrl && !url.includes(args.filterUrl)) {
599
641
  return;
600
642
  }
643
+ const resourceType = response.request()?.resourceType?.() || 'unknown';
644
+ const method = response.request()?.method?.() || 'GET';
645
+ const category = categorizeUrl(url, resourceType);
646
+ categories[category] = (categories[category] || 0) + 1;
647
+ // Collect API endpoints
648
+ if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
649
+ apis.push({ url, method, type: resourceType });
650
+ }
651
+ // Collect media URLs
652
+ if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
653
+ mediaUrls.push(url);
654
+ }
601
655
  const entry = {
602
656
  url,
603
657
  status: response.status(),
604
- resourceType: response.request()?.resourceType?.() || 'unknown',
658
+ resourceType,
659
+ category,
660
+ method,
605
661
  timestamp: Date.now(),
606
662
  };
607
663
  if (args.includeHeaders) {
@@ -612,7 +668,6 @@ export async function handleNetworkRecorder(page, args) {
612
668
  entry.headers = {};
613
669
  }
614
670
  }
615
- // Note: Response body requires async handling, skip for stability
616
671
  requests.push(entry);
617
672
  // Track size from headers
618
673
  try {
@@ -647,6 +702,10 @@ export async function handleNetworkRecorder(page, args) {
647
702
  requests: requests.slice(0, 500),
648
703
  count: requests.length,
649
704
  totalSize,
705
+ categories,
706
+ apis: apis.length > 0 ? apis : undefined,
707
+ mediaUrls: mediaUrls.length > 0 ? mediaUrls : undefined,
708
+ message: `📡 Recorded ${requests.length} requests (${Math.round(totalSize / 1024)}KB) | APIs: ${apis.length} | Media: ${mediaUrls.length}`
650
709
  };
651
710
  }
652
711
  /**
@@ -776,6 +835,7 @@ export async function handleAdProtectionDetector(page, args) {
776
835
  }
777
836
  /**
778
837
  * Wait for dynamic AJAX loading
838
+ * ULTRA POWERFUL: Infinite scroll, lazy load, mutation observer
779
839
  */
780
840
  export async function handleAjaxContentWaiter(page, args) {
781
841
  const timeout = args.timeout || 30000;
@@ -783,6 +843,79 @@ export async function handleAjaxContentWaiter(page, args) {
783
843
  const startTime = Date.now();
784
844
  let content;
785
845
  let loaded = false;
846
+ let newElementsCount = 0;
847
+ let scrollDepth = 0;
848
+ // ============================================================
849
+ // 1. MUTATION OBSERVER: Track DOM changes in real-time
850
+ // ============================================================
851
/**
 * Observe DOM mutations in the page for a fixed window and report how much
 * changed.
 *
 * Uses the enclosing scope's `page` and runs entirely inside
 * `page.evaluate`, so the observation window is passed in as an argument.
 *
 * @param {number} [observeMs=2000] - How long to observe before resolving.
 * @returns {Promise<{added: number, modified: number}>} `added` is the sum
 *   of `addedNodes.length` over all mutation records; `modified` is the
 *   number of attribute / characterData records.
 */
const setupMutationObserver = async (observeMs = 2000) => {
    return await page.evaluate((windowMs) => {
        return new Promise((resolve) => {
            let added = 0;
            let modified = 0;
            const observer = new MutationObserver((mutations) => {
                for (const mutation of mutations) {
                    added += mutation.addedNodes.length;
                    if (mutation.type === 'attributes' || mutation.type === 'characterData') {
                        modified += 1;
                    }
                }
            });
            // document.body is null until <body> is parsed; observing the
            // root element instead avoids a TypeError from observe(null).
            const root = document.body || document.documentElement;
            observer.observe(root, {
                childList: true,
                subtree: true,
                attributes: true,
                characterData: true
            });
            setTimeout(() => {
                observer.disconnect();
                resolve({ added, modified });
            }, windowMs);
        });
    }, observeMs);
};
877
+ // ============================================================
878
+ // 2. INFINITE SCROLL DETECTION
879
+ // ============================================================
880
/**
 * Scroll to the bottom of the page once and report whether it grew.
 *
 * Uses the enclosing scope's `page`. Height and element count are read in
 * a single evaluate call so both numbers describe the same DOM state.
 *
 * @param {number} [settleMs=1000] - Delay after scrolling before the page
 *   is measured again.
 * @returns {Promise<{scrolled: boolean, newElements: number, scrollDepth: number}>}
 *   `scrolled` is true when the body's scrollHeight increased;
 *   `newElements` is the growth in element count (never negative);
 *   `scrollDepth` is the scrollHeight measured after scrolling.
 */
const handleInfiniteScroll = async (settleMs = 1000) => {
    const takeSnapshot = () => page.evaluate(() => ({
        height: document.body ? document.body.scrollHeight : 0,
        count: document.querySelectorAll('*').length,
    }));
    const before = await takeSnapshot();
    // Scroll to bottom
    await page.evaluate(() => {
        window.scrollTo(0, document.body ? document.body.scrollHeight : 0);
    });
    await new Promise((resolve) => setTimeout(resolve, settleMs));
    // Check if new content loaded
    const after = await takeSnapshot();
    return {
        scrolled: after.height > before.height,
        // Pages that recycle or remove nodes while scrolling would otherwise
        // report a negative number of "new" elements.
        newElements: Math.max(0, after.count - before.count),
        scrollDepth: after.height,
    };
};
895
+ // ============================================================
896
+ // 3. LAZY LOAD DETECTION
897
+ // ============================================================
898
/**
 * Collect the deferred source URLs of lazy-loaded elements on the page.
 *
 * Uses the enclosing scope's `page`. An element contributes its `data-src`
 * attribute, or `data-lazy` when `data-src` is absent/empty. A single query
 * is enough: `img[data-src]` and `img.lazy` are already matched by
 * `[data-src]` and `.lazy`, so querying them again only produced duplicates.
 *
 * @returns {Promise<string[]>} Unique URLs in document order.
 */
const detectLazyLoad = async () => {
    return await page.evaluate(() => {
        const sources = new Set();
        // Check for common lazy load patterns
        const candidates = document.querySelectorAll('[data-src], [data-lazy], [loading="lazy"], .lazy, .lazyload');
        candidates.forEach((el) => {
            const deferredSrc = el.getAttribute('data-src') || el.getAttribute('data-lazy');
            if (deferredSrc) {
                sources.add(deferredSrc);
            }
        });
        return [...sources];
    });
};
916
+ // ============================================================
917
+ // 4. MAIN WAITING LOGIC
918
+ // ============================================================
786
919
  while (Date.now() - startTime < timeout) {
787
920
  if (args.selector) {
788
921
  const element = await page.$(args.selector);
@@ -795,17 +928,35 @@ export async function handleAjaxContentWaiter(page, args) {
795
928
  }
796
929
  }
797
930
  else {
798
- // Wait for network to be idle
799
- await page.waitForNetworkIdle({ timeout: pollInterval }).catch(() => { });
800
- loaded = true;
801
- break;
931
+ // Smart waiting: Check for ongoing activity
932
+ const mutationResult = await setupMutationObserver();
933
+ newElementsCount = mutationResult.added;
934
+ if (mutationResult.added === 0 && mutationResult.modified === 0) {
935
+ // No DOM changes, content likely loaded
936
+ loaded = true;
937
+ break;
938
+ }
939
+ }
940
+ // Try infinite scroll to load more content
941
+ const scrollResult = await handleInfiniteScroll();
942
+ if (scrollResult.scrolled) {
943
+ scrollDepth = scrollResult.scrollDepth;
944
+ newElementsCount += scrollResult.newElements;
802
945
  }
803
946
  await new Promise((r) => setTimeout(r, pollInterval));
804
947
  }
948
+ // Detect any lazy-loaded content
949
+ const lazyElements = await detectLazyLoad();
805
950
  return {
806
951
  loaded,
807
952
  waitTime: Date.now() - startTime,
808
953
  content,
954
+ newElementsCount,
955
+ scrollDepth,
956
+ lazyElements: lazyElements.length > 0 ? lazyElements : undefined,
957
+ message: loaded
958
+ ? `✅ Content loaded in ${Date.now() - startTime}ms (${newElementsCount} new elements, scroll: ${scrollDepth}px)`
959
+ : `⏱️ Timeout after ${timeout}ms`
809
960
  };
810
961
  }
811
962
  /**
@@ -1002,20 +1153,124 @@ export async function handleVideoRecording(page, args, recorderState) {
1002
1153
  }
1003
1154
  /**
1004
1155
  * Harvest all links from page
1156
+ * ULTRA POWERFUL: Pagination detection, smart categorization, file types
1005
1157
  */
1006
1158
  export async function handleLinkHarvester(page, args) {
1007
1159
  const currentUrl = new URL(page.url());
1008
- const allLinks = await page.evaluate((filter) => {
1009
- return Array.from(document.querySelectorAll('a[href]')).map((a) => ({
1010
- url: a.href,
1011
- text: a.textContent?.trim()?.substring(0, 100) || '',
1012
- }));
1013
- }, args.filter);
1160
+ // ============================================================
1161
+ // 1. EXTRACT ALL LINKS WITH SMART CATEGORIZATION
1162
+ // ============================================================
1163
+ const allLinks = await page.evaluate(() => {
1164
+ const links = [];
1165
+ document.querySelectorAll('a[href]').forEach((a) => {
1166
+ const anchor = a;
1167
+ links.push({
1168
+ url: anchor.href,
1169
+ text: a.textContent?.trim()?.substring(0, 100) || '',
1170
+ attrs: {
1171
+ rel: anchor.rel || '',
1172
+ target: anchor.target || '',
1173
+ class: anchor.className || '',
1174
+ id: anchor.id || '',
1175
+ download: anchor.download || '',
1176
+ }
1177
+ });
1178
+ });
1179
+ return links;
1180
+ });
1181
+ // ============================================================
1182
+ // 2. PAGINATION DETECTION
1183
+ // ============================================================
1184
+ const pagination = await page.evaluate(() => {
1185
+ let nextPage;
1186
+ let prevPage;
1187
+ let totalPages;
1188
+ // Common pagination selectors
1189
+ const nextSelectors = [
1190
+ 'a[rel="next"]', 'a.next', 'a.pagination-next',
1191
+ '[aria-label="Next"]', 'a:has-text("Next")', 'a:has-text(">")',
1192
+ '.pagination a:last-child', 'a.page-link:last-child'
1193
+ ];
1194
+ const prevSelectors = [
1195
+ 'a[rel="prev"]', 'a.prev', 'a.pagination-prev',
1196
+ '[aria-label="Previous"]', 'a:has-text("Prev")', 'a:has-text("<")'
1197
+ ];
1198
+ for (const sel of nextSelectors) {
1199
+ try {
1200
+ const el = document.querySelector(sel);
1201
+ if (el?.href) {
1202
+ nextPage = el.href;
1203
+ break;
1204
+ }
1205
+ }
1206
+ catch { /* invalid selector */ }
1207
+ }
1208
+ for (const sel of prevSelectors) {
1209
+ try {
1210
+ const el = document.querySelector(sel);
1211
+ if (el?.href) {
1212
+ prevPage = el.href;
1213
+ break;
1214
+ }
1215
+ }
1216
+ catch { /* invalid selector */ }
1217
+ }
1218
+ // Count page numbers
1219
+ const pageNumbers = Array.from(document.querySelectorAll('.pagination a, .page-numbers a, nav a'))
1220
+ .map(a => parseInt(a.textContent || '0', 10))
1221
+ .filter(n => !isNaN(n) && n > 0);
1222
+ if (pageNumbers.length > 0) {
1223
+ totalPages = Math.max(...pageNumbers);
1224
+ }
1225
+ return { nextPage, prevPage, totalPages };
1226
+ });
1227
+ // ============================================================
1228
+ // 3. SMART LINK CATEGORIZATION
1229
+ // ============================================================
1230
/**
 * Assign a harvested link to a coarse category.
 *
 * Checks run in order and the first hit wins: downloadable file types,
 * explicit `download` attribute, pagination, social networks, auth pages,
 * search pages, informational pages; everything else is 'navigation'.
 *
 * @param {string} url - Absolute link URL.
 * @param {string} text - Visible link text.
 * @param {{download?: string}} [attrs] - Anchor attributes; only
 *   `download` is consulted here.
 * @returns {string} 'document', 'video', 'audio', 'image', 'download',
 *   'pagination', 'social', 'auth', 'search', 'info' or 'navigation'.
 */
const categorizeLink = (url, text, attrs) => {
    const textLower = (text || '').toLowerCase();
    // File downloads. The extension may be followed by a query string or a
    // fragment (e.g. "manual.pdf#page=2"), so both are allowed before $.
    if (/\.(pdf|doc|docx|xls|xlsx|zip|rar|7z|tar|gz)(?:[?#].*)?$/i.test(url)) {
        return 'document';
    }
    if (/\.(mp4|mkv|avi|mov|webm|flv)(?:[?#].*)?$/i.test(url)) {
        return 'video';
    }
    if (/\.(mp3|wav|flac|aac|ogg)(?:[?#].*)?$/i.test(url)) {
        return 'audio';
    }
    if (/\.(jpg|jpeg|png|gif|webp|svg|bmp)(?:[?#].*)?$/i.test(url)) {
        return 'image';
    }
    if (attrs?.download) {
        return 'download';
    }
    // Navigation
    if (/\/(next|page|p)\/\d+|[?&]page=\d+/i.test(url)) {
        return 'pagination';
    }
    if (textLower.includes('next') || textLower.includes('prev')) {
        return 'pagination';
    }
    // Social
    if (/facebook|twitter|instagram|linkedin|youtube|tiktok/i.test(url)) {
        return 'social';
    }
    // Common patterns
    if (/login|signin|sign-in|register|signup|sign-up/i.test(url)) {
        return 'auth';
    }
    // "q=" must be a parameter of its own; a bare "q=" also matched the
    // tail of unrelated parameter names such as "faq=".
    if (/search|query|[?&]q=/i.test(url)) {
        return 'search';
    }
    if (/contact|about|faq|help/i.test(url)) {
        return 'info';
    }
    return 'navigation';
};
1014
1263
  const processedLinks = [];
1264
+ const categories = {};
1265
+ const seen = new Set();
1015
1266
  let internal = 0;
1016
1267
  let external = 0;
1017
1268
  for (const link of allLinks) {
1018
1269
  try {
1270
+ // Dedup by URL
1271
+ if (seen.has(link.url))
1272
+ continue;
1273
+ seen.add(link.url);
1019
1274
  const linkUrl = new URL(link.url);
1020
1275
  const isInternal = linkUrl.hostname === currentUrl.hostname;
1021
1276
  if (args.filter && !link.url.includes(args.filter) && !link.text.includes(args.filter)) {
@@ -1025,10 +1280,13 @@ export async function handleLinkHarvester(page, args) {
1025
1280
  continue;
1026
1281
  if (!isInternal && args.includeExternal === false)
1027
1282
  continue;
1283
+ const category = categorizeLink(link.url, link.text, link.attrs);
1284
+ categories[category] = (categories[category] || 0) + 1;
1028
1285
  processedLinks.push({
1029
1286
  url: link.url,
1030
1287
  text: link.text,
1031
1288
  type: isInternal ? 'internal' : 'external',
1289
+ category,
1032
1290
  });
1033
1291
  if (isInternal)
1034
1292
  internal++;
@@ -1045,6 +1303,10 @@ export async function handleLinkHarvester(page, args) {
1045
1303
  links: processedLinks,
1046
1304
  internal,
1047
1305
  external,
1306
+ pagination: (pagination.nextPage || pagination.prevPage || pagination.totalPages) ? pagination : undefined,
1307
+ categories,
1308
+ message: `🔗 Found ${processedLinks.length} links (${internal} internal, ${external} external)` +
1309
+ (pagination.nextPage ? ` | Next: ${pagination.nextPage}` : '')
1048
1310
  };
1049
1311
  }
1050
1312
  /**
@@ -2367,18 +2629,19 @@ export async function handleCloudflareBypass(page, args) {
2367
2629
  }
2368
2630
  /**
2369
2631
  * Master tool: Extract direct stream/download URLs
2632
+ * ULTRA POWERFUL: Handles packed JS, JW Player, Video.js, HLS.js, obfuscated scripts
2370
2633
  */
2371
2634
  export async function handleStreamExtractor(page, args) {
2372
- const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm'];
2635
+ const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm', 'flv', 'avi'];
2373
2636
  const maxRedirects = args.maxRedirects || 10;
2374
2637
  const directUrls = [];
2375
2638
  // Navigate if URL provided
2376
2639
  if (args.url) {
2377
2640
  await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
2378
2641
  }
2379
- // Handle Cloudflare if enabled (inline logic)
2642
+ // Handle Cloudflare if enabled
2380
2643
  if (args.bypassCloudflare) {
2381
- const cfPatterns = ['Checking your browser', 'Just a moment', 'cf-browser-verification'];
2644
+ const cfPatterns = ['Checking your browser', 'Just a moment', 'cf-browser-verification', 'cf_chl_opt'];
2382
2645
  const isCloudflare = async () => {
2383
2646
  try {
2384
2647
  const content = await page.content();
@@ -2388,72 +2651,212 @@ export async function handleStreamExtractor(page, args) {
2388
2651
  return false;
2389
2652
  }
2390
2653
  };
2391
- // Wait up to 15 seconds for Cloudflare to pass
2392
2654
  const startCf = Date.now();
2393
- while (await isCloudflare() && Date.now() - startCf < 15000) {
2655
+ while (await isCloudflare() && Date.now() - startCf < 20000) {
2394
2656
  await new Promise(r => setTimeout(r, 1000));
2395
2657
  }
2396
2658
  }
2397
- // Handle countdown if enabled (inline logic)
2659
+ // Handle countdown if enabled
2398
2660
  if (args.waitForCountdown) {
2399
- const maxWait = 60;
2661
+ const maxWait = 120;
2400
2662
  const startTime = Date.now();
2401
2663
  while ((Date.now() - startTime) / 1000 < maxWait) {
2402
2664
  const hasCountdown = await page.evaluate(() => {
2403
2665
  const text = document.body?.innerText || '';
2404
- return /\d+\s*seconds?|wait\s*\d+|please\s*wait|countdown/gi.test(text);
2666
+ return /\d+\s*seconds?|wait\s*\d+|please\s*wait|countdown|getting link/gi.test(text);
2405
2667
  });
2406
2668
  if (!hasCountdown)
2407
2669
  break;
2408
2670
  await new Promise(r => setTimeout(r, 1000));
2409
2671
  }
2410
2672
  }
2411
- // Extract URLs from page
2412
- const extractedUrls = await page.evaluate((fmts) => {
2673
+ // ULTRA POWERFUL: Extract from all sources
2674
+ const extractedData = await page.evaluate((fmts) => {
2413
2675
  const urls = [];
2414
- const patterns = fmts.map(f => new RegExp(`https?://[^"'\\s]+\\.${f}([?#][^"'\\s]*)?`, 'gi'));
2415
- // Check page HTML
2416
2676
  const html = document.documentElement.innerHTML;
2417
- patterns.forEach(pattern => {
2418
- const matches = html.match(pattern);
2419
- if (matches)
2420
- urls.push(...matches);
2677
+ // ============================================================
2678
+ // 1. PACKED JS UNPACKING (p,a,c,k,e,d)
2679
+ // ============================================================
2680
/**
 * Best-effort expansion of a "p,a,c,k,e,d"-style packed script.
 *
 * Finds the `'...'.split('|')` word dictionary and substitutes every
 * whole-word token whose text equals the base-36 index of a non-empty
 * dictionary word. The substitution is applied to the entire input (not
 * just the payload) and the result is only meant to be scanned for URLs.
 *
 * All tokens are replaced in a single pass with a function replacer:
 *  - a word that has just been inserted is never substituted again
 *    (with sequential passes, word #1 = "b" was later rewritten by key "b");
 *  - `$&`, `$1`, ... inside dictionary words are inserted literally.
 *
 * NOTE(review): keys are assumed to be plain base-36 indices; payloads
 * packed with a larger radix are presumably not fully decoded - confirm.
 *
 * @param {string} packed - Script text that may contain a packed block.
 * @returns {string} The substituted text, or '' when no packed block or
 *   dictionary is found or an error occurs.
 */
const unpackPackedJS = (packed) => {
    try {
        // Find packed function pattern
        if (!/eval\(function\(p,a,c,k,e,[rd]\)\{[^}]+\}[^)]+\('[^']+'/.test(packed)) {
            return '';
        }
        // Extract the word dictionary
        const dictMatch = packed.match(/'([^']+)'\.split\('\|'\)/);
        if (!dictMatch) {
            return '';
        }
        const lookup = new Map();
        dictMatch[1].split('|').forEach((word, index) => {
            // Empty slots mean "keep the token as is".
            if (word) {
                lookup.set(index.toString(36), word);
            }
        });
        return packed.replace(/\b\w+\b/g, (token) => lookup.get(token) ?? token);
    }
    catch {
        return '';
    }
};
2705
+ // Find and unpack all packed scripts
2706
+ const scripts = document.querySelectorAll('script');
2707
+ scripts.forEach(script => {
2708
+ const content = script.textContent || '';
2709
+ if (content.includes('eval(function(p,a,c,k,e,')) {
2710
+ const unpacked = unpackPackedJS(content);
2711
+ // Extract URLs from unpacked content
2712
+ fmts.forEach(fmt => {
2713
+ const regex = new RegExp(`https?://[^"'\\s]+\\.${fmt}[^"'\\s]*`, 'gi');
2714
+ const matches = unpacked.match(regex);
2715
+ if (matches)
2716
+ matches.forEach(url => urls.push({ url, source: 'packed_js' }));
2717
+ });
2718
+ }
2421
2719
  });
2422
- // Check video/audio sources
2423
- document.querySelectorAll('video source, audio source, video, audio').forEach(el => {
2424
- const src = el.getAttribute('src');
2425
- if (src && fmts.some(f => src.includes(`.${f}`))) {
2426
- urls.push(src);
2720
+ // ============================================================
2721
+ // 2. JW PLAYER DETECTION
2722
+ // ============================================================
2723
+ if (window.jwplayer) {
2724
+ try {
2725
+ const player = window.jwplayer();
2726
+ if (player && player.getPlaylistItem) {
2727
+ const item = player.getPlaylistItem();
2728
+ if (item) {
2729
+ if (item.file)
2730
+ urls.push({ url: item.file, source: 'jwplayer' });
2731
+ if (item.sources) {
2732
+ item.sources.forEach((s) => {
2733
+ if (s.file)
2734
+ urls.push({ url: s.file, source: 'jwplayer' });
2735
+ });
2736
+ }
2737
+ }
2738
+ }
2427
2739
  }
2740
+ catch { /* ignore */ }
2741
+ }
2742
+ // JW Player setup patterns in scripts
2743
+ const jwPatterns = [
2744
+ /file:\s*["']([^"']+\.m3u8[^"']*?)["']/gi,
2745
+ /file:\s*["']([^"']+\.mp4[^"']*?)["']/gi,
2746
+ /sources:\s*\[\s*\{[^}]*file:\s*["']([^"']+)["']/gi,
2747
+ /setup\([^)]*file:\s*["']([^"']+)["']/gi,
2748
+ ];
2749
+ jwPatterns.forEach(pattern => {
2750
+ let match;
2751
+ while ((match = pattern.exec(html)) !== null) {
2752
+ urls.push({ url: match[1], source: 'jwplayer_setup' });
2753
+ }
2754
+ });
2755
+ // ============================================================
2756
+ // 3. VIDEO.JS DETECTION
2757
+ // ============================================================
2758
+ const videoJsPlayers = document.querySelectorAll('.video-js, [data-setup], video[id^="vjs"]');
2759
+ videoJsPlayers.forEach(player => {
2760
+ const video = player.querySelector('source') || player;
2761
+ const src = video.getAttribute('src') || player.src;
2762
+ if (src)
2763
+ urls.push({ url: src, source: 'videojs' });
2764
+ });
2765
+ // ============================================================
2766
+ // 4. HLS.JS DETECTION
2767
+ // ============================================================
2768
+ const hlsPatterns = [
2769
+ /hls\.loadSource\(["']([^"']+)["']\)/gi,
2770
+ /Hls\.loadSource\(["']([^"']+)["']\)/gi,
2771
+ /source:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2772
+ /src:\s*["']([^"']+\.m3u8[^"']*)["']/gi,
2773
+ ];
2774
+ hlsPatterns.forEach(pattern => {
2775
+ let match;
2776
+ while ((match = pattern.exec(html)) !== null) {
2777
+ urls.push({ url: match[1], source: 'hlsjs' });
2778
+ }
2779
+ });
2780
+ // ============================================================
2781
+ // 5. PLYR DETECTION
2782
+ // ============================================================
2783
+ if (window.Plyr) {
2784
+ try {
2785
+ const plyrPlayer = window.player;
2786
+ if (plyrPlayer && plyrPlayer.source) {
2787
+ urls.push({ url: plyrPlayer.source, source: 'plyr' });
2788
+ }
2789
+ }
2790
+ catch { /* ignore */ }
2791
+ }
2792
+ // ============================================================
2793
+ // 6. DATA ATTRIBUTES
2794
+ // ============================================================
2795
+ document.querySelectorAll('[data-src], [data-video], [data-file], [data-stream]').forEach(el => {
2796
+ const attrs = ['data-src', 'data-video', 'data-file', 'data-stream', 'data-link'];
2797
+ attrs.forEach(attr => {
2798
+ const val = el.getAttribute(attr);
2799
+ if (val && fmts.some(f => val.includes(`.${f}`))) {
2800
+ urls.push({ url: val, source: 'data_attr' });
2801
+ }
2802
+ });
2803
+ });
2804
+ // ============================================================
2805
+ // 7. STANDARD VIDEO/AUDIO ELEMENTS
2806
+ // ============================================================
2807
+ document.querySelectorAll('video, audio, source').forEach(el => {
2808
+ const src = el.getAttribute('src');
2809
+ if (src)
2810
+ urls.push({ url: src, source: 'html_media' });
2428
2811
  });
2429
- // Check links
2812
+ // ============================================================
2813
+ // 8. DIRECT LINKS
2814
+ // ============================================================
2430
2815
  document.querySelectorAll('a[href]').forEach(el => {
2431
2816
  const href = el.href;
2432
2817
  if (href && fmts.some(f => href.includes(`.${f}`))) {
2433
- urls.push(href);
2818
+ urls.push({ url: href, source: 'direct_link' });
2434
2819
  }
2435
2820
  });
2436
- // Check iframes for embedded players
2821
+ // ============================================================
2822
+ // 9. IFRAME PLAYERS
2823
+ // ============================================================
2437
2824
  document.querySelectorAll('iframe').forEach(iframe => {
2438
2825
  const src = iframe.src;
2439
- if (src && (src.includes('player') || src.includes('embed'))) {
2440
- urls.push(`iframe:${src}`);
2826
+ if (src && (src.includes('player') || src.includes('embed') || src.includes('video'))) {
2827
+ urls.push({ url: `iframe:${src}`, source: 'iframe' });
2441
2828
  }
2442
2829
  });
2443
- return [...new Set(urls)];
2830
+ // ============================================================
2831
+ // 10. REGEX SCAN OF ENTIRE HTML
2832
+ // ============================================================
2833
+ fmts.forEach(fmt => {
2834
+ const pattern = new RegExp(`https?://[^"'\\s<>]+\\.${fmt}[^"'\\s<>]*`, 'gi');
2835
+ const matches = html.match(pattern);
2836
+ if (matches)
2837
+ matches.forEach(url => urls.push({ url, source: 'regex_scan' }));
2838
+ });
2839
+ // Deduplicate
2840
+ const seen = new Set();
2841
+ return urls.filter(u => {
2842
+ if (seen.has(u.url))
2843
+ return false;
2844
+ seen.add(u.url);
2845
+ return true;
2846
+ });
2444
2847
  }, formats);
2445
2848
  // Process found URLs
2446
- for (const url of extractedUrls) {
2447
- const format = formats.find(f => url.includes(`.${f}`)) || 'unknown';
2849
+ for (const item of extractedData) {
2850
+ const format = formats.find(f => item.url.includes(`.${f}`)) || 'unknown';
2448
2851
  directUrls.push({
2449
- url,
2852
+ url: item.url,
2450
2853
  format,
2451
2854
  quality: args.quality || 'auto',
2855
+ source: item.source,
2452
2856
  });
2453
2857
  }
2454
2858
  // Check network requests for media URLs
2455
2859
  const networkUrls = await page.evaluate((fmts) => {
2456
- // Check performance entries for loaded resources
2457
2860
  const resources = performance.getEntriesByType('resource');
2458
2861
  return resources
2459
2862
  .filter(r => fmts.some(f => r.name.includes(`.${f}`)))
@@ -2462,14 +2865,14 @@ export async function handleStreamExtractor(page, args) {
2462
2865
  for (const url of networkUrls) {
2463
2866
  if (!directUrls.some(d => d.url === url)) {
2464
2867
  const format = formats.find(f => url.includes(`.${f}`)) || 'unknown';
2465
- directUrls.push({ url, format });
2868
+ directUrls.push({ url, format, source: 'network' });
2466
2869
  }
2467
2870
  }
2468
2871
  return {
2469
2872
  success: directUrls.length > 0,
2470
2873
  directUrls,
2471
2874
  message: directUrls.length > 0
2472
- ? `Found ${directUrls.length} direct URL(s)`
2875
+ ? `🎬 Found ${directUrls.length} direct URL(s) from ${new Set(directUrls.map(d => d.source)).size} sources`
2473
2876
  : 'No direct URLs found',
2474
2877
  };
2475
2878
  }
@@ -80,6 +80,40 @@ export async function handleFindSelector(args) {
80
80
  // Ensure elementType has a fallback value
81
81
  const elementType = args?.elementType || '*';
82
82
  tracker.setProgress(10, '🔧 Preparing search strategies...');
83
+ // ============================================================
84
+ // FUZZY MATCHING: Levenshtein distance for typo tolerance
85
+ // ============================================================
86
/**
 * Case-insensitive fuzzy comparison of two strings.
 *
 * Scoring:
 *  - identical strings                      -> score 1
 *  - one string contains the other          -> score 0.9
 *  - otherwise 1 - levenshtein / max length -> match when score >= threshold
 *
 * An empty (or missing) string on exactly one side never matches: every
 * string "contains" the empty string, which previously produced a 0.9
 * match against anything.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {number} [threshold=0.7] - Minimum similarity for a match.
 * @returns {{match: boolean, score: number}} Match verdict and similarity.
 */
const fuzzyMatch = (str1, str2, threshold = 0.7) => {
    const s1 = String(str1 ?? '').toLowerCase();
    const s2 = String(str2 ?? '').toLowerCase();
    // Exact match
    if (s1 === s2) {
        return { match: true, score: 1 };
    }
    if (s1.length === 0 || s2.length === 0) {
        return { match: false, score: 0 };
    }
    // Contains match
    if (s1.includes(s2) || s2.includes(s1)) {
        return { match: true, score: 0.9 };
    }
    // Levenshtein distance, keeping only the previous and current rows.
    let previousRow = Array.from({ length: s2.length + 1 }, (_, j) => j);
    for (let i = 1; i <= s1.length; i++) {
        const currentRow = [i];
        for (let j = 1; j <= s2.length; j++) {
            const cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
            currentRow[j] = Math.min(
                previousRow[j] + 1, // deletion
                currentRow[j - 1] + 1, // insertion
                previousRow[j - 1] + cost // substitution
            );
        }
        previousRow = currentRow;
    }
    const distance = previousRow[s2.length];
    const score = 1 - (distance / Math.max(s1.length, s2.length));
    return { match: score >= threshold, score };
};
83
117
  // Helper: Search in Shadow DOM
84
118
  const searchInShadowDOM = async (sel) => {
85
119
  return await pageInstance.evaluate((selector) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.24.3",
3
+ "version": "2.24.5",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.5.3",
53
+ "brave-real-browser": "^2.5.5",
54
54
  "turndown": "latest",
55
55
  "vscode-languageserver": "^9.0.1",
56
56
  "vscode-languageserver-textdocument": "^1.0.12"