brave-real-browser-mcp-server 2.27.32 → 2.29.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1707,6 +1707,7 @@ export async function handleExtractJson(page, args) {
1707
1707
  try {
1708
1708
  const key = args.decryptAES.key || "kiemtienmua911ca";
1709
1709
  const ivList = args.decryptAES.ivList || ["1234567890oiuytr", "0123456789abcdef"];
1710
+ const autoIV = args.decryptAES.autoIV !== false; // Default to true for hubstream-style encryption
1710
1711
  let input = args.decryptAES.input || "";
1711
1712
  // Option: Fetch encrypted data from API URL directly (recommended for hubstream)
1712
1713
  if (!input && args.decryptAES.fetchUrl) {
@@ -1733,24 +1734,43 @@ export async function handleExtractJson(page, args) {
1733
1734
  }
1734
1735
  return bytes;
1735
1736
  };
1736
- for (const iv of params.ivList) {
1737
+ // Build IV list - if autoIV, try extracting from first 16 bytes first
1738
+ const ivsToTry = [];
1739
+ if (params.autoIV && encryptedHex.length >= 32) {
1740
+ // First 32 hex chars = first 16 bytes = IV (hubstream style)
1741
+ const ivHex = encryptedHex.substring(0, 32);
1742
+ ivsToTry.push({ iv: hexToBytes(ivHex), label: 'autoIV (first 16 bytes)' });
1743
+ }
1744
+ // Add manual IV list
1745
+ for (const ivStr of params.ivList) {
1746
+ ivsToTry.push({ iv: new TextEncoder().encode(ivStr), label: ivStr });
1747
+ }
1748
+ for (const { iv: ivData, label: ivLabel } of ivsToTry) {
1737
1749
  try {
1738
1750
  const keyData = new TextEncoder().encode(params.key);
1739
- const ivData = new TextEncoder().encode(iv);
1740
- const encryptedData = hexToBytes(encryptedHex);
1751
+ // If using autoIV, encrypted data starts after first 32 hex chars (16 bytes)
1752
+ const isAutoIV = ivLabel.startsWith('autoIV');
1753
+ const dataHex = isAutoIV ? encryptedHex.substring(32) : encryptedHex;
1754
+ const encryptedData = hexToBytes(dataHex);
1741
1755
  const cryptoKey = await crypto.subtle.importKey("raw", keyData, "AES-CBC", false, ["decrypt"]);
1742
1756
  const decrypted = await crypto.subtle.decrypt({ name: "AES-CBC", iv: ivData }, cryptoKey, encryptedData);
1743
1757
  const decryptedText = new TextDecoder().decode(decrypted);
1744
- // Extract source URL from decrypted JSON
1758
+ // Extract URLs from decrypted JSON
1745
1759
  const sourceMatch = /"source":"([^"]+)"/.exec(decryptedText);
1760
+ const mp4Match = /"mp4":"([^"]+)"/.exec(decryptedText);
1761
+ const hlsMatch = /"hls":"([^"]+)"/.exec(decryptedText);
1746
1762
  const streamUrl = sourceMatch ? sourceMatch[1].replace(/\\\//g, '/') : null;
1763
+ const mp4Url = mp4Match ? mp4Match[1].replace(/\\\//g, '/') : null;
1764
+ const hlsUrl = hlsMatch ? hlsMatch[1].replace(/\\\//g, '/') : null;
1747
1765
  return {
1748
1766
  success: true,
1749
- iv: iv,
1767
+ iv: ivLabel,
1750
1768
  decrypted: decryptedText,
1751
1769
  decryptedLength: decryptedText.length,
1752
1770
  extractedStreamUrl: streamUrl,
1753
- isStreamUrl: streamUrl ? (streamUrl.includes('m3u8') || streamUrl.includes('.mp4')) : false,
1771
+ extractedMp4Url: mp4Url,
1772
+ extractedHlsUrl: hlsUrl,
1773
+ isStreamUrl: (streamUrl && (streamUrl.includes('m3u8') || streamUrl.includes('.mp4'))) || !!mp4Url,
1754
1774
  inputLength: encryptedHex.length
1755
1775
  };
1756
1776
  }
@@ -1763,7 +1783,7 @@ export async function handleExtractJson(page, args) {
1763
1783
  catch (e) {
1764
1784
  return { success: false, error: String(e) };
1765
1785
  }
1766
- }, { input, key, ivList, fetchUrl: args.decryptAES.fetchUrl });
1786
+ }, { input, key, ivList, autoIV, fetchUrl: args.decryptAES.fetchUrl });
1767
1787
  decoded = {
1768
1788
  source: 'decryptAES',
1769
1789
  ...result,
@@ -2306,8 +2326,9 @@ export async function handleDeepAnalysis(page, args) {
2306
2326
  }
2307
2327
  }
2308
2328
  /**
2309
- * Record full network traffic - Uses response events to avoid crashes
2329
+ * Network recorder with API interception capabilities
2310
2330
  * ULTRA POWERFUL: API detection, media URLs, smart categorization
2331
+ * NEW: Request interception, mocking, blocking, and header modification
2311
2332
  */
2312
2333
  export async function handleNetworkRecorder(page, args) {
2313
2334
  // Progress tracking
@@ -2321,7 +2342,15 @@ export async function handleNetworkRecorder(page, args) {
2321
2342
  const apis = [];
2322
2343
  const mediaUrls = [];
2323
2344
  const seen = new Set();
2324
- tracker.setProgress(10, `⏱️ Recording for ${duration}ms...`);
2345
+ const interceptedRequests = [];
2346
+ let blockedCount = 0;
2347
+ let mockedCount = 0;
2348
+ const interceptMode = args.interceptMode || 'record';
2349
+ const blockPatterns = args.blockPatterns || [];
2350
+ const mockResponses = args.mockResponses || [];
2351
+ const modifyHeaders = args.modifyHeaders || [];
2352
+ const capturePayloads = args.capturePayloads === true;
2353
+ tracker.setProgress(10, `⏱️ Recording for ${duration}ms (mode: ${interceptMode})...`);
2325
2354
  // ============================================================
2326
2355
  // SMART CATEGORIZATION HELPER
2327
2356
  // ============================================================
@@ -2355,75 +2384,216 @@ export async function handleNetworkRecorder(page, args) {
2355
2384
  return 'document';
2356
2385
  return 'other';
2357
2386
  };
2358
- // Response handler - safer than request interception
2359
- const responseHandler = (response) => {
2360
- try {
2361
- const url = response.url();
2362
- // Dedup
2363
- if (seen.has(url))
2364
- return;
2365
- seen.add(url);
2366
- if (args.filterUrl && !url.includes(args.filterUrl)) {
2367
- return;
2368
- }
2369
- const resourceType = response.request()?.resourceType?.() || 'unknown';
2370
- const method = response.request()?.method?.() || 'GET';
2371
- const category = categorizeUrl(url, resourceType);
2372
- categories[category] = (categories[category] || 0) + 1;
2373
- // Collect API endpoints
2374
- if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
2375
- apis.push({ url, method, type: resourceType });
2387
+ // Helper to check URL against patterns
2388
+ const matchesPattern = (url, patterns) => {
2389
+ return patterns.some(pattern => {
2390
+ try {
2391
+ if (pattern.startsWith('/') && pattern.endsWith('/')) {
2392
+ // Regex pattern
2393
+ const regex = new RegExp(pattern.slice(1, -1));
2394
+ return regex.test(url);
2395
+ }
2396
+ // Simple includes check
2397
+ return url.includes(pattern);
2376
2398
  }
2377
- // Collect media URLs
2378
- if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
2379
- mediaUrls.push(url);
2399
+ catch {
2400
+ return url.includes(pattern);
2380
2401
  }
2381
- const entry = {
2382
- url,
2383
- status: response.status(),
2384
- resourceType,
2385
- category,
2386
- method,
2387
- timestamp: Date.now(),
2388
- };
2389
- if (args.includeHeaders) {
2390
- try {
2391
- entry.headers = response.headers();
2402
+ });
2403
+ };
2404
+ // ============================================================
2405
+ // INTERCEPTION MODE - Uses request interception
2406
+ // ============================================================
2407
+ if (interceptMode === 'intercept' || interceptMode === 'mock') {
2408
+ try {
2409
+ await page.setRequestInterception(true);
2410
+ const requestHandler = async (request) => {
2411
+ const url = request.url();
2412
+ const method = request.method();
2413
+ const resourceType = request.resourceType();
2414
+ const category = categorizeUrl(url, resourceType);
2415
+ // Check if should block
2416
+ if (blockPatterns.length > 0 && matchesPattern(url, blockPatterns)) {
2417
+ blockedCount++;
2418
+ interceptedRequests.push({
2419
+ url,
2420
+ method,
2421
+ action: 'blocked',
2422
+ timestamp: Date.now()
2423
+ });
2424
+ await request.abort();
2425
+ return;
2392
2426
  }
2393
- catch (e) {
2394
- entry.headers = {};
2427
+ // Check if should mock
2428
+ const mockConfig = mockResponses.find(m => matchesPattern(url, [m.urlPattern]));
2429
+ if (mockConfig) {
2430
+ mockedCount++;
2431
+ interceptedRequests.push({
2432
+ url,
2433
+ method,
2434
+ action: 'mocked',
2435
+ mockResponse: mockConfig.response,
2436
+ timestamp: Date.now()
2437
+ });
2438
+ await request.respond({
2439
+ status: mockConfig.statusCode || 200,
2440
+ contentType: 'application/json',
2441
+ body: typeof mockConfig.response === 'string'
2442
+ ? mockConfig.response
2443
+ : JSON.stringify(mockConfig.response)
2444
+ });
2445
+ return;
2446
+ }
2447
+ // Check if should modify headers
2448
+ const headerConfig = modifyHeaders.find(h => matchesPattern(url, [h.urlPattern]));
2449
+ if (headerConfig) {
2450
+ const headers = {
2451
+ ...request.headers(),
2452
+ ...headerConfig.headers
2453
+ };
2454
+ interceptedRequests.push({
2455
+ url,
2456
+ method,
2457
+ action: 'headers_modified',
2458
+ modifiedHeaders: headerConfig.headers,
2459
+ timestamp: Date.now()
2460
+ });
2461
+ await request.continue({ headers });
2462
+ return;
2463
+ }
2464
+ // Continue normally but record
2465
+ if (!seen.has(url)) {
2466
+ seen.add(url);
2467
+ categories[category] = (categories[category] || 0) + 1;
2468
+ const entry = {
2469
+ url,
2470
+ method,
2471
+ resourceType,
2472
+ category,
2473
+ timestamp: Date.now()
2474
+ };
2475
+ // Capture POST/PUT payloads
2476
+ if (capturePayloads && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
2477
+ try {
2478
+ entry.payload = request.postData();
2479
+ }
2480
+ catch {
2481
+ // Ignore
2482
+ }
2483
+ }
2484
+ requests.push(entry);
2485
+ // Collect API endpoints
2486
+ if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
2487
+ apis.push({
2488
+ url,
2489
+ method,
2490
+ type: resourceType,
2491
+ payload: entry.payload
2492
+ });
2493
+ }
2494
+ // Collect media URLs
2495
+ if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
2496
+ mediaUrls.push(url);
2497
+ }
2395
2498
  }
2499
+ await request.continue();
2500
+ };
2501
+ page.on('request', requestHandler);
2502
+ await new Promise(r => setTimeout(r, duration));
2503
+ page.off('request', requestHandler);
2504
+ await page.setRequestInterception(false);
2505
+ }
2506
+ catch (e) {
2507
+ // Cleanup on error
2508
+ try {
2509
+ await page.setRequestInterception(false);
2396
2510
  }
2397
- requests.push(entry);
2398
- // Track size from headers
2511
+ catch { }
2512
+ }
2513
+ }
2514
+ else {
2515
+ // ============================================================
2516
+ // RECORD MODE - Uses response events (safer)
2517
+ // ============================================================
2518
+ const responseHandler = (response) => {
2399
2519
  try {
2400
- const headers = response.headers();
2401
- const size = parseInt(headers['content-length'] || '0', 10);
2402
- totalSize += size;
2520
+ const url = response.url();
2521
+ // Dedup
2522
+ if (seen.has(url))
2523
+ return;
2524
+ seen.add(url);
2525
+ if (args.filterUrl && !url.includes(args.filterUrl)) {
2526
+ return;
2527
+ }
2528
+ const resourceType = response.request()?.resourceType?.() || 'unknown';
2529
+ const method = response.request()?.method?.() || 'GET';
2530
+ const category = categorizeUrl(url, resourceType);
2531
+ categories[category] = (categories[category] || 0) + 1;
2532
+ // Collect API endpoints
2533
+ if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
2534
+ const apiEntry = { url, method, type: resourceType };
2535
+ // Capture POST data if enabled
2536
+ if (capturePayloads && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
2537
+ try {
2538
+ apiEntry.payload = response.request()?.postData?.();
2539
+ }
2540
+ catch { }
2541
+ }
2542
+ apis.push(apiEntry);
2543
+ }
2544
+ // Collect media URLs
2545
+ if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
2546
+ mediaUrls.push(url);
2547
+ }
2548
+ const entry = {
2549
+ url,
2550
+ status: response.status(),
2551
+ resourceType,
2552
+ category,
2553
+ method,
2554
+ timestamp: Date.now(),
2555
+ };
2556
+ if (args.includeHeaders) {
2557
+ try {
2558
+ entry.headers = response.headers();
2559
+ }
2560
+ catch (e) {
2561
+ entry.headers = {};
2562
+ }
2563
+ }
2564
+ requests.push(entry);
2565
+ // Track size from headers
2566
+ try {
2567
+ const headers = response.headers();
2568
+ const size = parseInt(headers['content-length'] || '0', 10);
2569
+ totalSize += size;
2570
+ }
2571
+ catch {
2572
+ // Ignore
2573
+ }
2403
2574
  }
2404
2575
  catch {
2405
- // Ignore
2576
+ // Ignore all errors in handler to prevent crash
2406
2577
  }
2407
- }
2408
- catch {
2409
- // Ignore all errors in handler to prevent crash
2410
- }
2411
- };
2412
- try {
2413
- page.on('response', responseHandler);
2414
- await new Promise((r) => setTimeout(r, duration));
2415
- }
2416
- catch (e) {
2417
- // Capture setup errors
2418
- }
2419
- finally {
2578
+ };
2420
2579
  try {
2421
- page.off('response', responseHandler);
2580
+ page.on('response', responseHandler);
2581
+ await new Promise((r) => setTimeout(r, duration));
2422
2582
  }
2423
2583
  catch (e) {
2424
- // Ignore cleanup errors
2584
+ // Capture setup errors
2585
+ }
2586
+ finally {
2587
+ try {
2588
+ page.off('response', responseHandler);
2589
+ }
2590
+ catch (e) {
2591
+ // Ignore cleanup errors
2592
+ }
2425
2593
  }
2426
2594
  }
2595
+ tracker.setProgress(90, `✅ Recorded ${requests.length} requests`);
2596
+ tracker.complete(`🎉 Network recording complete`);
2427
2597
  return {
2428
2598
  requests: requests.slice(0, 500),
2429
2599
  count: requests.length,
@@ -2431,7 +2601,12 @@ export async function handleNetworkRecorder(page, args) {
2431
2601
  categories,
2432
2602
  apis: apis.length > 0 ? apis : undefined,
2433
2603
  mediaUrls: mediaUrls.length > 0 ? mediaUrls : undefined,
2434
- message: `📡 Recorded ${requests.length} requests (${Math.round(totalSize / 1024)}KB) | APIs: ${apis.length} | Media: ${mediaUrls.length}`
2604
+ blockedCount: blockedCount > 0 ? blockedCount : undefined,
2605
+ mockedCount: mockedCount > 0 ? mockedCount : undefined,
2606
+ interceptedRequests: interceptedRequests.length > 0 ? interceptedRequests : undefined,
2607
+ message: `📡 Recorded ${requests.length} requests (${Math.round(totalSize / 1024)}KB) | APIs: ${apis.length} | Media: ${mediaUrls.length}` +
2608
+ (blockedCount > 0 ? ` | Blocked: ${blockedCount}` : '') +
2609
+ (mockedCount > 0 ? ` | Mocked: ${mockedCount}` : '')
2435
2610
  };
2436
2611
  }
2437
2612
  /**
@@ -2905,6 +3080,7 @@ export async function handleVideoRecording(page, args, recorderState) {
2905
3080
  /**
2906
3081
  * Harvest all links from page
2907
3082
  * ULTRA POWERFUL: Pagination detection, smart categorization, file types
3083
+ * NEW: Auto-follow pagination to scrape multiple pages
2908
3084
  */
2909
3085
  export async function handleLinkHarvester(page, args) {
2910
3086
  // Progress tracking for real-time updates
@@ -2913,76 +3089,110 @@ export async function handleLinkHarvester(page, args) {
2913
3089
  tracker.start(100, '🔗 Starting link harvesting...');
2914
3090
  const currentUrl = new URL(page.url());
2915
3091
  tracker.setProgress(10, `📍 Analyzing page: ${currentUrl.hostname}`);
2916
- // ============================================================
2917
- // 1. EXTRACT ALL LINKS WITH SMART CATEGORIZATION
2918
- // ============================================================
2919
- tracker.setProgress(20, '🔍 Extracting all links from page...');
2920
- const allLinks = await page.evaluate(() => {
2921
- const links = [];
2922
- document.querySelectorAll('a[href]').forEach((a) => {
2923
- const anchor = a;
2924
- links.push({
2925
- url: anchor.href,
2926
- text: a.textContent?.trim()?.substring(0, 100) || '',
2927
- attrs: {
2928
- rel: anchor.rel || '',
2929
- target: anchor.target || '',
2930
- class: anchor.className || '',
2931
- id: anchor.id || '',
2932
- download: anchor.download || '',
2933
- }
3092
+ // Pagination settings
3093
+ const followPagination = args.followPagination === true;
3094
+ const maxPages = Math.min(args.maxPages || 5, 20); // Max 20 pages
3095
+ const delayBetweenPages = args.delayBetweenPages || 1000;
3096
+ const paginationSelector = args.paginationSelector;
3097
+ // Helper function to extract links from current page
3098
+ const extractLinksFromPage = async () => {
3099
+ const allLinks = await page.evaluate(() => {
3100
+ const links = [];
3101
+ document.querySelectorAll('a[href]').forEach((a) => {
3102
+ const anchor = a;
3103
+ links.push({
3104
+ url: anchor.href,
3105
+ text: a.textContent?.trim()?.substring(0, 100) || '',
3106
+ attrs: {
3107
+ rel: anchor.rel || '',
3108
+ target: anchor.target || '',
3109
+ class: anchor.className || '',
3110
+ id: anchor.id || '',
3111
+ download: anchor.download || '',
3112
+ }
3113
+ });
2934
3114
  });
3115
+ return links;
2935
3116
  });
2936
- return links;
2937
- });
2938
- // ============================================================
2939
- // 2. PAGINATION DETECTION
2940
- // ============================================================
2941
- const pagination = await page.evaluate(() => {
2942
- let nextPage;
2943
- let prevPage;
2944
- let totalPages;
2945
- // Common pagination selectors
2946
- const nextSelectors = [
2947
- 'a[rel="next"]', 'a.next', 'a.pagination-next',
2948
- '[aria-label="Next"]', 'a:has-text("Next")', 'a:has-text(">")',
2949
- '.pagination a:last-child', 'a.page-link:last-child'
2950
- ];
2951
- const prevSelectors = [
2952
- 'a[rel="prev"]', 'a.prev', 'a.pagination-prev',
2953
- '[aria-label="Previous"]', 'a:has-text("Prev")', 'a:has-text("<")'
2954
- ];
2955
- for (const sel of nextSelectors) {
2956
- try {
2957
- const el = document.querySelector(sel);
2958
- if (el?.href) {
2959
- nextPage = el.href;
2960
- break;
3117
+ // Pagination detection
3118
+ const pagination = await page.evaluate((customSelector) => {
3119
+ let nextPage;
3120
+ let prevPage;
3121
+ let totalPages;
3122
+ let currentPage;
3123
+ // Custom selector first
3124
+ if (customSelector) {
3125
+ try {
3126
+ const el = document.querySelector(customSelector);
3127
+ if (el?.href)
3128
+ nextPage = el.href;
3129
+ }
3130
+ catch { /* invalid selector */ }
3131
+ }
3132
+ // Common pagination selectors
3133
+ const nextSelectors = [
3134
+ 'a[rel="next"]', 'a.next', 'a.pagination-next',
3135
+ '[aria-label="Next"]', 'a.page-link.next', '.next a',
3136
+ '.pagination a:last-child', 'a[title="Next"]',
3137
+ 'a[aria-label*="next" i]', 'button.next', '[data-testid="next"]'
3138
+ ];
3139
+ const prevSelectors = [
3140
+ 'a[rel="prev"]', 'a.prev', 'a.pagination-prev',
3141
+ '[aria-label="Previous"]', 'a.page-link.prev', '.prev a'
3142
+ ];
3143
+ if (!nextPage) {
3144
+ for (const sel of nextSelectors) {
3145
+ try {
3146
+ const el = document.querySelector(sel);
3147
+ if (el?.href) {
3148
+ nextPage = el.href;
3149
+ break;
3150
+ }
3151
+ }
3152
+ catch { /* invalid selector */ }
2961
3153
  }
2962
3154
  }
2963
- catch { /* invalid selector */ }
2964
- }
2965
- for (const sel of prevSelectors) {
2966
- try {
2967
- const el = document.querySelector(sel);
2968
- if (el?.href) {
2969
- prevPage = el.href;
2970
- break;
3155
+ // Text-based next detection
3156
+ if (!nextPage) {
3157
+ const links = Array.from(document.querySelectorAll('a'));
3158
+ for (const link of links) {
3159
+ const text = link.textContent?.toLowerCase().trim() || '';
3160
+ if (text === 'next' || text === 'next →' || text === '>' || text === '»' || text === 'next page') {
3161
+ nextPage = link.href;
3162
+ break;
3163
+ }
2971
3164
  }
2972
3165
  }
2973
- catch { /* invalid selector */ }
2974
- }
2975
- // Count page numbers
2976
- const pageNumbers = Array.from(document.querySelectorAll('.pagination a, .page-numbers a, nav a'))
2977
- .map(a => parseInt(a.textContent || '0', 10))
2978
- .filter(n => !isNaN(n) && n > 0);
2979
- if (pageNumbers.length > 0) {
2980
- totalPages = Math.max(...pageNumbers);
2981
- }
2982
- return { nextPage, prevPage, totalPages };
2983
- });
3166
+ for (const sel of prevSelectors) {
3167
+ try {
3168
+ const el = document.querySelector(sel);
3169
+ if (el?.href) {
3170
+ prevPage = el.href;
3171
+ break;
3172
+ }
3173
+ }
3174
+ catch { /* invalid selector */ }
3175
+ }
3176
+ // Detect current page and total pages
3177
+ const pageNumbers = Array.from(document.querySelectorAll('.pagination a, .page-numbers a, nav a, .pager a'))
3178
+ .map(a => ({
3179
+ num: parseInt(a.textContent || '0', 10),
3180
+ isActive: a.classList.contains('active') || a.classList.contains('current') ||
3181
+ a.getAttribute('aria-current') === 'page'
3182
+ }))
3183
+ .filter(p => !isNaN(p.num) && p.num > 0);
3184
+ if (pageNumbers.length > 0) {
3185
+ totalPages = Math.max(...pageNumbers.map(p => p.num));
3186
+ const active = pageNumbers.find(p => p.isActive);
3187
+ if (active)
3188
+ currentPage = active.num;
3189
+ }
3190
+ return { nextPage, prevPage, totalPages, currentPage };
3191
+ }, paginationSelector);
3192
+ return { links: allLinks, pagination };
3193
+ };
2984
3194
  // ============================================================
2985
- // 3. SMART LINK CATEGORIZATION
3195
+ // SMART LINK CATEGORIZATION
2986
3196
  // ============================================================
2987
3197
  const categorizeLink = (url, text, attrs) => {
2988
3198
  const urlLower = url.toLowerCase();
@@ -3017,55 +3227,107 @@ export async function handleLinkHarvester(page, args) {
3017
3227
  return 'info';
3018
3228
  return 'navigation';
3019
3229
  };
3230
+ // ============================================================
3231
+ // MAIN SCRAPING LOGIC
3232
+ // ============================================================
3020
3233
  const processedLinks = [];
3021
3234
  const categories = {};
3022
3235
  const seen = new Set();
3023
3236
  let internal = 0;
3024
3237
  let external = 0;
3025
- for (const link of allLinks) {
3026
- try {
3027
- // Dedup by URL
3028
- if (seen.has(link.url))
3029
- continue;
3030
- seen.add(link.url);
3031
- const linkUrl = new URL(link.url);
3032
- const isInternal = linkUrl.hostname === currentUrl.hostname;
3033
- if (args.filter && !link.url.includes(args.filter) && !link.text.includes(args.filter)) {
3034
- continue;
3035
- }
3036
- if (isInternal && args.includeInternal === false)
3037
- continue;
3038
- if (!isInternal && args.includeExternal === false)
3039
- continue;
3040
- const category = categorizeLink(link.url, link.text, link.attrs);
3041
- categories[category] = (categories[category] || 0) + 1;
3042
- processedLinks.push({
3043
- url: link.url,
3044
- text: link.text,
3045
- type: isInternal ? 'internal' : 'external',
3046
- category,
3047
- });
3048
- if (isInternal)
3049
- internal++;
3050
- else
3051
- external++;
3052
- if (args.maxLinks && processedLinks.length >= args.maxLinks)
3053
- break;
3238
+ let pagesScraped = 0;
3239
+ let lastPagination = {};
3240
+ const visitedPages = new Set();
3241
+ // Process links from a page
3242
+ const processLinks = (allLinks, pageNum) => {
3243
+ for (const link of allLinks) {
3244
+ try {
3245
+ if (seen.has(link.url))
3246
+ continue;
3247
+ seen.add(link.url);
3248
+ const linkUrl = new URL(link.url);
3249
+ const isInternal = linkUrl.hostname === currentUrl.hostname;
3250
+ if (args.filter && !link.url.includes(args.filter) && !link.text.includes(args.filter)) {
3251
+ continue;
3252
+ }
3253
+ if (isInternal && args.includeInternal === false)
3254
+ continue;
3255
+ if (!isInternal && args.includeExternal === false)
3256
+ continue;
3257
+ const category = categorizeLink(link.url, link.text, link.attrs);
3258
+ categories[category] = (categories[category] || 0) + 1;
3259
+ processedLinks.push({
3260
+ url: link.url,
3261
+ text: link.text,
3262
+ type: isInternal ? 'internal' : 'external',
3263
+ category,
3264
+ page: pageNum,
3265
+ });
3266
+ if (isInternal)
3267
+ internal++;
3268
+ else
3269
+ external++;
3270
+ if (args.maxLinks && processedLinks.length >= args.maxLinks)
3271
+ return true; // Stop
3272
+ }
3273
+ catch {
3274
+ // Invalid URL, skip
3275
+ }
3054
3276
  }
3055
- catch {
3056
- // Invalid URL, skip
3277
+ return false; // Continue
3278
+ };
3279
+ // Scrape first page
3280
+ tracker.setProgress(20, '🔍 Extracting links from page 1...');
3281
+ const firstPage = await extractLinksFromPage();
3282
+ pagesScraped = 1;
3283
+ visitedPages.add(page.url());
3284
+ lastPagination = firstPage.pagination;
3285
+ const shouldStop = processLinks(firstPage.links, 1);
3286
+ // Follow pagination if enabled
3287
+ if (followPagination && !shouldStop && firstPage.pagination.nextPage) {
3288
+ let nextUrl = firstPage.pagination.nextPage;
3289
+ while (nextUrl && pagesScraped < maxPages && !(args.maxLinks && processedLinks.length >= args.maxLinks)) {
3290
+ // Check if we've already visited this page
3291
+ if (visitedPages.has(nextUrl)) {
3292
+ break;
3293
+ }
3294
+ visitedPages.add(nextUrl);
3295
+ tracker.setProgress(20 + (pagesScraped / maxPages) * 60, `📄 Scraping page ${pagesScraped + 1}...`);
3296
+ try {
3297
+ // Navigate to next page
3298
+ await page.goto(nextUrl, {
3299
+ waitUntil: 'domcontentloaded',
3300
+ timeout: 15000
3301
+ });
3302
+ // Wait for content to load
3303
+ await new Promise(r => setTimeout(r, delayBetweenPages));
3304
+ // Extract links from this page
3305
+ const pageData = await extractLinksFromPage();
3306
+ pagesScraped++;
3307
+ lastPagination = pageData.pagination;
3308
+ const stop = processLinks(pageData.links, pagesScraped);
3309
+ if (stop)
3310
+ break;
3311
+ // Get next page URL
3312
+ nextUrl = pageData.pagination.nextPage || undefined;
3313
+ }
3314
+ catch (error) {
3315
+ // Failed to navigate, stop pagination
3316
+ break;
3317
+ }
3057
3318
  }
3058
3319
  }
3059
- tracker.setProgress(90, `✅ Processed ${processedLinks.length} links`);
3060
- tracker.complete(`🎉 Link harvesting complete: ${processedLinks.length} links found`);
3320
+ tracker.setProgress(90, `✅ Processed ${processedLinks.length} links from ${pagesScraped} pages`);
3321
+ tracker.complete(`🎉 Link harvesting complete: ${processedLinks.length} links from ${pagesScraped} pages`);
3061
3322
  return {
3062
3323
  links: processedLinks,
3063
3324
  internal,
3064
3325
  external,
3065
- pagination: (pagination.nextPage || pagination.prevPage || pagination.totalPages) ? pagination : undefined,
3326
+ pagination: (lastPagination.nextPage || lastPagination.prevPage || lastPagination.totalPages) ? lastPagination : undefined,
3066
3327
  categories,
3067
- message: `🔗 Found ${processedLinks.length} links (${internal} internal, ${external} external)` +
3068
- (pagination.nextPage ? ` | Next: ${pagination.nextPage}` : '')
3328
+ pagesScraped,
3329
+ message: `🔗 Found ${processedLinks.length} links (${internal} internal, ${external} external) from ${pagesScraped} pages` +
3330
+ (lastPagination.nextPage && pagesScraped >= maxPages ? ` | More pages available: ${lastPagination.nextPage}` : '')
3069
3331
  };
3070
3332
  }
3071
3333
  /**
@@ -3440,10 +3702,13 @@ export async function handleSolveCaptchaAdvanced(page, args) {
3440
3702
  }
3441
3703
  /**
3442
3704
  * Parse and extract HLS/m3u8 streaming URLs
3705
+ * ENHANCED: Segment parsing, bandwidth extraction, playlist fetching
3443
3706
  */
3444
3707
  export async function handleM3u8Parser(page, args) {
3445
3708
  const streams = [];
3446
3709
  const qualities = [];
3710
+ const variants = [];
3711
+ const segments = [];
3447
3712
  let masterPlaylist;
3448
3713
  // Intercept network requests to find m3u8 files
3449
3714
  const m3u8Urls = [];
@@ -3544,6 +3809,109 @@ export async function handleM3u8Parser(page, args) {
3544
3809
  streams.push(stream);
3545
3810
  }
3546
3811
  }
3812
+ // ============================================================
3813
+ // NEW: FETCH AND PARSE MASTER PLAYLIST FOR VARIANTS
3814
+ // ============================================================
3815
+ if ((args.fetchPlaylist || args.extractBandwidth) && masterPlaylist) {
3816
+ try {
3817
+ const playlistContent = await page.evaluate(async (url) => {
3818
+ try {
3819
+ const response = await fetch(url);
3820
+ return await response.text();
3821
+ }
3822
+ catch {
3823
+ return null;
3824
+ }
3825
+ }, masterPlaylist);
3826
+ if (playlistContent) {
3827
+ // Parse #EXT-X-STREAM-INF lines for variants
3828
+ const variantRegex = /#EXT-X-STREAM-INF:.*?BANDWIDTH=(\d+)(?:.*?RESOLUTION=(\d+x\d+))?[^\n]*\n([^\n]+)/g;
3829
+ let match;
3830
+ while ((match = variantRegex.exec(playlistContent)) !== null) {
3831
+ const bandwidth = parseInt(match[1], 10);
3832
+ const resolution = match[2] || undefined;
3833
+ let variantUrl = match[3].trim();
3834
+ // Make relative URLs absolute
3835
+ if (!variantUrl.startsWith('http')) {
3836
+ const baseUrl = masterPlaylist.substring(0, masterPlaylist.lastIndexOf('/') + 1);
3837
+ variantUrl = baseUrl + variantUrl;
3838
+ }
3839
+ // Determine quality from resolution or bandwidth
3840
+ let quality = 'unknown';
3841
+ if (resolution) {
3842
+ const height = parseInt(resolution.split('x')[1], 10);
3843
+ if (height >= 2160)
3844
+ quality = '4K';
3845
+ else if (height >= 1080)
3846
+ quality = '1080p';
3847
+ else if (height >= 720)
3848
+ quality = '720p';
3849
+ else if (height >= 480)
3850
+ quality = '480p';
3851
+ else if (height >= 360)
3852
+ quality = '360p';
3853
+ else
3854
+ quality = `${height}p`;
3855
+ }
3856
+ else if (bandwidth >= 5000000)
3857
+ quality = '1080p';
3858
+ else if (bandwidth >= 2500000)
3859
+ quality = '720p';
3860
+ else if (bandwidth >= 1000000)
3861
+ quality = '480p';
3862
+ else
3863
+ quality = '360p';
3864
+ variants.push({ quality, bandwidth, url: variantUrl, resolution });
3865
+ }
3866
+ // Sort variants by bandwidth (highest first)
3867
+ variants.sort((a, b) => b.bandwidth - a.bandwidth);
3868
+ }
3869
+ }
3870
+ catch (e) {
3871
+ // Ignore fetch errors
3872
+ }
3873
+ }
3874
+ // ============================================================
3875
+ // NEW: PARSE SEGMENTS FROM MEDIA PLAYLIST
3876
+ // ============================================================
3877
+ if (args.parseSegments && streams.length > 0) {
3878
+ const mediaPlaylistUrl = streams[0].url;
3879
+ try {
3880
+ const mediaContent = await page.evaluate(async (url) => {
3881
+ try {
3882
+ const response = await fetch(url);
3883
+ return await response.text();
3884
+ }
3885
+ catch {
3886
+ return null;
3887
+ }
3888
+ }, mediaPlaylistUrl);
3889
+ if (mediaContent) {
3890
+ const lines = mediaContent.split('\n');
3891
+ let segmentIndex = 0;
3892
+ let currentDuration = 0;
3893
+ for (let i = 0; i < lines.length; i++) {
3894
+ const line = lines[i].trim();
3895
+ // Parse duration from #EXTINF
3896
+ if (line.startsWith('#EXTINF:')) {
3897
+ currentDuration = parseFloat(line.replace('#EXTINF:', '').split(',')[0]);
3898
+ }
3899
+ // Capture segment URL
3900
+ else if (line && !line.startsWith('#') && (line.includes('.ts') || line.includes('.m4s'))) {
3901
+ let segmentUrl = line;
3902
+ if (!segmentUrl.startsWith('http')) {
3903
+ const baseUrl = mediaPlaylistUrl.substring(0, mediaPlaylistUrl.lastIndexOf('/') + 1);
3904
+ segmentUrl = baseUrl + segmentUrl;
3905
+ }
3906
+ segments.push({ url: segmentUrl, duration: currentDuration, index: segmentIndex++ });
3907
+ }
3908
+ }
3909
+ }
3910
+ }
3911
+ catch (e) {
3912
+ // Ignore segment parsing errors
3913
+ }
3914
+ }
3547
3915
  // Filter audio if not wanted
3548
3916
  const filteredStreams = args.includeAudio !== false
3549
3917
  ? streams
@@ -3553,6 +3921,8 @@ export async function handleM3u8Parser(page, args) {
3553
3921
  streams: filteredStreams,
3554
3922
  masterPlaylist,
3555
3923
  qualities: [...new Set(qualities)],
3924
+ variants: variants.length > 0 ? variants : undefined,
3925
+ segments: segments.length > 0 ? segments : undefined,
3556
3926
  };
3557
3927
  }
3558
3928
  /**
@@ -4414,11 +4784,76 @@ export async function handleCloudflareBypass(page, args) {
4414
4784
  /**
4415
4785
  * Master tool: Extract direct stream/download URLs
4416
4786
  * ULTRA POWERFUL: Handles packed JS, JW Player, Video.js, HLS.js, obfuscated scripts
4787
+ * ENHANCED: Multi-Quality Selector, VidSrc, Filemoon, StreamWish support
4417
4788
  */
4418
4789
  export async function handleStreamExtractor(page, args) {
4419
4790
  const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm', 'flv', 'avi'];
4420
4791
  const maxRedirects = args.maxRedirects || 10;
4421
4792
  const directUrls = [];
4793
+ const subtitles = [];
4794
+ // Quality priority for auto-selection
4795
+ const qualityPriority = {
4796
+ '2160p': 100, '4k': 100, 'uhd': 100,
4797
+ '1080p': 90, 'fhd': 90, 'full hd': 90,
4798
+ '720p': 80, 'hd': 80,
4799
+ '480p': 70, 'sd': 70,
4800
+ '360p': 60,
4801
+ '240p': 50,
4802
+ '144p': 40,
4803
+ 'unknown': 10, 'auto': 10
4804
+ };
4805
+ // Site-specific extraction patterns
4806
+ const sitePatterns = {
4807
+ vidsrc: {
4808
+ urlPattern: /vidsrc|v2\.vidsrc/i,
4809
+ sourcePattern: [
4810
+ /source:\s*["']([^"']+\.m3u8[^"']*)/gi,
4811
+ /file:\s*["']([^"']+\.m3u8[^"']*)/gi
4812
+ ]
4813
+ },
4814
+ filemoon: {
4815
+ urlPattern: /filemoon|moonplayer/i,
4816
+ sourcePattern: [
4817
+ /sources:\s*\[\s*\{[^}]*file:\s*["']([^"']+)/gi,
4818
+ /eval\(function\(p,a,c,k,e,[rd]\)/gi
4819
+ ]
4820
+ },
4821
+ streamwish: {
4822
+ urlPattern: /streamwish|swish/i,
4823
+ sourcePattern: [
4824
+ /file:\s*["']([^"']+\.m3u8[^"']*)/gi,
4825
+ /sources:\s*\[.*?["']([^"']+\.m3u8[^"']*)/gi
4826
+ ]
4827
+ },
4828
+ doodstream: {
4829
+ urlPattern: /dood|doodstream/i,
4830
+ sourcePattern: [
4831
+ /\/pass_md5\/[^"']+/gi,
4832
+ /\$.get\(['"]([^'"]+pass_md5[^'"]+)/gi
4833
+ ]
4834
+ },
4835
+ mixdrop: {
4836
+ urlPattern: /mixdrop/i,
4837
+ sourcePattern: [
4838
+ /MDCore\.wurl\s*=\s*["']([^"']+)/gi,
4839
+ /wurl\s*=\s*["']([^"']+)/gi
4840
+ ]
4841
+ },
4842
+ streamtape: {
4843
+ urlPattern: /streamtape/i,
4844
+ sourcePattern: [
4845
+ /id=.*?&token=/gi,
4846
+ /robotlink.*?=\s*['"]([^'"]+)/gi
4847
+ ]
4848
+ },
4849
+ mp4upload: {
4850
+ urlPattern: /mp4upload/i,
4851
+ sourcePattern: [
4852
+ /player\.src\(\{src:\s*["']([^"']+)/gi,
4853
+ /src:\s*["']([^"']+\.mp4[^"']*)/gi
4854
+ ]
4855
+ }
4856
+ };
4422
4857
  // Navigate if URL provided
4423
4858
  if (args.url) {
4424
4859
  await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
@@ -4652,25 +5087,173 @@ export async function handleStreamExtractor(page, args) {
4652
5087
  directUrls.push({ url, format, source: 'network' });
4653
5088
  }
4654
5089
  }
5090
+ // ============================================================
5091
+ // NEW: SITE-SPECIFIC EXTRACTION (VidSrc, Filemoon, StreamWish, etc.)
5092
+ // ============================================================
5093
+ if (args.siteType && args.siteType !== 'auto') {
5094
+ const siteConfig = sitePatterns[args.siteType];
5095
+ if (siteConfig) {
5096
+ const html = await page.content();
5097
+ for (const pattern of siteConfig.sourcePattern) {
5098
+ let match;
5099
+ while ((match = pattern.exec(html)) !== null) {
5100
+ if (match[1] && !directUrls.some(d => d.url === match[1])) {
5101
+ const format = formats.find(f => match[1].includes(`.${f}`)) || 'm3u8';
5102
+ directUrls.push({ url: match[1], format, source: args.siteType });
5103
+ }
5104
+ }
5105
+ }
5106
+ }
5107
+ }
5108
+ else {
5109
+ // Auto-detect site type from URL
5110
+ const currentUrl = page.url();
5111
+ for (const [siteName, config] of Object.entries(sitePatterns)) {
5112
+ if (config.urlPattern.test(currentUrl)) {
5113
+ const html = await page.content();
5114
+ for (const pattern of config.sourcePattern) {
5115
+ let match;
5116
+ while ((match = pattern.exec(html)) !== null) {
5117
+ if (match[1] && !directUrls.some(d => d.url === match[1])) {
5118
+ const format = formats.find(f => match[1].includes(`.${f}`)) || 'm3u8';
5119
+ directUrls.push({ url: match[1], format, source: siteName });
5120
+ }
5121
+ }
5122
+ }
5123
+ break;
5124
+ }
5125
+ }
5126
+ }
5127
+ // ============================================================
5128
+ // NEW: EXTRACT QUALITY FROM URLs
5129
+ // ============================================================
5130
+ for (const item of directUrls) {
5131
+ if (!item.quality || item.quality === 'auto') {
5132
+ const url = item.url.toLowerCase();
5133
+ if (url.includes('2160') || url.includes('4k') || url.includes('uhd'))
5134
+ item.quality = '2160p';
5135
+ else if (url.includes('1080'))
5136
+ item.quality = '1080p';
5137
+ else if (url.includes('720'))
5138
+ item.quality = '720p';
5139
+ else if (url.includes('480'))
5140
+ item.quality = '480p';
5141
+ else if (url.includes('360'))
5142
+ item.quality = '360p';
5143
+ else if (url.includes('240'))
5144
+ item.quality = '240p';
5145
+ else if (url.includes('144'))
5146
+ item.quality = '144p';
5147
+ else
5148
+ item.quality = 'unknown';
5149
+ }
5150
+ }
5151
+ // ============================================================
5152
+ // NEW: AUTO-SELECT BEST QUALITY
5153
+ // ============================================================
5154
+ let bestQuality;
5155
+ if (args.autoSelectBest || args.preferredQuality) {
5156
+ const preferredQ = args.preferredQuality || 'highest';
5157
+ if (preferredQ === 'highest') {
5158
+ // Sort by quality priority (highest first)
5159
+ const sorted = [...directUrls].sort((a, b) => {
5160
+ const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
5161
+ const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
5162
+ return bScore - aScore;
5163
+ });
5164
+ if (sorted.length > 0) {
5165
+ bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
5166
+ }
5167
+ }
5168
+ else if (preferredQ === 'lowest') {
5169
+ // Sort by quality priority (lowest first)
5170
+ const sorted = [...directUrls].sort((a, b) => {
5171
+ const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
5172
+ const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
5173
+ return aScore - bScore;
5174
+ });
5175
+ if (sorted.length > 0) {
5176
+ bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
5177
+ }
5178
+ }
5179
+ else {
5180
+ // Find exact match for preferred quality
5181
+ const exact = directUrls.find(d => d.quality?.toLowerCase() === preferredQ.toLowerCase());
5182
+ if (exact) {
5183
+ bestQuality = { url: exact.url, format: exact.format, quality: exact.quality || preferredQ, source: exact.source };
5184
+ }
5185
+ else {
5186
+ // Fallback to highest available
5187
+ const sorted = [...directUrls].sort((a, b) => {
5188
+ const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
5189
+ const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
5190
+ return bScore - aScore;
5191
+ });
5192
+ if (sorted.length > 0) {
5193
+ bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
5194
+ }
5195
+ }
5196
+ }
5197
+ }
5198
+ // ============================================================
5199
+ // NEW: EXTRACT SUBTITLES
5200
+ // ============================================================
5201
+ if (args.extractSubtitles) {
5202
+ const subData = await page.evaluate(() => {
5203
+ const subs = [];
5204
+ // HTML5 track elements
5205
+ document.querySelectorAll('track[kind="subtitles"], track[kind="captions"]').forEach(track => {
5206
+ const src = track.getAttribute('src');
5207
+ if (src) {
5208
+ subs.push({
5209
+ url: src,
5210
+ language: track.getAttribute('srclang') || undefined,
5211
+ label: track.getAttribute('label') || undefined
5212
+ });
5213
+ }
5214
+ });
5215
+ // VTT/SRT links
5216
+ document.querySelectorAll('a[href*=".vtt"], a[href*=".srt"], a[href*=".ass"]').forEach(link => {
5217
+ const href = link.href;
5218
+ subs.push({ url: href, label: link.textContent?.trim() || undefined });
5219
+ });
5220
+ // Look in scripts for subtitle URLs
5221
+ const html = document.documentElement.innerHTML;
5222
+ const vttMatches = html.match(/https?:\/\/[^\s"']+\.vtt[^\s"']*/gi);
5223
+ const srtMatches = html.match(/https?:\/\/[^\s"']+\.srt[^\s"']*/gi);
5224
+ if (vttMatches)
5225
+ vttMatches.forEach(url => subs.push({ url }));
5226
+ if (srtMatches)
5227
+ srtMatches.forEach(url => subs.push({ url }));
5228
+ // Deduplicate
5229
+ const seen = new Set();
5230
+ return subs.filter(s => {
5231
+ if (seen.has(s.url))
5232
+ return false;
5233
+ seen.add(s.url);
5234
+ return true;
5235
+ });
5236
+ });
5237
+ subtitles.push(...subData);
5238
+ }
4655
5239
  return {
4656
5240
  success: directUrls.length > 0,
4657
5241
  directUrls,
5242
+ bestQuality,
5243
+ subtitles: args.extractSubtitles ? subtitles : undefined,
4658
5244
  message: directUrls.length > 0
4659
- ? `🎬 Found ${directUrls.length} direct URL(s) from ${new Set(directUrls.map(d => d.source)).size} sources`
5245
+ ? `🎬 Found ${directUrls.length} URL(s)${bestQuality ? ` | Best: ${bestQuality.quality}` : ''}${subtitles.length > 0 ? ` | ${subtitles.length} subtitles` : ''}`
4660
5246
  : 'No direct URLs found',
4661
5247
  };
4662
5248
  }
4663
- /**
4664
- * JS Scrape - Single-call JavaScript-rendered content extraction
4665
- * Combines navigation, waiting, scrolling, and content extraction in one call
4666
- * Perfect for scraping dynamic/AJAX-loaded content
4667
- */
4668
- export async function handleJsScrape(page, args) {
5249
+ // Helper function to scrape a single URL
5250
+ async function scrapeSingleUrl(page, url, args) {
5251
+ const startTime = Date.now();
4669
5252
  const waitForTimeout = args.waitForTimeout || 10000;
4670
5253
  const returnType = args.returnType || 'html';
4671
5254
  try {
4672
5255
  // Step 1: Navigate to URL
4673
- await page.goto(args.url, {
5256
+ await page.goto(url, {
4674
5257
  waitUntil: 'domcontentloaded',
4675
5258
  timeout: waitForTimeout
4676
5259
  });
@@ -4772,26 +5355,156 @@ export async function handleJsScrape(page, args) {
4772
5355
  }
4773
5356
  return {
4774
5357
  success: true,
4775
- url: args.url,
5358
+ url,
4776
5359
  finalUrl,
4777
5360
  title,
4778
5361
  html,
4779
5362
  text,
4780
5363
  elements,
4781
- elementCount
5364
+ elementCount,
5365
+ duration: Date.now() - startTime
4782
5366
  };
4783
5367
  }
4784
5368
  catch (error) {
4785
5369
  return {
4786
5370
  success: false,
4787
- url: args.url,
4788
- finalUrl: page.url() || args.url,
5371
+ url,
5372
+ finalUrl: page.url() || url,
4789
5373
  title: '',
4790
5374
  elementCount: 0,
4791
- error: error instanceof Error ? error.message : String(error)
5375
+ error: error instanceof Error ? error.message : String(error),
5376
+ duration: Date.now() - startTime
4792
5377
  };
4793
5378
  }
4794
5379
  }
/**
 * JS Scrape - Single-call JavaScript-rendered content extraction.
 *
 * Two modes:
 *  - Single URL (only `args.url` given): scrapes on the supplied `page` and
 *    returns the `scrapeSingleUrl` result unchanged (backward compatible).
 *  - Multiple URLs (`args.urls` is a non-empty array): opens one new page per
 *    URL on `page.browser()` and scrapes them with a bounded worker pool.
 *
 * @param {object} page - Page used for single-URL mode; in multi-URL mode only
 *   `page.browser()` is used to open additional pages.
 * @param {object} args - Scrape options (forwarded as-is to `scrapeSingleUrl`).
 * @param {string} [args.url] - Single URL to scrape.
 * @param {string[]} [args.urls] - URLs to scrape in parallel. An empty array is
 *   ignored so that `args.url` still works.
 * @param {number} [args.concurrency=3] - Maximum simultaneous scrapes, capped at
 *   10. Values below 1 or non-numeric values fall back to the default.
 * @param {boolean} [args.continueOnError=true] - When `false`, no further URLs
 *   are started after the first failed result (in-flight scrapes still finish).
 * @param {number} [args.delayBetween=500] - Milliseconds to wait before starting
 *   a scrape once at least one scrape has completed. `0` disables the delay.
 * @returns {Promise<object>} Single mode: the `scrapeSingleUrl` result.
 *   Multi mode: `{ success, isParallel, urls, results, totalUrls, successCount,
 *   failedCount, totalDuration, error }`, with `results` in input order.
 */
export async function handleJsScrape(page, args) {
    const DEFAULT_CONCURRENCY = 3;
    const MAX_CONCURRENCY = 10;
    const DEFAULT_DELAY_MS = 500;
    const startTime = Date.now();

    // Determine URLs to scrape. An empty `urls` array must not shadow `url`.
    const requestedUrls = Array.isArray(args.urls) ? args.urls : [];
    const hasUrlList = requestedUrls.length > 0;
    const urlList = hasUrlList ? [...requestedUrls] : (args.url ? [args.url] : []);
    if (urlList.length === 0) {
        return {
            success: false,
            error: 'No URL(s) provided. Use "url" for single URL or "urls" for multiple URLs.',
            elementCount: 0
        };
    }

    // Single URL mode - backward compatible result shape.
    if (!hasUrlList) {
        return scrapeSingleUrl(page, urlList[0], args);
    }

    // Parallel scraping mode.
    // A limit below 1 would never start a task, so invalid values use the default.
    const requestedConcurrency = Number(args.concurrency);
    const concurrency = Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1
        ? Math.min(Math.floor(requestedConcurrency), MAX_CONCURRENCY)
        : DEFAULT_CONCURRENCY;
    const continueOnError = args.continueOnError !== false;
    // `??` (not `||`) so an explicit 0 disables the delay.
    const requestedDelay = Number(args.delayBetween ?? DEFAULT_DELAY_MS);
    const delayBetween = Number.isFinite(requestedDelay) && requestedDelay >= 0
        ? requestedDelay
        : DEFAULT_DELAY_MS;

    const browser = page.browser();
    if (!browser) {
        return {
            success: false,
            error: 'Browser not available for parallel scraping',
            elementCount: 0
        };
    }

    // Scrape one URL in its own page; the page is always closed afterwards.
    const scrapeInNewPage = async (url) => {
        const taskStart = Date.now();
        let newPage = null;
        try {
            newPage = await browser.newPage();
            await newPage.setViewport({ width: 1280, height: 720 });
            return await scrapeSingleUrl(newPage, url, args);
        }
        catch (error) {
            return {
                success: false,
                url,
                finalUrl: url,
                title: '',
                elementCount: 0,
                error: error instanceof Error ? error.message : String(error),
                duration: Date.now() - taskStart
            };
        }
        finally {
            if (newPage) {
                try {
                    await newPage.close();
                }
                catch {
                    // Best-effort cleanup: a failed close must not mask the scrape result.
                }
            }
        }
    };

    // Slots are indexed by input position so output order matches `urls`.
    const slots = Array.from({ length: urlList.length });
    const errors = [];
    let nextIndex = 0;
    let completedCount = 0;
    let stopped = false;

    // Each worker pulls the next unclaimed URL until the list is exhausted or
    // a failure requested a stop. JS is single-threaded between awaits, so the
    // shared counters need no locking.
    const worker = async () => {
        while (!stopped && nextIndex < urlList.length) {
            const index = nextIndex++;
            const url = urlList[index];
            if (completedCount > 0 && delayBetween > 0) {
                await new Promise((resolve) => setTimeout(resolve, delayBetween));
                if (stopped) {
                    break;
                }
            }
            const result = await scrapeInNewPage(url);
            completedCount++;
            if (!result) {
                continue;
            }
            slots[index] = result;
            if (!result.success && !continueOnError) {
                stopped = true;
                errors.push(`Stopped at ${url}: ${result.error}`);
            }
        }
    };

    const workerCount = Math.min(concurrency, urlList.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));

    // Drop slots for URLs that were never started (stop-on-error).
    const results = slots.filter((entry) => entry !== undefined);
    const successCount = results.filter((r) => r.success).length;
    const failedCount = results.length - successCount;
    return {
        success: successCount > 0,
        isParallel: true,
        urls: urlList,
        results,
        totalUrls: urlList.length,
        successCount,
        failedCount,
        totalDuration: Date.now() - startTime,
        error: errors.length > 0 ? errors.join('; ') : undefined
    };
}
4795
5508
  /**
4796
5509
  * Execute custom JavaScript on page
4797
5510
  * ULTRA POWERFUL: Execute any JS code and get results
@@ -4801,9 +5514,16 @@ export async function handleExecuteJs(page, args) {
4801
5514
  const waitTime = args.waitForResult || 5000;
4802
5515
  try {
4803
5516
  let result;
5517
+ // Auto-detect if code contains await and wrap with async IIFE
5518
+ const hasAwait = /\bawait\b/.test(args.code);
5519
+ const isAlreadyAsync = /^\s*\(async\s+function|\(\s*async\s*\(|\(async\s*\(\)\s*=>/.test(args.code.trim());
5520
+ // Wrap code with async IIFE if it contains await but isn't already async
5521
+ const codeToExecute = (hasAwait && !isAlreadyAsync)
5522
+ ? `(async () => { ${args.code} })()`
5523
+ : args.code;
4804
5524
  if (args.context === 'isolated') {
4805
5525
  // Execute in isolated context (sandboxed)
4806
- result = await page.evaluate(args.code);
5526
+ result = await page.evaluate(codeToExecute);
4807
5527
  }
4808
5528
  else {
4809
5529
  // Execute in page context with full access
@@ -4815,7 +5535,7 @@ export async function handleExecuteJs(page, args) {
4815
5535
  catch (e) {
4816
5536
  return { error: String(e) };
4817
5537
  }
4818
- }, args.code);
5538
+ }, codeToExecute);
4819
5539
  }
4820
5540
  // Handle async results
4821
5541
  if (result instanceof Promise) {