brave-real-browser-mcp-server 2.27.32 → 2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +344 -0
- package/dist/handlers/advanced-tools.js +899 -179
- package/dist/handlers/navigation-handlers.js +220 -16
- package/dist/handlers/tool-executor.js +201 -0
- package/dist/tool-definitions.js +104 -6
- package/package.json +2 -2
|
@@ -1707,6 +1707,7 @@ export async function handleExtractJson(page, args) {
|
|
|
1707
1707
|
try {
|
|
1708
1708
|
const key = args.decryptAES.key || "kiemtienmua911ca";
|
|
1709
1709
|
const ivList = args.decryptAES.ivList || ["1234567890oiuytr", "0123456789abcdef"];
|
|
1710
|
+
const autoIV = args.decryptAES.autoIV !== false; // Default to true for hubstream-style encryption
|
|
1710
1711
|
let input = args.decryptAES.input || "";
|
|
1711
1712
|
// Option: Fetch encrypted data from API URL directly (recommended for hubstream)
|
|
1712
1713
|
if (!input && args.decryptAES.fetchUrl) {
|
|
@@ -1733,24 +1734,43 @@ export async function handleExtractJson(page, args) {
|
|
|
1733
1734
|
}
|
|
1734
1735
|
return bytes;
|
|
1735
1736
|
};
|
|
1736
|
-
|
|
1737
|
+
// Build IV list - if autoIV, try extracting from first 16 bytes first
|
|
1738
|
+
const ivsToTry = [];
|
|
1739
|
+
if (params.autoIV && encryptedHex.length >= 32) {
|
|
1740
|
+
// First 32 hex chars = first 16 bytes = IV (hubstream style)
|
|
1741
|
+
const ivHex = encryptedHex.substring(0, 32);
|
|
1742
|
+
ivsToTry.push({ iv: hexToBytes(ivHex), label: 'autoIV (first 16 bytes)' });
|
|
1743
|
+
}
|
|
1744
|
+
// Add manual IV list
|
|
1745
|
+
for (const ivStr of params.ivList) {
|
|
1746
|
+
ivsToTry.push({ iv: new TextEncoder().encode(ivStr), label: ivStr });
|
|
1747
|
+
}
|
|
1748
|
+
for (const { iv: ivData, label: ivLabel } of ivsToTry) {
|
|
1737
1749
|
try {
|
|
1738
1750
|
const keyData = new TextEncoder().encode(params.key);
|
|
1739
|
-
|
|
1740
|
-
const
|
|
1751
|
+
// If using autoIV, encrypted data starts after first 32 hex chars (16 bytes)
|
|
1752
|
+
const isAutoIV = ivLabel.startsWith('autoIV');
|
|
1753
|
+
const dataHex = isAutoIV ? encryptedHex.substring(32) : encryptedHex;
|
|
1754
|
+
const encryptedData = hexToBytes(dataHex);
|
|
1741
1755
|
const cryptoKey = await crypto.subtle.importKey("raw", keyData, "AES-CBC", false, ["decrypt"]);
|
|
1742
1756
|
const decrypted = await crypto.subtle.decrypt({ name: "AES-CBC", iv: ivData }, cryptoKey, encryptedData);
|
|
1743
1757
|
const decryptedText = new TextDecoder().decode(decrypted);
|
|
1744
|
-
// Extract
|
|
1758
|
+
// Extract URLs from decrypted JSON
|
|
1745
1759
|
const sourceMatch = /"source":"([^"]+)"/.exec(decryptedText);
|
|
1760
|
+
const mp4Match = /"mp4":"([^"]+)"/.exec(decryptedText);
|
|
1761
|
+
const hlsMatch = /"hls":"([^"]+)"/.exec(decryptedText);
|
|
1746
1762
|
const streamUrl = sourceMatch ? sourceMatch[1].replace(/\\\//g, '/') : null;
|
|
1763
|
+
const mp4Url = mp4Match ? mp4Match[1].replace(/\\\//g, '/') : null;
|
|
1764
|
+
const hlsUrl = hlsMatch ? hlsMatch[1].replace(/\\\//g, '/') : null;
|
|
1747
1765
|
return {
|
|
1748
1766
|
success: true,
|
|
1749
|
-
iv:
|
|
1767
|
+
iv: ivLabel,
|
|
1750
1768
|
decrypted: decryptedText,
|
|
1751
1769
|
decryptedLength: decryptedText.length,
|
|
1752
1770
|
extractedStreamUrl: streamUrl,
|
|
1753
|
-
|
|
1771
|
+
extractedMp4Url: mp4Url,
|
|
1772
|
+
extractedHlsUrl: hlsUrl,
|
|
1773
|
+
isStreamUrl: (streamUrl && (streamUrl.includes('m3u8') || streamUrl.includes('.mp4'))) || !!mp4Url,
|
|
1754
1774
|
inputLength: encryptedHex.length
|
|
1755
1775
|
};
|
|
1756
1776
|
}
|
|
@@ -1763,7 +1783,7 @@ export async function handleExtractJson(page, args) {
|
|
|
1763
1783
|
catch (e) {
|
|
1764
1784
|
return { success: false, error: String(e) };
|
|
1765
1785
|
}
|
|
1766
|
-
}, { input, key, ivList, fetchUrl: args.decryptAES.fetchUrl });
|
|
1786
|
+
}, { input, key, ivList, autoIV, fetchUrl: args.decryptAES.fetchUrl });
|
|
1767
1787
|
decoded = {
|
|
1768
1788
|
source: 'decryptAES',
|
|
1769
1789
|
...result,
|
|
@@ -2306,8 +2326,9 @@ export async function handleDeepAnalysis(page, args) {
|
|
|
2306
2326
|
}
|
|
2307
2327
|
}
|
|
2308
2328
|
/**
|
|
2309
|
-
*
|
|
2329
|
+
* Network recorder with API interception capabilities
|
|
2310
2330
|
* ULTRA POWERFUL: API detection, media URLs, smart categorization
|
|
2331
|
+
* NEW: Request interception, mocking, blocking, and header modification
|
|
2311
2332
|
*/
|
|
2312
2333
|
export async function handleNetworkRecorder(page, args) {
|
|
2313
2334
|
// Progress tracking
|
|
@@ -2321,7 +2342,15 @@ export async function handleNetworkRecorder(page, args) {
|
|
|
2321
2342
|
const apis = [];
|
|
2322
2343
|
const mediaUrls = [];
|
|
2323
2344
|
const seen = new Set();
|
|
2324
|
-
|
|
2345
|
+
const interceptedRequests = [];
|
|
2346
|
+
let blockedCount = 0;
|
|
2347
|
+
let mockedCount = 0;
|
|
2348
|
+
const interceptMode = args.interceptMode || 'record';
|
|
2349
|
+
const blockPatterns = args.blockPatterns || [];
|
|
2350
|
+
const mockResponses = args.mockResponses || [];
|
|
2351
|
+
const modifyHeaders = args.modifyHeaders || [];
|
|
2352
|
+
const capturePayloads = args.capturePayloads === true;
|
|
2353
|
+
tracker.setProgress(10, `⏱️ Recording for ${duration}ms (mode: ${interceptMode})...`);
|
|
2325
2354
|
// ============================================================
|
|
2326
2355
|
// SMART CATEGORIZATION HELPER
|
|
2327
2356
|
// ============================================================
|
|
@@ -2355,75 +2384,216 @@ export async function handleNetworkRecorder(page, args) {
|
|
|
2355
2384
|
return 'document';
|
|
2356
2385
|
return 'other';
|
|
2357
2386
|
};
|
|
2358
|
-
//
|
|
2359
|
-
const
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
const resourceType = response.request()?.resourceType?.() || 'unknown';
|
|
2370
|
-
const method = response.request()?.method?.() || 'GET';
|
|
2371
|
-
const category = categorizeUrl(url, resourceType);
|
|
2372
|
-
categories[category] = (categories[category] || 0) + 1;
|
|
2373
|
-
// Collect API endpoints
|
|
2374
|
-
if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
|
|
2375
|
-
apis.push({ url, method, type: resourceType });
|
|
2387
|
+
// Helper to check URL against patterns
|
|
2388
|
+
const matchesPattern = (url, patterns) => {
|
|
2389
|
+
return patterns.some(pattern => {
|
|
2390
|
+
try {
|
|
2391
|
+
if (pattern.startsWith('/') && pattern.endsWith('/')) {
|
|
2392
|
+
// Regex pattern
|
|
2393
|
+
const regex = new RegExp(pattern.slice(1, -1));
|
|
2394
|
+
return regex.test(url);
|
|
2395
|
+
}
|
|
2396
|
+
// Simple includes check
|
|
2397
|
+
return url.includes(pattern);
|
|
2376
2398
|
}
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
mediaUrls.push(url);
|
|
2399
|
+
catch {
|
|
2400
|
+
return url.includes(pattern);
|
|
2380
2401
|
}
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2402
|
+
});
|
|
2403
|
+
};
|
|
2404
|
+
// ============================================================
|
|
2405
|
+
// INTERCEPTION MODE - Uses request interception
|
|
2406
|
+
// ============================================================
|
|
2407
|
+
if (interceptMode === 'intercept' || interceptMode === 'mock') {
|
|
2408
|
+
try {
|
|
2409
|
+
await page.setRequestInterception(true);
|
|
2410
|
+
const requestHandler = async (request) => {
|
|
2411
|
+
const url = request.url();
|
|
2412
|
+
const method = request.method();
|
|
2413
|
+
const resourceType = request.resourceType();
|
|
2414
|
+
const category = categorizeUrl(url, resourceType);
|
|
2415
|
+
// Check if should block
|
|
2416
|
+
if (blockPatterns.length > 0 && matchesPattern(url, blockPatterns)) {
|
|
2417
|
+
blockedCount++;
|
|
2418
|
+
interceptedRequests.push({
|
|
2419
|
+
url,
|
|
2420
|
+
method,
|
|
2421
|
+
action: 'blocked',
|
|
2422
|
+
timestamp: Date.now()
|
|
2423
|
+
});
|
|
2424
|
+
await request.abort();
|
|
2425
|
+
return;
|
|
2392
2426
|
}
|
|
2393
|
-
|
|
2394
|
-
|
|
2427
|
+
// Check if should mock
|
|
2428
|
+
const mockConfig = mockResponses.find(m => matchesPattern(url, [m.urlPattern]));
|
|
2429
|
+
if (mockConfig) {
|
|
2430
|
+
mockedCount++;
|
|
2431
|
+
interceptedRequests.push({
|
|
2432
|
+
url,
|
|
2433
|
+
method,
|
|
2434
|
+
action: 'mocked',
|
|
2435
|
+
mockResponse: mockConfig.response,
|
|
2436
|
+
timestamp: Date.now()
|
|
2437
|
+
});
|
|
2438
|
+
await request.respond({
|
|
2439
|
+
status: mockConfig.statusCode || 200,
|
|
2440
|
+
contentType: 'application/json',
|
|
2441
|
+
body: typeof mockConfig.response === 'string'
|
|
2442
|
+
? mockConfig.response
|
|
2443
|
+
: JSON.stringify(mockConfig.response)
|
|
2444
|
+
});
|
|
2445
|
+
return;
|
|
2446
|
+
}
|
|
2447
|
+
// Check if should modify headers
|
|
2448
|
+
const headerConfig = modifyHeaders.find(h => matchesPattern(url, [h.urlPattern]));
|
|
2449
|
+
if (headerConfig) {
|
|
2450
|
+
const headers = {
|
|
2451
|
+
...request.headers(),
|
|
2452
|
+
...headerConfig.headers
|
|
2453
|
+
};
|
|
2454
|
+
interceptedRequests.push({
|
|
2455
|
+
url,
|
|
2456
|
+
method,
|
|
2457
|
+
action: 'headers_modified',
|
|
2458
|
+
modifiedHeaders: headerConfig.headers,
|
|
2459
|
+
timestamp: Date.now()
|
|
2460
|
+
});
|
|
2461
|
+
await request.continue({ headers });
|
|
2462
|
+
return;
|
|
2463
|
+
}
|
|
2464
|
+
// Continue normally but record
|
|
2465
|
+
if (!seen.has(url)) {
|
|
2466
|
+
seen.add(url);
|
|
2467
|
+
categories[category] = (categories[category] || 0) + 1;
|
|
2468
|
+
const entry = {
|
|
2469
|
+
url,
|
|
2470
|
+
method,
|
|
2471
|
+
resourceType,
|
|
2472
|
+
category,
|
|
2473
|
+
timestamp: Date.now()
|
|
2474
|
+
};
|
|
2475
|
+
// Capture POST/PUT payloads
|
|
2476
|
+
if (capturePayloads && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
|
|
2477
|
+
try {
|
|
2478
|
+
entry.payload = request.postData();
|
|
2479
|
+
}
|
|
2480
|
+
catch {
|
|
2481
|
+
// Ignore
|
|
2482
|
+
}
|
|
2483
|
+
}
|
|
2484
|
+
requests.push(entry);
|
|
2485
|
+
// Collect API endpoints
|
|
2486
|
+
if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
|
|
2487
|
+
apis.push({
|
|
2488
|
+
url,
|
|
2489
|
+
method,
|
|
2490
|
+
type: resourceType,
|
|
2491
|
+
payload: entry.payload
|
|
2492
|
+
});
|
|
2493
|
+
}
|
|
2494
|
+
// Collect media URLs
|
|
2495
|
+
if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
|
|
2496
|
+
mediaUrls.push(url);
|
|
2497
|
+
}
|
|
2395
2498
|
}
|
|
2499
|
+
await request.continue();
|
|
2500
|
+
};
|
|
2501
|
+
page.on('request', requestHandler);
|
|
2502
|
+
await new Promise(r => setTimeout(r, duration));
|
|
2503
|
+
page.off('request', requestHandler);
|
|
2504
|
+
await page.setRequestInterception(false);
|
|
2505
|
+
}
|
|
2506
|
+
catch (e) {
|
|
2507
|
+
// Cleanup on error
|
|
2508
|
+
try {
|
|
2509
|
+
await page.setRequestInterception(false);
|
|
2396
2510
|
}
|
|
2397
|
-
|
|
2398
|
-
|
|
2511
|
+
catch { }
|
|
2512
|
+
}
|
|
2513
|
+
}
|
|
2514
|
+
else {
|
|
2515
|
+
// ============================================================
|
|
2516
|
+
// RECORD MODE - Uses response events (safer)
|
|
2517
|
+
// ============================================================
|
|
2518
|
+
const responseHandler = (response) => {
|
|
2399
2519
|
try {
|
|
2400
|
-
const
|
|
2401
|
-
|
|
2402
|
-
|
|
2520
|
+
const url = response.url();
|
|
2521
|
+
// Dedup
|
|
2522
|
+
if (seen.has(url))
|
|
2523
|
+
return;
|
|
2524
|
+
seen.add(url);
|
|
2525
|
+
if (args.filterUrl && !url.includes(args.filterUrl)) {
|
|
2526
|
+
return;
|
|
2527
|
+
}
|
|
2528
|
+
const resourceType = response.request()?.resourceType?.() || 'unknown';
|
|
2529
|
+
const method = response.request()?.method?.() || 'GET';
|
|
2530
|
+
const category = categorizeUrl(url, resourceType);
|
|
2531
|
+
categories[category] = (categories[category] || 0) + 1;
|
|
2532
|
+
// Collect API endpoints
|
|
2533
|
+
if (category === 'api' || resourceType === 'xhr' || resourceType === 'fetch') {
|
|
2534
|
+
const apiEntry = { url, method, type: resourceType };
|
|
2535
|
+
// Capture POST data if enabled
|
|
2536
|
+
if (capturePayloads && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
|
|
2537
|
+
try {
|
|
2538
|
+
apiEntry.payload = response.request()?.postData?.();
|
|
2539
|
+
}
|
|
2540
|
+
catch { }
|
|
2541
|
+
}
|
|
2542
|
+
apis.push(apiEntry);
|
|
2543
|
+
}
|
|
2544
|
+
// Collect media URLs
|
|
2545
|
+
if (category === 'media' || /\.(mp4|webm|m3u8|ts|mp3)/i.test(url)) {
|
|
2546
|
+
mediaUrls.push(url);
|
|
2547
|
+
}
|
|
2548
|
+
const entry = {
|
|
2549
|
+
url,
|
|
2550
|
+
status: response.status(),
|
|
2551
|
+
resourceType,
|
|
2552
|
+
category,
|
|
2553
|
+
method,
|
|
2554
|
+
timestamp: Date.now(),
|
|
2555
|
+
};
|
|
2556
|
+
if (args.includeHeaders) {
|
|
2557
|
+
try {
|
|
2558
|
+
entry.headers = response.headers();
|
|
2559
|
+
}
|
|
2560
|
+
catch (e) {
|
|
2561
|
+
entry.headers = {};
|
|
2562
|
+
}
|
|
2563
|
+
}
|
|
2564
|
+
requests.push(entry);
|
|
2565
|
+
// Track size from headers
|
|
2566
|
+
try {
|
|
2567
|
+
const headers = response.headers();
|
|
2568
|
+
const size = parseInt(headers['content-length'] || '0', 10);
|
|
2569
|
+
totalSize += size;
|
|
2570
|
+
}
|
|
2571
|
+
catch {
|
|
2572
|
+
// Ignore
|
|
2573
|
+
}
|
|
2403
2574
|
}
|
|
2404
2575
|
catch {
|
|
2405
|
-
// Ignore
|
|
2576
|
+
// Ignore all errors in handler to prevent crash
|
|
2406
2577
|
}
|
|
2407
|
-
}
|
|
2408
|
-
catch {
|
|
2409
|
-
// Ignore all errors in handler to prevent crash
|
|
2410
|
-
}
|
|
2411
|
-
};
|
|
2412
|
-
try {
|
|
2413
|
-
page.on('response', responseHandler);
|
|
2414
|
-
await new Promise((r) => setTimeout(r, duration));
|
|
2415
|
-
}
|
|
2416
|
-
catch (e) {
|
|
2417
|
-
// Capture setup errors
|
|
2418
|
-
}
|
|
2419
|
-
finally {
|
|
2578
|
+
};
|
|
2420
2579
|
try {
|
|
2421
|
-
page.
|
|
2580
|
+
page.on('response', responseHandler);
|
|
2581
|
+
await new Promise((r) => setTimeout(r, duration));
|
|
2422
2582
|
}
|
|
2423
2583
|
catch (e) {
|
|
2424
|
-
//
|
|
2584
|
+
// Capture setup errors
|
|
2585
|
+
}
|
|
2586
|
+
finally {
|
|
2587
|
+
try {
|
|
2588
|
+
page.off('response', responseHandler);
|
|
2589
|
+
}
|
|
2590
|
+
catch (e) {
|
|
2591
|
+
// Ignore cleanup errors
|
|
2592
|
+
}
|
|
2425
2593
|
}
|
|
2426
2594
|
}
|
|
2595
|
+
tracker.setProgress(90, `✅ Recorded ${requests.length} requests`);
|
|
2596
|
+
tracker.complete(`🎉 Network recording complete`);
|
|
2427
2597
|
return {
|
|
2428
2598
|
requests: requests.slice(0, 500),
|
|
2429
2599
|
count: requests.length,
|
|
@@ -2431,7 +2601,12 @@ export async function handleNetworkRecorder(page, args) {
|
|
|
2431
2601
|
categories,
|
|
2432
2602
|
apis: apis.length > 0 ? apis : undefined,
|
|
2433
2603
|
mediaUrls: mediaUrls.length > 0 ? mediaUrls : undefined,
|
|
2434
|
-
|
|
2604
|
+
blockedCount: blockedCount > 0 ? blockedCount : undefined,
|
|
2605
|
+
mockedCount: mockedCount > 0 ? mockedCount : undefined,
|
|
2606
|
+
interceptedRequests: interceptedRequests.length > 0 ? interceptedRequests : undefined,
|
|
2607
|
+
message: `📡 Recorded ${requests.length} requests (${Math.round(totalSize / 1024)}KB) | APIs: ${apis.length} | Media: ${mediaUrls.length}` +
|
|
2608
|
+
(blockedCount > 0 ? ` | Blocked: ${blockedCount}` : '') +
|
|
2609
|
+
(mockedCount > 0 ? ` | Mocked: ${mockedCount}` : '')
|
|
2435
2610
|
};
|
|
2436
2611
|
}
|
|
2437
2612
|
/**
|
|
@@ -2905,6 +3080,7 @@ export async function handleVideoRecording(page, args, recorderState) {
|
|
|
2905
3080
|
/**
|
|
2906
3081
|
* Harvest all links from page
|
|
2907
3082
|
* ULTRA POWERFUL: Pagination detection, smart categorization, file types
|
|
3083
|
+
* NEW: Auto-follow pagination to scrape multiple pages
|
|
2908
3084
|
*/
|
|
2909
3085
|
export async function handleLinkHarvester(page, args) {
|
|
2910
3086
|
// Progress tracking for real-time updates
|
|
@@ -2913,76 +3089,110 @@ export async function handleLinkHarvester(page, args) {
|
|
|
2913
3089
|
tracker.start(100, '🔗 Starting link harvesting...');
|
|
2914
3090
|
const currentUrl = new URL(page.url());
|
|
2915
3091
|
tracker.setProgress(10, `📍 Analyzing page: ${currentUrl.hostname}`);
|
|
2916
|
-
//
|
|
2917
|
-
|
|
2918
|
-
//
|
|
2919
|
-
|
|
2920
|
-
const
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2924
|
-
links
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
|
|
3092
|
+
// Pagination settings
|
|
3093
|
+
const followPagination = args.followPagination === true;
|
|
3094
|
+
const maxPages = Math.min(args.maxPages || 5, 20); // Max 20 pages
|
|
3095
|
+
const delayBetweenPages = args.delayBetweenPages || 1000;
|
|
3096
|
+
const paginationSelector = args.paginationSelector;
|
|
3097
|
+
// Helper function to extract links from current page
|
|
3098
|
+
const extractLinksFromPage = async () => {
|
|
3099
|
+
const allLinks = await page.evaluate(() => {
|
|
3100
|
+
const links = [];
|
|
3101
|
+
document.querySelectorAll('a[href]').forEach((a) => {
|
|
3102
|
+
const anchor = a;
|
|
3103
|
+
links.push({
|
|
3104
|
+
url: anchor.href,
|
|
3105
|
+
text: a.textContent?.trim()?.substring(0, 100) || '',
|
|
3106
|
+
attrs: {
|
|
3107
|
+
rel: anchor.rel || '',
|
|
3108
|
+
target: anchor.target || '',
|
|
3109
|
+
class: anchor.className || '',
|
|
3110
|
+
id: anchor.id || '',
|
|
3111
|
+
download: anchor.download || '',
|
|
3112
|
+
}
|
|
3113
|
+
});
|
|
2934
3114
|
});
|
|
3115
|
+
return links;
|
|
2935
3116
|
});
|
|
2936
|
-
|
|
2937
|
-
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2958
|
-
|
|
2959
|
-
|
|
2960
|
-
|
|
3117
|
+
// Pagination detection
|
|
3118
|
+
const pagination = await page.evaluate((customSelector) => {
|
|
3119
|
+
let nextPage;
|
|
3120
|
+
let prevPage;
|
|
3121
|
+
let totalPages;
|
|
3122
|
+
let currentPage;
|
|
3123
|
+
// Custom selector first
|
|
3124
|
+
if (customSelector) {
|
|
3125
|
+
try {
|
|
3126
|
+
const el = document.querySelector(customSelector);
|
|
3127
|
+
if (el?.href)
|
|
3128
|
+
nextPage = el.href;
|
|
3129
|
+
}
|
|
3130
|
+
catch { /* invalid selector */ }
|
|
3131
|
+
}
|
|
3132
|
+
// Common pagination selectors
|
|
3133
|
+
const nextSelectors = [
|
|
3134
|
+
'a[rel="next"]', 'a.next', 'a.pagination-next',
|
|
3135
|
+
'[aria-label="Next"]', 'a.page-link.next', '.next a',
|
|
3136
|
+
'.pagination a:last-child', 'a[title="Next"]',
|
|
3137
|
+
'a[aria-label*="next" i]', 'button.next', '[data-testid="next"]'
|
|
3138
|
+
];
|
|
3139
|
+
const prevSelectors = [
|
|
3140
|
+
'a[rel="prev"]', 'a.prev', 'a.pagination-prev',
|
|
3141
|
+
'[aria-label="Previous"]', 'a.page-link.prev', '.prev a'
|
|
3142
|
+
];
|
|
3143
|
+
if (!nextPage) {
|
|
3144
|
+
for (const sel of nextSelectors) {
|
|
3145
|
+
try {
|
|
3146
|
+
const el = document.querySelector(sel);
|
|
3147
|
+
if (el?.href) {
|
|
3148
|
+
nextPage = el.href;
|
|
3149
|
+
break;
|
|
3150
|
+
}
|
|
3151
|
+
}
|
|
3152
|
+
catch { /* invalid selector */ }
|
|
2961
3153
|
}
|
|
2962
3154
|
}
|
|
2963
|
-
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
|
|
2970
|
-
|
|
3155
|
+
// Text-based next detection
|
|
3156
|
+
if (!nextPage) {
|
|
3157
|
+
const links = Array.from(document.querySelectorAll('a'));
|
|
3158
|
+
for (const link of links) {
|
|
3159
|
+
const text = link.textContent?.toLowerCase().trim() || '';
|
|
3160
|
+
if (text === 'next' || text === 'next →' || text === '>' || text === '»' || text === 'next page') {
|
|
3161
|
+
nextPage = link.href;
|
|
3162
|
+
break;
|
|
3163
|
+
}
|
|
2971
3164
|
}
|
|
2972
3165
|
}
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
3166
|
+
for (const sel of prevSelectors) {
|
|
3167
|
+
try {
|
|
3168
|
+
const el = document.querySelector(sel);
|
|
3169
|
+
if (el?.href) {
|
|
3170
|
+
prevPage = el.href;
|
|
3171
|
+
break;
|
|
3172
|
+
}
|
|
3173
|
+
}
|
|
3174
|
+
catch { /* invalid selector */ }
|
|
3175
|
+
}
|
|
3176
|
+
// Detect current page and total pages
|
|
3177
|
+
const pageNumbers = Array.from(document.querySelectorAll('.pagination a, .page-numbers a, nav a, .pager a'))
|
|
3178
|
+
.map(a => ({
|
|
3179
|
+
num: parseInt(a.textContent || '0', 10),
|
|
3180
|
+
isActive: a.classList.contains('active') || a.classList.contains('current') ||
|
|
3181
|
+
a.getAttribute('aria-current') === 'page'
|
|
3182
|
+
}))
|
|
3183
|
+
.filter(p => !isNaN(p.num) && p.num > 0);
|
|
3184
|
+
if (pageNumbers.length > 0) {
|
|
3185
|
+
totalPages = Math.max(...pageNumbers.map(p => p.num));
|
|
3186
|
+
const active = pageNumbers.find(p => p.isActive);
|
|
3187
|
+
if (active)
|
|
3188
|
+
currentPage = active.num;
|
|
3189
|
+
}
|
|
3190
|
+
return { nextPage, prevPage, totalPages, currentPage };
|
|
3191
|
+
}, paginationSelector);
|
|
3192
|
+
return { links: allLinks, pagination };
|
|
3193
|
+
};
|
|
2984
3194
|
// ============================================================
|
|
2985
|
-
//
|
|
3195
|
+
// SMART LINK CATEGORIZATION
|
|
2986
3196
|
// ============================================================
|
|
2987
3197
|
const categorizeLink = (url, text, attrs) => {
|
|
2988
3198
|
const urlLower = url.toLowerCase();
|
|
@@ -3017,55 +3227,107 @@ export async function handleLinkHarvester(page, args) {
|
|
|
3017
3227
|
return 'info';
|
|
3018
3228
|
return 'navigation';
|
|
3019
3229
|
};
|
|
3230
|
+
// ============================================================
|
|
3231
|
+
// MAIN SCRAPING LOGIC
|
|
3232
|
+
// ============================================================
|
|
3020
3233
|
const processedLinks = [];
|
|
3021
3234
|
const categories = {};
|
|
3022
3235
|
const seen = new Set();
|
|
3023
3236
|
let internal = 0;
|
|
3024
3237
|
let external = 0;
|
|
3025
|
-
|
|
3026
|
-
|
|
3027
|
-
|
|
3028
|
-
|
|
3029
|
-
|
|
3030
|
-
|
|
3031
|
-
|
|
3032
|
-
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3238
|
+
let pagesScraped = 0;
|
|
3239
|
+
let lastPagination = {};
|
|
3240
|
+
const visitedPages = new Set();
|
|
3241
|
+
// Process links from a page
|
|
3242
|
+
const processLinks = (allLinks, pageNum) => {
|
|
3243
|
+
for (const link of allLinks) {
|
|
3244
|
+
try {
|
|
3245
|
+
if (seen.has(link.url))
|
|
3246
|
+
continue;
|
|
3247
|
+
seen.add(link.url);
|
|
3248
|
+
const linkUrl = new URL(link.url);
|
|
3249
|
+
const isInternal = linkUrl.hostname === currentUrl.hostname;
|
|
3250
|
+
if (args.filter && !link.url.includes(args.filter) && !link.text.includes(args.filter)) {
|
|
3251
|
+
continue;
|
|
3252
|
+
}
|
|
3253
|
+
if (isInternal && args.includeInternal === false)
|
|
3254
|
+
continue;
|
|
3255
|
+
if (!isInternal && args.includeExternal === false)
|
|
3256
|
+
continue;
|
|
3257
|
+
const category = categorizeLink(link.url, link.text, link.attrs);
|
|
3258
|
+
categories[category] = (categories[category] || 0) + 1;
|
|
3259
|
+
processedLinks.push({
|
|
3260
|
+
url: link.url,
|
|
3261
|
+
text: link.text,
|
|
3262
|
+
type: isInternal ? 'internal' : 'external',
|
|
3263
|
+
category,
|
|
3264
|
+
page: pageNum,
|
|
3265
|
+
});
|
|
3266
|
+
if (isInternal)
|
|
3267
|
+
internal++;
|
|
3268
|
+
else
|
|
3269
|
+
external++;
|
|
3270
|
+
if (args.maxLinks && processedLinks.length >= args.maxLinks)
|
|
3271
|
+
return true; // Stop
|
|
3272
|
+
}
|
|
3273
|
+
catch {
|
|
3274
|
+
// Invalid URL, skip
|
|
3275
|
+
}
|
|
3054
3276
|
}
|
|
3055
|
-
|
|
3056
|
-
|
|
3277
|
+
return false; // Continue
|
|
3278
|
+
};
|
|
3279
|
+
// Scrape first page
|
|
3280
|
+
tracker.setProgress(20, '🔍 Extracting links from page 1...');
|
|
3281
|
+
const firstPage = await extractLinksFromPage();
|
|
3282
|
+
pagesScraped = 1;
|
|
3283
|
+
visitedPages.add(page.url());
|
|
3284
|
+
lastPagination = firstPage.pagination;
|
|
3285
|
+
const shouldStop = processLinks(firstPage.links, 1);
|
|
3286
|
+
// Follow pagination if enabled
|
|
3287
|
+
if (followPagination && !shouldStop && firstPage.pagination.nextPage) {
|
|
3288
|
+
let nextUrl = firstPage.pagination.nextPage;
|
|
3289
|
+
while (nextUrl && pagesScraped < maxPages && !(args.maxLinks && processedLinks.length >= args.maxLinks)) {
|
|
3290
|
+
// Check if we've already visited this page
|
|
3291
|
+
if (visitedPages.has(nextUrl)) {
|
|
3292
|
+
break;
|
|
3293
|
+
}
|
|
3294
|
+
visitedPages.add(nextUrl);
|
|
3295
|
+
tracker.setProgress(20 + (pagesScraped / maxPages) * 60, `📄 Scraping page ${pagesScraped + 1}...`);
|
|
3296
|
+
try {
|
|
3297
|
+
// Navigate to next page
|
|
3298
|
+
await page.goto(nextUrl, {
|
|
3299
|
+
waitUntil: 'domcontentloaded',
|
|
3300
|
+
timeout: 15000
|
|
3301
|
+
});
|
|
3302
|
+
// Wait for content to load
|
|
3303
|
+
await new Promise(r => setTimeout(r, delayBetweenPages));
|
|
3304
|
+
// Extract links from this page
|
|
3305
|
+
const pageData = await extractLinksFromPage();
|
|
3306
|
+
pagesScraped++;
|
|
3307
|
+
lastPagination = pageData.pagination;
|
|
3308
|
+
const stop = processLinks(pageData.links, pagesScraped);
|
|
3309
|
+
if (stop)
|
|
3310
|
+
break;
|
|
3311
|
+
// Get next page URL
|
|
3312
|
+
nextUrl = pageData.pagination.nextPage || undefined;
|
|
3313
|
+
}
|
|
3314
|
+
catch (error) {
|
|
3315
|
+
// Failed to navigate, stop pagination
|
|
3316
|
+
break;
|
|
3317
|
+
}
|
|
3057
3318
|
}
|
|
3058
3319
|
}
|
|
3059
|
-
tracker.setProgress(90, `✅ Processed ${processedLinks.length} links`);
|
|
3060
|
-
tracker.complete(`🎉 Link harvesting complete: ${processedLinks.length} links
|
|
3320
|
+
tracker.setProgress(90, `✅ Processed ${processedLinks.length} links from ${pagesScraped} pages`);
|
|
3321
|
+
tracker.complete(`🎉 Link harvesting complete: ${processedLinks.length} links from ${pagesScraped} pages`);
|
|
3061
3322
|
return {
|
|
3062
3323
|
links: processedLinks,
|
|
3063
3324
|
internal,
|
|
3064
3325
|
external,
|
|
3065
|
-
pagination: (
|
|
3326
|
+
pagination: (lastPagination.nextPage || lastPagination.prevPage || lastPagination.totalPages) ? lastPagination : undefined,
|
|
3066
3327
|
categories,
|
|
3067
|
-
|
|
3068
|
-
|
|
3328
|
+
pagesScraped,
|
|
3329
|
+
message: `🔗 Found ${processedLinks.length} links (${internal} internal, ${external} external) from ${pagesScraped} pages` +
|
|
3330
|
+
(lastPagination.nextPage && pagesScraped >= maxPages ? ` | More pages available: ${lastPagination.nextPage}` : '')
|
|
3069
3331
|
};
|
|
3070
3332
|
}
|
|
3071
3333
|
/**
|
|
@@ -3440,10 +3702,13 @@ export async function handleSolveCaptchaAdvanced(page, args) {
|
|
|
3440
3702
|
}
|
|
3441
3703
|
/**
|
|
3442
3704
|
* Parse and extract HLS/m3u8 streaming URLs
|
|
3705
|
+
* ENHANCED: Segment parsing, bandwidth extraction, playlist fetching
|
|
3443
3706
|
*/
|
|
3444
3707
|
export async function handleM3u8Parser(page, args) {
|
|
3445
3708
|
const streams = [];
|
|
3446
3709
|
const qualities = [];
|
|
3710
|
+
const variants = [];
|
|
3711
|
+
const segments = [];
|
|
3447
3712
|
let masterPlaylist;
|
|
3448
3713
|
// Intercept network requests to find m3u8 files
|
|
3449
3714
|
const m3u8Urls = [];
|
|
@@ -3544,6 +3809,109 @@ export async function handleM3u8Parser(page, args) {
|
|
|
3544
3809
|
streams.push(stream);
|
|
3545
3810
|
}
|
|
3546
3811
|
}
|
|
3812
|
+
// ============================================================
|
|
3813
|
+
// NEW: FETCH AND PARSE MASTER PLAYLIST FOR VARIANTS
|
|
3814
|
+
// ============================================================
|
|
3815
|
+
if ((args.fetchPlaylist || args.extractBandwidth) && masterPlaylist) {
|
|
3816
|
+
try {
|
|
3817
|
+
const playlistContent = await page.evaluate(async (url) => {
|
|
3818
|
+
try {
|
|
3819
|
+
const response = await fetch(url);
|
|
3820
|
+
return await response.text();
|
|
3821
|
+
}
|
|
3822
|
+
catch {
|
|
3823
|
+
return null;
|
|
3824
|
+
}
|
|
3825
|
+
}, masterPlaylist);
|
|
3826
|
+
if (playlistContent) {
|
|
3827
|
+
// Parse #EXT-X-STREAM-INF lines for variants
|
|
3828
|
+
const variantRegex = /#EXT-X-STREAM-INF:.*?BANDWIDTH=(\d+)(?:.*?RESOLUTION=(\d+x\d+))?[^\n]*\n([^\n]+)/g;
|
|
3829
|
+
let match;
|
|
3830
|
+
while ((match = variantRegex.exec(playlistContent)) !== null) {
|
|
3831
|
+
const bandwidth = parseInt(match[1], 10);
|
|
3832
|
+
const resolution = match[2] || undefined;
|
|
3833
|
+
let variantUrl = match[3].trim();
|
|
3834
|
+
// Make relative URLs absolute
|
|
3835
|
+
if (!variantUrl.startsWith('http')) {
|
|
3836
|
+
const baseUrl = masterPlaylist.substring(0, masterPlaylist.lastIndexOf('/') + 1);
|
|
3837
|
+
variantUrl = baseUrl + variantUrl;
|
|
3838
|
+
}
|
|
3839
|
+
// Determine quality from resolution or bandwidth
|
|
3840
|
+
let quality = 'unknown';
|
|
3841
|
+
if (resolution) {
|
|
3842
|
+
const height = parseInt(resolution.split('x')[1], 10);
|
|
3843
|
+
if (height >= 2160)
|
|
3844
|
+
quality = '4K';
|
|
3845
|
+
else if (height >= 1080)
|
|
3846
|
+
quality = '1080p';
|
|
3847
|
+
else if (height >= 720)
|
|
3848
|
+
quality = '720p';
|
|
3849
|
+
else if (height >= 480)
|
|
3850
|
+
quality = '480p';
|
|
3851
|
+
else if (height >= 360)
|
|
3852
|
+
quality = '360p';
|
|
3853
|
+
else
|
|
3854
|
+
quality = `${height}p`;
|
|
3855
|
+
}
|
|
3856
|
+
else if (bandwidth >= 5000000)
|
|
3857
|
+
quality = '1080p';
|
|
3858
|
+
else if (bandwidth >= 2500000)
|
|
3859
|
+
quality = '720p';
|
|
3860
|
+
else if (bandwidth >= 1000000)
|
|
3861
|
+
quality = '480p';
|
|
3862
|
+
else
|
|
3863
|
+
quality = '360p';
|
|
3864
|
+
variants.push({ quality, bandwidth, url: variantUrl, resolution });
|
|
3865
|
+
}
|
|
3866
|
+
// Sort variants by bandwidth (highest first)
|
|
3867
|
+
variants.sort((a, b) => b.bandwidth - a.bandwidth);
|
|
3868
|
+
}
|
|
3869
|
+
}
|
|
3870
|
+
catch (e) {
|
|
3871
|
+
// Ignore fetch errors
|
|
3872
|
+
}
|
|
3873
|
+
}
|
|
3874
|
+
// ============================================================
|
|
3875
|
+
// NEW: PARSE SEGMENTS FROM MEDIA PLAYLIST
|
|
3876
|
+
// ============================================================
|
|
3877
|
+
if (args.parseSegments && streams.length > 0) {
|
|
3878
|
+
const mediaPlaylistUrl = streams[0].url;
|
|
3879
|
+
try {
|
|
3880
|
+
const mediaContent = await page.evaluate(async (url) => {
|
|
3881
|
+
try {
|
|
3882
|
+
const response = await fetch(url);
|
|
3883
|
+
return await response.text();
|
|
3884
|
+
}
|
|
3885
|
+
catch {
|
|
3886
|
+
return null;
|
|
3887
|
+
}
|
|
3888
|
+
}, mediaPlaylistUrl);
|
|
3889
|
+
if (mediaContent) {
|
|
3890
|
+
const lines = mediaContent.split('\n');
|
|
3891
|
+
let segmentIndex = 0;
|
|
3892
|
+
let currentDuration = 0;
|
|
3893
|
+
for (let i = 0; i < lines.length; i++) {
|
|
3894
|
+
const line = lines[i].trim();
|
|
3895
|
+
// Parse duration from #EXTINF
|
|
3896
|
+
if (line.startsWith('#EXTINF:')) {
|
|
3897
|
+
currentDuration = parseFloat(line.replace('#EXTINF:', '').split(',')[0]);
|
|
3898
|
+
}
|
|
3899
|
+
// Capture segment URL
|
|
3900
|
+
else if (line && !line.startsWith('#') && (line.includes('.ts') || line.includes('.m4s'))) {
|
|
3901
|
+
let segmentUrl = line;
|
|
3902
|
+
if (!segmentUrl.startsWith('http')) {
|
|
3903
|
+
const baseUrl = mediaPlaylistUrl.substring(0, mediaPlaylistUrl.lastIndexOf('/') + 1);
|
|
3904
|
+
segmentUrl = baseUrl + segmentUrl;
|
|
3905
|
+
}
|
|
3906
|
+
segments.push({ url: segmentUrl, duration: currentDuration, index: segmentIndex++ });
|
|
3907
|
+
}
|
|
3908
|
+
}
|
|
3909
|
+
}
|
|
3910
|
+
}
|
|
3911
|
+
catch (e) {
|
|
3912
|
+
// Ignore segment parsing errors
|
|
3913
|
+
}
|
|
3914
|
+
}
|
|
3547
3915
|
// Filter audio if not wanted
|
|
3548
3916
|
const filteredStreams = args.includeAudio !== false
|
|
3549
3917
|
? streams
|
|
@@ -3553,6 +3921,8 @@ export async function handleM3u8Parser(page, args) {
|
|
|
3553
3921
|
streams: filteredStreams,
|
|
3554
3922
|
masterPlaylist,
|
|
3555
3923
|
qualities: [...new Set(qualities)],
|
|
3924
|
+
variants: variants.length > 0 ? variants : undefined,
|
|
3925
|
+
segments: segments.length > 0 ? segments : undefined,
|
|
3556
3926
|
};
|
|
3557
3927
|
}
|
|
3558
3928
|
/**
|
|
@@ -4414,11 +4784,76 @@ export async function handleCloudflareBypass(page, args) {
|
|
|
4414
4784
|
/**
|
|
4415
4785
|
* Master tool: Extract direct stream/download URLs
|
|
4416
4786
|
* ULTRA POWERFUL: Handles packed JS, JW Player, Video.js, HLS.js, obfuscated scripts
|
|
4787
|
+
* ENHANCED: Multi-Quality Selector, VidSrc, Filemoon, StreamWish support
|
|
4417
4788
|
*/
|
|
4418
4789
|
export async function handleStreamExtractor(page, args) {
|
|
4419
4790
|
const formats = args.formats || ['mp4', 'mkv', 'm3u8', 'mp3', 'webm', 'flv', 'avi'];
|
|
4420
4791
|
const maxRedirects = args.maxRedirects || 10;
|
|
4421
4792
|
const directUrls = [];
|
|
4793
|
+
const subtitles = [];
|
|
4794
|
+
// Quality priority for auto-selection
|
|
4795
|
+
const qualityPriority = {
|
|
4796
|
+
'2160p': 100, '4k': 100, 'uhd': 100,
|
|
4797
|
+
'1080p': 90, 'fhd': 90, 'full hd': 90,
|
|
4798
|
+
'720p': 80, 'hd': 80,
|
|
4799
|
+
'480p': 70, 'sd': 70,
|
|
4800
|
+
'360p': 60,
|
|
4801
|
+
'240p': 50,
|
|
4802
|
+
'144p': 40,
|
|
4803
|
+
'unknown': 10, 'auto': 10
|
|
4804
|
+
};
|
|
4805
|
+
// Site-specific extraction patterns
|
|
4806
|
+
const sitePatterns = {
|
|
4807
|
+
vidsrc: {
|
|
4808
|
+
urlPattern: /vidsrc|v2\.vidsrc/i,
|
|
4809
|
+
sourcePattern: [
|
|
4810
|
+
/source:\s*["']([^"']+\.m3u8[^"']*)/gi,
|
|
4811
|
+
/file:\s*["']([^"']+\.m3u8[^"']*)/gi
|
|
4812
|
+
]
|
|
4813
|
+
},
|
|
4814
|
+
filemoon: {
|
|
4815
|
+
urlPattern: /filemoon|moonplayer/i,
|
|
4816
|
+
sourcePattern: [
|
|
4817
|
+
/sources:\s*\[\s*\{[^}]*file:\s*["']([^"']+)/gi,
|
|
4818
|
+
/eval\(function\(p,a,c,k,e,[rd]\)/gi
|
|
4819
|
+
]
|
|
4820
|
+
},
|
|
4821
|
+
streamwish: {
|
|
4822
|
+
urlPattern: /streamwish|swish/i,
|
|
4823
|
+
sourcePattern: [
|
|
4824
|
+
/file:\s*["']([^"']+\.m3u8[^"']*)/gi,
|
|
4825
|
+
/sources:\s*\[.*?["']([^"']+\.m3u8[^"']*)/gi
|
|
4826
|
+
]
|
|
4827
|
+
},
|
|
4828
|
+
doodstream: {
|
|
4829
|
+
urlPattern: /dood|doodstream/i,
|
|
4830
|
+
sourcePattern: [
|
|
4831
|
+
/\/pass_md5\/[^"']+/gi,
|
|
4832
|
+
/\$.get\(['"]([^'"]+pass_md5[^'"]+)/gi
|
|
4833
|
+
]
|
|
4834
|
+
},
|
|
4835
|
+
mixdrop: {
|
|
4836
|
+
urlPattern: /mixdrop/i,
|
|
4837
|
+
sourcePattern: [
|
|
4838
|
+
/MDCore\.wurl\s*=\s*["']([^"']+)/gi,
|
|
4839
|
+
/wurl\s*=\s*["']([^"']+)/gi
|
|
4840
|
+
]
|
|
4841
|
+
},
|
|
4842
|
+
streamtape: {
|
|
4843
|
+
urlPattern: /streamtape/i,
|
|
4844
|
+
sourcePattern: [
|
|
4845
|
+
/id=.*?&token=/gi,
|
|
4846
|
+
/robotlink.*?=\s*['"]([^'"]+)/gi
|
|
4847
|
+
]
|
|
4848
|
+
},
|
|
4849
|
+
mp4upload: {
|
|
4850
|
+
urlPattern: /mp4upload/i,
|
|
4851
|
+
sourcePattern: [
|
|
4852
|
+
/player\.src\(\{src:\s*["']([^"']+)/gi,
|
|
4853
|
+
/src:\s*["']([^"']+\.mp4[^"']*)/gi
|
|
4854
|
+
]
|
|
4855
|
+
}
|
|
4856
|
+
};
|
|
4422
4857
|
// Navigate if URL provided
|
|
4423
4858
|
if (args.url) {
|
|
4424
4859
|
await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
@@ -4652,25 +5087,173 @@ export async function handleStreamExtractor(page, args) {
|
|
|
4652
5087
|
directUrls.push({ url, format, source: 'network' });
|
|
4653
5088
|
}
|
|
4654
5089
|
}
|
|
5090
|
+
// ============================================================
|
|
5091
|
+
// NEW: SITE-SPECIFIC EXTRACTION (VidSrc, Filemoon, StreamWish, etc.)
|
|
5092
|
+
// ============================================================
|
|
5093
|
+
if (args.siteType && args.siteType !== 'auto') {
|
|
5094
|
+
const siteConfig = sitePatterns[args.siteType];
|
|
5095
|
+
if (siteConfig) {
|
|
5096
|
+
const html = await page.content();
|
|
5097
|
+
for (const pattern of siteConfig.sourcePattern) {
|
|
5098
|
+
let match;
|
|
5099
|
+
while ((match = pattern.exec(html)) !== null) {
|
|
5100
|
+
if (match[1] && !directUrls.some(d => d.url === match[1])) {
|
|
5101
|
+
const format = formats.find(f => match[1].includes(`.${f}`)) || 'm3u8';
|
|
5102
|
+
directUrls.push({ url: match[1], format, source: args.siteType });
|
|
5103
|
+
}
|
|
5104
|
+
}
|
|
5105
|
+
}
|
|
5106
|
+
}
|
|
5107
|
+
}
|
|
5108
|
+
else {
|
|
5109
|
+
// Auto-detect site type from URL
|
|
5110
|
+
const currentUrl = page.url();
|
|
5111
|
+
for (const [siteName, config] of Object.entries(sitePatterns)) {
|
|
5112
|
+
if (config.urlPattern.test(currentUrl)) {
|
|
5113
|
+
const html = await page.content();
|
|
5114
|
+
for (const pattern of config.sourcePattern) {
|
|
5115
|
+
let match;
|
|
5116
|
+
while ((match = pattern.exec(html)) !== null) {
|
|
5117
|
+
if (match[1] && !directUrls.some(d => d.url === match[1])) {
|
|
5118
|
+
const format = formats.find(f => match[1].includes(`.${f}`)) || 'm3u8';
|
|
5119
|
+
directUrls.push({ url: match[1], format, source: siteName });
|
|
5120
|
+
}
|
|
5121
|
+
}
|
|
5122
|
+
}
|
|
5123
|
+
break;
|
|
5124
|
+
}
|
|
5125
|
+
}
|
|
5126
|
+
}
|
|
5127
|
+
// ============================================================
|
|
5128
|
+
// NEW: EXTRACT QUALITY FROM URLs
|
|
5129
|
+
// ============================================================
|
|
5130
|
+
for (const item of directUrls) {
|
|
5131
|
+
if (!item.quality || item.quality === 'auto') {
|
|
5132
|
+
const url = item.url.toLowerCase();
|
|
5133
|
+
if (url.includes('2160') || url.includes('4k') || url.includes('uhd'))
|
|
5134
|
+
item.quality = '2160p';
|
|
5135
|
+
else if (url.includes('1080'))
|
|
5136
|
+
item.quality = '1080p';
|
|
5137
|
+
else if (url.includes('720'))
|
|
5138
|
+
item.quality = '720p';
|
|
5139
|
+
else if (url.includes('480'))
|
|
5140
|
+
item.quality = '480p';
|
|
5141
|
+
else if (url.includes('360'))
|
|
5142
|
+
item.quality = '360p';
|
|
5143
|
+
else if (url.includes('240'))
|
|
5144
|
+
item.quality = '240p';
|
|
5145
|
+
else if (url.includes('144'))
|
|
5146
|
+
item.quality = '144p';
|
|
5147
|
+
else
|
|
5148
|
+
item.quality = 'unknown';
|
|
5149
|
+
}
|
|
5150
|
+
}
|
|
5151
|
+
// ============================================================
|
|
5152
|
+
// NEW: AUTO-SELECT BEST QUALITY
|
|
5153
|
+
// ============================================================
|
|
5154
|
+
let bestQuality;
|
|
5155
|
+
if (args.autoSelectBest || args.preferredQuality) {
|
|
5156
|
+
const preferredQ = args.preferredQuality || 'highest';
|
|
5157
|
+
if (preferredQ === 'highest') {
|
|
5158
|
+
// Sort by quality priority (highest first)
|
|
5159
|
+
const sorted = [...directUrls].sort((a, b) => {
|
|
5160
|
+
const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5161
|
+
const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5162
|
+
return bScore - aScore;
|
|
5163
|
+
});
|
|
5164
|
+
if (sorted.length > 0) {
|
|
5165
|
+
bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
|
|
5166
|
+
}
|
|
5167
|
+
}
|
|
5168
|
+
else if (preferredQ === 'lowest') {
|
|
5169
|
+
// Sort by quality priority (lowest first)
|
|
5170
|
+
const sorted = [...directUrls].sort((a, b) => {
|
|
5171
|
+
const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5172
|
+
const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5173
|
+
return aScore - bScore;
|
|
5174
|
+
});
|
|
5175
|
+
if (sorted.length > 0) {
|
|
5176
|
+
bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
|
|
5177
|
+
}
|
|
5178
|
+
}
|
|
5179
|
+
else {
|
|
5180
|
+
// Find exact match for preferred quality
|
|
5181
|
+
const exact = directUrls.find(d => d.quality?.toLowerCase() === preferredQ.toLowerCase());
|
|
5182
|
+
if (exact) {
|
|
5183
|
+
bestQuality = { url: exact.url, format: exact.format, quality: exact.quality || preferredQ, source: exact.source };
|
|
5184
|
+
}
|
|
5185
|
+
else {
|
|
5186
|
+
// Fallback to highest available
|
|
5187
|
+
const sorted = [...directUrls].sort((a, b) => {
|
|
5188
|
+
const aScore = qualityPriority[a.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5189
|
+
const bScore = qualityPriority[b.quality?.toLowerCase() || 'unknown'] || 0;
|
|
5190
|
+
return bScore - aScore;
|
|
5191
|
+
});
|
|
5192
|
+
if (sorted.length > 0) {
|
|
5193
|
+
bestQuality = { url: sorted[0].url, format: sorted[0].format, quality: sorted[0].quality || 'unknown', source: sorted[0].source };
|
|
5194
|
+
}
|
|
5195
|
+
}
|
|
5196
|
+
}
|
|
5197
|
+
}
|
|
5198
|
+
// ============================================================
|
|
5199
|
+
// NEW: EXTRACT SUBTITLES
|
|
5200
|
+
// ============================================================
|
|
5201
|
+
if (args.extractSubtitles) {
|
|
5202
|
+
const subData = await page.evaluate(() => {
|
|
5203
|
+
const subs = [];
|
|
5204
|
+
// HTML5 track elements
|
|
5205
|
+
document.querySelectorAll('track[kind="subtitles"], track[kind="captions"]').forEach(track => {
|
|
5206
|
+
const src = track.getAttribute('src');
|
|
5207
|
+
if (src) {
|
|
5208
|
+
subs.push({
|
|
5209
|
+
url: src,
|
|
5210
|
+
language: track.getAttribute('srclang') || undefined,
|
|
5211
|
+
label: track.getAttribute('label') || undefined
|
|
5212
|
+
});
|
|
5213
|
+
}
|
|
5214
|
+
});
|
|
5215
|
+
// VTT/SRT links
|
|
5216
|
+
document.querySelectorAll('a[href*=".vtt"], a[href*=".srt"], a[href*=".ass"]').forEach(link => {
|
|
5217
|
+
const href = link.href;
|
|
5218
|
+
subs.push({ url: href, label: link.textContent?.trim() || undefined });
|
|
5219
|
+
});
|
|
5220
|
+
// Look in scripts for subtitle URLs
|
|
5221
|
+
const html = document.documentElement.innerHTML;
|
|
5222
|
+
const vttMatches = html.match(/https?:\/\/[^\s"']+\.vtt[^\s"']*/gi);
|
|
5223
|
+
const srtMatches = html.match(/https?:\/\/[^\s"']+\.srt[^\s"']*/gi);
|
|
5224
|
+
if (vttMatches)
|
|
5225
|
+
vttMatches.forEach(url => subs.push({ url }));
|
|
5226
|
+
if (srtMatches)
|
|
5227
|
+
srtMatches.forEach(url => subs.push({ url }));
|
|
5228
|
+
// Deduplicate
|
|
5229
|
+
const seen = new Set();
|
|
5230
|
+
return subs.filter(s => {
|
|
5231
|
+
if (seen.has(s.url))
|
|
5232
|
+
return false;
|
|
5233
|
+
seen.add(s.url);
|
|
5234
|
+
return true;
|
|
5235
|
+
});
|
|
5236
|
+
});
|
|
5237
|
+
subtitles.push(...subData);
|
|
5238
|
+
}
|
|
4655
5239
|
return {
|
|
4656
5240
|
success: directUrls.length > 0,
|
|
4657
5241
|
directUrls,
|
|
5242
|
+
bestQuality,
|
|
5243
|
+
subtitles: args.extractSubtitles ? subtitles : undefined,
|
|
4658
5244
|
message: directUrls.length > 0
|
|
4659
|
-
? `🎬 Found ${directUrls.length}
|
|
5245
|
+
? `🎬 Found ${directUrls.length} URL(s)${bestQuality ? ` | Best: ${bestQuality.quality}` : ''}${subtitles.length > 0 ? ` | ${subtitles.length} subtitles` : ''}`
|
|
4660
5246
|
: 'No direct URLs found',
|
|
4661
5247
|
};
|
|
4662
5248
|
}
|
|
4663
|
-
|
|
4664
|
-
|
|
4665
|
-
|
|
4666
|
-
* Perfect for scraping dynamic/AJAX-loaded content
|
|
4667
|
-
*/
|
|
4668
|
-
export async function handleJsScrape(page, args) {
|
|
5249
|
+
// Helper function to scrape a single URL
|
|
5250
|
+
async function scrapeSingleUrl(page, url, args) {
|
|
5251
|
+
const startTime = Date.now();
|
|
4669
5252
|
const waitForTimeout = args.waitForTimeout || 10000;
|
|
4670
5253
|
const returnType = args.returnType || 'html';
|
|
4671
5254
|
try {
|
|
4672
5255
|
// Step 1: Navigate to URL
|
|
4673
|
-
await page.goto(
|
|
5256
|
+
await page.goto(url, {
|
|
4674
5257
|
waitUntil: 'domcontentloaded',
|
|
4675
5258
|
timeout: waitForTimeout
|
|
4676
5259
|
});
|
|
@@ -4772,26 +5355,156 @@ export async function handleJsScrape(page, args) {
|
|
|
4772
5355
|
}
|
|
4773
5356
|
return {
|
|
4774
5357
|
success: true,
|
|
4775
|
-
url
|
|
5358
|
+
url,
|
|
4776
5359
|
finalUrl,
|
|
4777
5360
|
title,
|
|
4778
5361
|
html,
|
|
4779
5362
|
text,
|
|
4780
5363
|
elements,
|
|
4781
|
-
elementCount
|
|
5364
|
+
elementCount,
|
|
5365
|
+
duration: Date.now() - startTime
|
|
4782
5366
|
};
|
|
4783
5367
|
}
|
|
4784
5368
|
catch (error) {
|
|
4785
5369
|
return {
|
|
4786
5370
|
success: false,
|
|
4787
|
-
url
|
|
4788
|
-
finalUrl: page.url() ||
|
|
5371
|
+
url,
|
|
5372
|
+
finalUrl: page.url() || url,
|
|
4789
5373
|
title: '',
|
|
4790
5374
|
elementCount: 0,
|
|
4791
|
-
error: error instanceof Error ? error.message : String(error)
|
|
5375
|
+
error: error instanceof Error ? error.message : String(error),
|
|
5376
|
+
duration: Date.now() - startTime
|
|
4792
5377
|
};
|
|
4793
5378
|
}
|
|
4794
5379
|
}
|
|
5380
|
+
/**
 * JS Scrape - Single-call JavaScript-rendered content extraction
 * Combines navigation, waiting, scrolling, and content extraction in one call
 * Perfect for scraping dynamic/AJAX-loaded content
 * Supports parallel scraping of multiple URLs with concurrency control
 *
 * Modes:
 *  - Single URL (`args.url` only): scrapes on the supplied `page` and returns
 *    the `scrapeSingleUrl` result unchanged (backward compatible).
 *  - Parallel (`args.urls` supplied and non-empty): opens one new page per URL
 *    on `page.browser()`, scrapes each with `scrapeSingleUrl`, and always
 *    closes the page afterwards.
 *
 * @param {object} page - Page object; must expose `browser()` for parallel mode.
 * @param {object} args - Scrape options, forwarded as-is to `scrapeSingleUrl`.
 * @param {string} [args.url] - Single URL to scrape.
 * @param {string[]|string} [args.urls] - URLs to scrape in parallel; a lone
 *   string is treated as a one-element list.
 * @param {number} [args.concurrency=3] - Maximum simultaneous pages, clamped to
 *   1..10. Invalid or non-positive values fall back to 3.
 * @param {boolean} [args.continueOnError=true] - When false, no further URLs
 *   are started after the first failed result (in-flight scrapes still finish).
 * @param {number} [args.delayBetween=500] - Milliseconds to wait before starting
 *   a scrape once at least one result has been collected. `0` disables the delay.
 * @returns {Promise<object>} Single-URL result, or a summary
 *   `{ success, isParallel, urls, results, totalUrls, successCount, failedCount,
 *   totalDuration, error }`. `results` is in completion order, not input order.
 */
export async function handleJsScrape(page, args) {
    const startTime = Date.now();
    // Determine URLs to scrape. An empty `urls` array counts as "not provided"
    // so that a valid `url` is still honoured, and a bare string is wrapped
    // instead of being spread into individual characters later on.
    const hasUrlList = Array.isArray(args.urls) ? args.urls.length > 0 : Boolean(args.urls);
    let urlList;
    if (hasUrlList) {
        urlList = Array.isArray(args.urls) ? args.urls : [args.urls];
    }
    else {
        urlList = args.url ? [args.url] : [];
    }
    if (urlList.length === 0) {
        return {
            success: false,
            error: 'No URL(s) provided. Use "url" for single URL or "urls" for multiple URLs.',
            elementCount: 0
        };
    }
    // Single URL mode - backward compatible
    if (!hasUrlList) {
        return scrapeSingleUrl(page, urlList[0], args);
    }
    // Parallel scraping mode
    // Clamp concurrency to 1..10; a negative or non-numeric value previously
    // left the scheduler unable to start any task and it spun forever.
    const requestedConcurrency = Number(args.concurrency);
    const concurrency = Number.isFinite(requestedConcurrency) && requestedConcurrency > 0
        ? Math.max(1, Math.min(Math.floor(requestedConcurrency), 10))
        : 3;
    const continueOnError = args.continueOnError !== false;
    // Respect an explicit 0 ("no delay"); only missing/invalid values use 500.
    const requestedDelay = Number(args.delayBetween);
    const delayBetween = args.delayBetween != null && Number.isFinite(requestedDelay) && requestedDelay >= 0
        ? requestedDelay
        : 500;
    const results = [];
    const browser = page.browser();
    if (!browser) {
        return {
            success: false,
            error: 'Browser not available for parallel scraping',
            elementCount: 0
        };
    }
    const queue = [...urlList];
    const errors = [];
    // Scrape one URL on its own page. Never rejects: failures are converted
    // into a failed result object so the worker pool keeps running.
    const processUrl = async (url) => {
        let newPage = null;
        try {
            // Create new page for each URL
            newPage = await browser.newPage();
            await newPage.setViewport({ width: 1280, height: 720 });
            return await scrapeSingleUrl(newPage, url, args);
        }
        catch (error) {
            return {
                success: false,
                url,
                finalUrl: url,
                title: '',
                elementCount: 0,
                error: error instanceof Error ? error.message : String(error),
                duration: 0
            };
        }
        finally {
            // Always release the page, even when scraping failed
            if (newPage) {
                try {
                    await newPage.close();
                }
                catch (e) {
                    // Ignore close errors - the page may already be gone
                }
            }
        }
    };
    // Worker pool: each worker pulls the next URL until the queue is drained.
    // This replaces the Promise.race + 50ms polling loop while keeping the same
    // observable behaviour (first batch starts immediately, later ones are
    // delayed, clearing the queue stops further work).
    const worker = async () => {
        while (queue.length > 0) {
            const url = queue.shift();
            // Add delay between scrapes once something has already completed
            if (results.length > 0 && delayBetween > 0) {
                await new Promise(resolve => setTimeout(resolve, delayBetween));
            }
            const result = await processUrl(url);
            if (!result) {
                continue;
            }
            results.push(result);
            if (!result.success && !continueOnError) {
                // Clear queue to stop processing
                queue.length = 0;
                errors.push(`Stopped at ${url}: ${result.error}`);
            }
        }
    };
    const workerCount = Math.min(concurrency, queue.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    const successCount = results.filter(r => r.success).length;
    const failedCount = results.filter(r => !r.success).length;
    return {
        success: successCount > 0,
        isParallel: true,
        urls: urlList,
        results,
        totalUrls: urlList.length,
        successCount,
        failedCount,
        totalDuration: Date.now() - startTime,
        error: errors.length > 0 ? errors.join('; ') : undefined
    };
}
|
|
4795
5508
|
/**
|
|
4796
5509
|
* Execute custom JavaScript on page
|
|
4797
5510
|
* ULTRA POWERFUL: Execute any JS code and get results
|
|
@@ -4801,9 +5514,16 @@ export async function handleExecuteJs(page, args) {
|
|
|
4801
5514
|
const waitTime = args.waitForResult || 5000;
|
|
4802
5515
|
try {
|
|
4803
5516
|
let result;
|
|
5517
|
+
// Auto-detect if code contains await and wrap with async IIFE
|
|
5518
|
+
const hasAwait = /\bawait\b/.test(args.code);
|
|
5519
|
+
const isAlreadyAsync = /^\s*\(async\s+function|\(\s*async\s*\(|\(async\s*\(\)\s*=>/.test(args.code.trim());
|
|
5520
|
+
// Wrap code with async IIFE if it contains await but isn't already async
|
|
5521
|
+
const codeToExecute = (hasAwait && !isAlreadyAsync)
|
|
5522
|
+
? `(async () => { ${args.code} })()`
|
|
5523
|
+
: args.code;
|
|
4804
5524
|
if (args.context === 'isolated') {
|
|
4805
5525
|
// Execute in isolated context (sandboxed)
|
|
4806
|
-
result = await page.evaluate(
|
|
5526
|
+
result = await page.evaluate(codeToExecute);
|
|
4807
5527
|
}
|
|
4808
5528
|
else {
|
|
4809
5529
|
// Execute in page context with full access
|
|
@@ -4815,7 +5535,7 @@ export async function handleExecuteJs(page, args) {
|
|
|
4815
5535
|
catch (e) {
|
|
4816
5536
|
return { error: String(e) };
|
|
4817
5537
|
}
|
|
4818
|
-
},
|
|
5538
|
+
}, codeToExecute);
|
|
4819
5539
|
}
|
|
4820
5540
|
// Handle async results
|
|
4821
5541
|
if (result instanceof Promise) {
|