brave-real-browser-mcp-server 2.17.9 → 2.17.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +0 -8
- package/dist/debug-logger.js +28 -0
- package/dist/handlers/advanced-extraction-handlers.js +0 -80
- package/dist/handlers/multi-element-handlers.js +0 -60
- package/dist/handlers/smart-data-extractors.js +0 -475
- package/dist/index.js +75 -30
- package/dist/tool-definitions.js +0 -89
- package/dist/workflows/forensic-media-extractor.js +3 -13
- package/dist/workflows/media-extraction-workflow.js +3 -8
- package/package.json +2 -2
- package/scripts/check-version.js +33 -0
- package/dist/handlers/advanced-video-media-handlers.js +0 -139
package/dist/browser-manager.js
CHANGED
|
@@ -4,15 +4,7 @@ import { ExtensionManager } from './extension-manager.js';
|
|
|
4
4
|
import * as path from 'path';
|
|
5
5
|
import * as net from 'net';
|
|
6
6
|
import { execSync, spawn } from 'child_process';
|
|
7
|
-
import { config as dotenvConfig } from 'dotenv';
|
|
8
7
|
import { BraveInstaller } from './brave-installer.js';
|
|
9
|
-
// Load environment variables from the .env file.
// stdout is muted while dotenv runs so that anything it prints is discarded.
// The original writer is put back in a `finally` so that an exception thrown
// by dotenv cannot leave process.stdout.write permanently replaced.
const originalWrite = process.stdout.write;
// @ts-ignore
process.stdout.write = () => true;
try {
    dotenvConfig();
}
finally {
    process.stdout.write = originalWrite;
}
|
|
16
8
|
// Browser error categorization
|
|
17
9
|
export var BrowserErrorType;
|
|
18
10
|
(function (BrowserErrorType) {
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
5
|
+
const __dirname = path.dirname(__filename);
|
|
6
|
+
// Debug log destination: `mcp-server-debug.log` inside the current working
// directory of the process (process.cwd()).
const LOG_FILE = path.join(process.cwd(), 'mcp-server-debug.log');
/**
 * Prepare a value for JSON.stringify.
 *
 * Error instances keep `name`, `message` and `stack` as non-enumerable
 * properties, so JSON.stringify renders them as "{}". They are copied into a
 * plain object (together with any enumerable extras) so the log stays useful.
 *
 * @param {*} data Value about to be logged.
 * @returns {*} A plain object for Error instances, otherwise `data` unchanged.
 */
function describeForLog(data) {
    if (data instanceof Error) {
        return { ...data, name: data.name, message: data.message, stack: data.stack };
    }
    return data;
}
/**
 * Append a timestamped entry to the debug log file.
 *
 * The entry is `[ISO timestamp] message`, optionally followed by a
 * pretty-printed JSON dump of `data`, and terminated by a 40-dash separator.
 * The write is synchronous. If it fails, a short notice is printed to stderr
 * instead and no exception escapes.
 *
 * @param {string} message Headline of the log entry.
 * @param {*} [data] Optional payload. Skipped only when `undefined` or `null`,
 *   so falsy values such as `0`, `false` and `''` are still recorded.
 * @returns {void}
 */
export function logDebug(message, data) {
    const timestamp = new Date().toISOString();
    let logMessage = `[${timestamp}] ${message}`;
    if (data !== undefined && data !== null) {
        try {
            logMessage += `\nData: ${JSON.stringify(describeForLog(data), null, 2)}`;
        }
        catch (e) {
            // JSON.stringify throws on circular structures and BigInt values.
            logMessage += `\nData: [Circular or Non-Serializable]`;
        }
    }
    logMessage += '\n' + '-'.repeat(40) + '\n';
    try {
        fs.appendFileSync(LOG_FILE, logMessage);
    }
    catch (error) {
        // If we can't write to file, fallback to stderr (which is safe for MCP)
        console.error(`[DEBUG FAILED] ${message}`);
    }
}
|
|
@@ -280,86 +280,6 @@ ${JSON.stringify(videoData, null, 2)}
|
|
|
280
280
|
};
|
|
281
281
|
}, 'Failed to extract advanced video sources');
|
|
282
282
|
}
|
|
283
|
-
/**
 * Deobfuscate JavaScript - inspect the page's <script> elements for common
 * obfuscation markers and pull out embedded URLs, domains and base64 strings.
 *
 * Scripts shorter than 100 characters, or showing none of the markers
 * (`eval(`, `atob(`, hex literals, `_0x…` identifiers, `\x` escapes), are
 * ignored. Base64 literals are reported only when their decoded text contains
 * "http", "video" or ".m3u8".
 *
 * @param args Tool arguments (not read by this handler).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   that holds the JSON-formatted findings.
 */
export async function handleDeobfuscateJS(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('deobfuscate_js', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const deobfuscationResults = await page.evaluate(() => {
            // Everything in this callback runs inside the page, so helpers live here.
            const uniqueMatches = (text, pattern) => {
                const hits = text.match(pattern);
                return hits ? [...new Set(hits)] : [];
            };
            const findings = [];
            const scripts = Array.from(document.querySelectorAll('script'));
            for (const [scriptIndex, scriptEl] of scripts.entries()) {
                const source = scriptEl.textContent || '';
                if (source.length < 100) {
                    continue;
                }
                // Detect obfuscation types (order determines the reported order)
                const markers = [
                    ['eval', source.includes('eval(')],
                    ['base64', source.includes('atob(')],
                    ['hex', source.match(/0x[0-9a-f]{4}/gi) !== null],
                    ['identifier_obfuscation', source.match(/_0x[0-9a-f]+/gi) !== null],
                    ['hex_escape', source.includes('\\x')],
                ];
                const obfuscationType = markers
                    .filter(([, present]) => present)
                    .map(([label]) => label);
                if (obfuscationType.length === 0) {
                    continue;
                }
                // Quoted base64 literals whose decoded form looks media-related
                const base64Strings = [];
                for (const [, encoded] of source.matchAll(/["']([A-Za-z0-9+/]{20,}={0,2})["']/g)) {
                    try {
                        const decoded = atob(encoded);
                        const looksRelevant = decoded.includes('http') || decoded.includes('video') || decoded.includes('.m3u8');
                        if (looksRelevant) {
                            base64Strings.push({
                                original: encoded.substring(0, 50) + '...',
                                decoded: decoded.substring(0, 200)
                            });
                        }
                    }
                    catch (e) {
                        // Not valid base64
                    }
                }
                findings.push({
                    scriptIndex,
                    obfuscationType,
                    extractedData: {
                        urls: uniqueMatches(source, /https?:\/\/[^\s"'<>]+/gi),
                        domains: uniqueMatches(source, /[a-z0-9][a-z0-9-]*\.(com|net|org|io|tv|online|xyz|cc)/gi),
                        apiKeys: [],
                        base64Strings
                    }
                });
            }
            return findings;
        });
        const report = JSON.stringify(deobfuscationResults, null, 2);
        return {
            content: [{
                    type: 'text',
                    text: `🔓 Deobfuscation Results:\n\nFound ${deobfuscationResults.length} obfuscated scripts\n\n${report}`
                }]
        };
    }, 'Failed to deobfuscate JavaScript');
}
|
|
363
283
|
/**
|
|
364
284
|
* Multi-Layer Redirect Tracer - Follow multiple redirect layers to find final video source
|
|
365
285
|
*/
|
|
@@ -67,66 +67,6 @@ export async function handleBatchElementScraper(args) {
|
|
|
67
67
|
};
|
|
68
68
|
}, 'Failed to batch scrape elements');
|
|
69
69
|
}
|
|
70
|
-
/**
 * Collects the attributes (href, src, data-*, ...) of the elements matching a
 * CSS selector on the current page.
 *
 * @param args Tool arguments:
 *   - `selector`: CSS selector passed to document.querySelectorAll.
 *   - `attributes`: optional list of attribute names to read; when empty or
 *     omitted, every attribute of each element is collected.
 *   - `maxElements`: maximum number of elements to report (default 100).
 * @returns Result of withErrorHandling wrapping a single text content item.
 *   The payload is `{ selector, count, attributes }`, where `count` is the
 *   number of elements actually reported. Elements that yield no attributes
 *   are skipped and do not count toward it.
 */
export async function handleAttributeHarvester(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('attribute_harvester', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const selector = args.selector;
        const attributes = args.attributes || [];
        const maxElements = args.maxElements || 100;
        const attributeData = await page.evaluate(({ selector, attributes, maxElements }) => {
            const harvested = [];
            const elements = Array.from(document.querySelectorAll(selector));
            for (const [index, element] of elements.entries()) {
                // Stop scanning as soon as the limit is reached.
                if (harvested.length >= maxElements) {
                    break;
                }
                const attrs = {};
                if (attributes.length > 0) {
                    // Extract specific attributes
                    for (const name of attributes) {
                        const value = element.getAttribute(name);
                        if (value !== null) {
                            attrs[name] = value;
                        }
                    }
                }
                else {
                    // Extract all attributes
                    for (const attr of Array.from(element.attributes)) {
                        attrs[attr.name] = attr.value;
                    }
                }
                if (Object.keys(attrs).length > 0) {
                    harvested.push({ element: index, attrs });
                }
            }
            return {
                selector,
                // Report what was really harvested, not how many nodes matched.
                count: harvested.length,
                attributes: harvested,
            };
        }, { selector, attributes, maxElements });
        return {
            content: [
                {
                    type: 'text',
                    text: `✅ Harvested attributes from ${attributeData.count} elements\n\n${JSON.stringify(attributeData, null, 2)}`,
                },
            ],
        };
    }, 'Failed to harvest attributes');
}
|
|
130
70
|
/**
|
|
131
71
|
* Collects links, classifying each one as internal or external
|
|
132
72
|
*/
|
|
@@ -4,54 +4,6 @@
|
|
|
4
4
|
import { getCurrentPage } from '../browser-manager.js';
|
|
5
5
|
import { validateWorkflow } from '../workflow-validation.js';
|
|
6
6
|
import { withErrorHandling, sleep } from '../system-utils.js';
|
|
7
|
-
/**
 * HTML Elements Extractor - describe the elements matching a selector.
 *
 * For each reported element the payload carries its position in the match
 * list, tag name, id, class name, the first 200 characters of its trimmed
 * text and of its inner HTML, and all of its attributes. Inline style text is
 * added when `includeStyles` is set.
 *
 * @param args Tool arguments:
 *   - `selector`: CSS selector (default `'*'`).
 *   - `maxElements`: maximum number of elements to report (default 100).
 *   - `includeStyles`: include `style.cssText` per element (default false).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   that holds the JSON-formatted element list.
 */
export async function handleHtmlElementsExtractor(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('html_elements_extractor', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const options = {
            selector: args.selector || '*',
            maxElements: args.maxElements || 100,
            includeStyles: args.includeStyles || false,
        };
        const elements = await page.evaluate(({ selector, maxElements, includeStyles }) => {
            const described = [];
            const matched = Array.from(document.querySelectorAll(selector));
            for (const [index, el] of matched.entries()) {
                if (described.length >= maxElements) {
                    break;
                }
                const attributes = {};
                for (const attr of Array.from(el.attributes)) {
                    attributes[attr.name] = attr.value;
                }
                const elementData = {
                    index,
                    tagName: el.tagName,
                    id: el.id || null,
                    className: el.className || null,
                    textContent: el.textContent?.trim().substring(0, 200) || '',
                    innerHTML: el.innerHTML?.substring(0, 200) || '',
                    attributes,
                };
                if (includeStyles && el.style) {
                    elementData.styles = el.style.cssText;
                }
                described.push(elementData);
            }
            return described;
        }, options);
        const listing = JSON.stringify(elements, null, 2);
        return {
            content: [{
                    type: 'text',
                    text: `✅ Extracted ${elements.length} HTML elements\n\n${listing}`,
                }],
        };
    }, 'Failed to extract HTML elements');
}
|
|
55
7
|
/**
|
|
56
8
|
* Tags Finder - Find specific HTML tags
|
|
57
9
|
*/
|
|
@@ -296,70 +248,6 @@ export async function handleAjaxExtractor(args) {
|
|
|
296
248
|
};
|
|
297
249
|
}, 'Failed to extract AJAX requests');
|
|
298
250
|
}
|
|
299
|
-
/**
 * Fetch XHR - capture the responses of fetch and XHR requests made by the
 * current page during a listening window.
 *
 * @param args Tool arguments:
 *   - `duration`: how long to keep listening after the optional reload, in
 *     milliseconds (default 15000).
 *   - `forceReload`: reload the page first so initial requests are captured.
 *     Enabled unless explicitly set to `false`.
 * @returns Result of withErrorHandling wrapping a single text content item.
 *   Each captured entry holds url, status, statusText, headers, method,
 *   postData, the first 5000 characters of the body and a capture timestamp.
 */
export async function handleFetchXHR(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('fetch_xhr', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const duration = args.duration || 15000;
        const forceReload = args.forceReload !== false; // Default true to capture initial requests
        const xhrData = [];
        const responseHandler = async (response) => {
            // The whole handler is guarded: it runs as an event listener, so a
            // throw here would otherwise surface as an unhandled rejection.
            try {
                const request = response.request();
                const resourceType = request.resourceType();
                if (resourceType !== 'xhr' && resourceType !== 'fetch') {
                    return;
                }
                const body = await response.text();
                xhrData.push({
                    url: response.url(),
                    status: response.status(),
                    statusText: response.statusText(),
                    headers: response.headers(),
                    method: request.method(),
                    postData: request.postData(),
                    body: body.substring(0, 5000), // Increased limit
                    timestamp: new Date().toISOString(),
                });
            }
            catch (e) {
                // Response body not available
            }
        };
        page.on('response', responseHandler);
        try {
            if (forceReload) {
                try {
                    await page.reload({ waitUntil: 'networkidle2', timeout: 30000 });
                }
                catch (e) {
                    // Continue even if reload times out
                }
            }
            await sleep(duration);
        }
        finally {
            // Always detach, so the listener cannot outlive this call.
            page.off('response', responseHandler);
        }
        return {
            content: [{
                    type: 'text',
                    text: `✅ Captured ${xhrData.length} Fetch/XHR responses\n\n${JSON.stringify(xhrData, null, 2)}`,
                }],
        };
    }, 'Failed to fetch XHR');
}
|
|
363
251
|
/**
|
|
364
252
|
* Network Recorder - Record all network activity
|
|
365
253
|
*/
|
|
@@ -729,369 +617,6 @@ export async function handleImageExtractorAdvanced(args) {
|
|
|
729
617
|
};
|
|
730
618
|
}, 'Failed to extract images');
|
|
731
619
|
}
|
|
732
|
-
/**
 * Video Source Extractor - combine a DOM scan with a short network capture to
 * list video sources on the current page.
 *
 * DOM scan: every <video> element with its sources, every <iframe> that has a
 * src, and every "player" container (class or id containing "player", or a
 * data-player attribute) holding a video or iframe.
 * Network capture: for `captureDuration` ms, responses that look like HLS/DASH
 * manifests or media segments are recorded. Before waiting, the centre of the
 * first iframe is clicked as a best-effort attempt to start playback.
 *
 * @param args Tool arguments:
 *   - `captureDuration`: capture window in milliseconds (default 6000; used
 *     only when a number is supplied).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   that holds a summary plus the JSON-formatted details.
 */
export async function handleVideoSourceExtractor(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('video_source_extractor', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const captureDuration = typeof args.captureDuration === 'number' ? args.captureDuration : 6000;
        // DOM video elements + iframe detection
        const videoData = await page.evaluate(() => {
            const toSourceEntry = (node) => ({ src: node.src, type: node.type || 'unknown' });
            // 1. Direct video elements
            const videos = Array.from(document.querySelectorAll('video')).map((video, index) => {
                const sources = [];
                if (video.src) {
                    sources.push(toSourceEntry(video));
                }
                for (const sourceEl of Array.from(video.querySelectorAll('source'))) {
                    sources.push(toSourceEntry(sourceEl));
                }
                return {
                    index,
                    poster: video.poster || '',
                    sources,
                    duration: video.duration || 0,
                    width: video.videoWidth || video.width || 0,
                    height: video.videoHeight || video.height || 0,
                    type: 'direct_video'
                };
            });
            // 2. Iframe video sources
            const iframes = [];
            for (const [index, frame] of Array.from(document.querySelectorAll('iframe')).entries()) {
                if (!frame.src) {
                    continue;
                }
                iframes.push({
                    index,
                    src: frame.src,
                    title: frame.title || '',
                    id: frame.id,
                    className: frame.className,
                    width: frame.width,
                    height: frame.height,
                    type: 'iframe_video'
                });
            }
            // 3. Video players with iframes inside
            const embeddedPlayers = [];
            const containers = Array.from(document.querySelectorAll('[class*="player"], [id*="player"], [data-player]'));
            for (const [index, container] of containers.entries()) {
                const innerFrame = container.querySelector('iframe');
                const innerVideo = container.querySelector('video');
                if (!innerFrame && !innerVideo) {
                    continue;
                }
                embeddedPlayers.push({
                    index,
                    hasVideo: !!innerVideo,
                    hasIframe: !!innerFrame,
                    videoSrc: innerVideo ? (innerVideo.src || innerVideo.currentSrc) : null,
                    iframeSrc: innerFrame ? innerFrame.src : null,
                    containerId: container.id,
                    containerClass: container.className
                });
            }
            return { videos, iframes, embeddedPlayers };
        });
        // Network capture for manifests and segments
        const manifests = [];
        const segments = [];
        const manifestUrlPattern = /\.m3u8(\?|$)|\.mpd(\?|$)/i;
        const segmentUrlPattern = /\.ts(\?|$)|\.m4s(\?|$)|\.mp4(\?|$)/i;
        const respHandler = async (response) => {
            try {
                const url = response.url();
                const contentType = (response.headers()['content-type'] || '').toLowerCase();
                const isManifest = manifestUrlPattern.test(url) ||
                    contentType.includes('application/vnd.apple.mpegurl') ||
                    contentType.includes('application/x-mpegurl');
                if (isManifest) {
                    const content = await response.text().catch(() => '');
                    manifests.push({
                        url,
                        type: url.includes('.mpd') ? 'DASH' : 'HLS',
                        status: response.status(),
                        content: content.slice(0, 2000)
                    });
                    return;
                }
                if (segmentUrlPattern.test(url)) {
                    segments.push({ url, status: response.status(), size: response.headers()['content-length'] });
                }
            }
            catch { }
        };
        page.on('response', respHandler);
        // Best-effort: click center of first iframe to trigger playback
        try {
            const centre = await page.evaluate(() => {
                const firstFrame = document.querySelector('iframe');
                if (!firstFrame) {
                    return null;
                }
                const box = firstFrame.getBoundingClientRect();
                return { x: box.left + box.width / 2, y: box.top + box.height / 2 };
            });
            if (centre) {
                await page.mouse.click(centre.x, centre.y);
            }
        }
        catch { }
        await sleep(captureDuration);
        page.off('response', respHandler);
        const result = {
            ...videoData,
            manifests,
            segments,
            summary: {
                totalVideos: videoData.videos.length,
                totalIframes: videoData.iframes.length,
                totalEmbeddedPlayers: videoData.embeddedPlayers.length,
                totalManifests: manifests.length,
                totalSegments: segments.length
            }
        };
        return {
            content: [{
                    type: 'text',
                    text: `✅ Extracted video sources\n\n📊 Summary:\n • Direct <video> elements: ${videoData.videos.length}\n • Iframe sources: ${videoData.iframes.length}\n • Embedded players: ${videoData.embeddedPlayers.length}\n • Manifests: ${manifests.length}\n • Segments: ${segments.length}\n\n${JSON.stringify(result, null, 2)}`,
                }],
        };
    }, 'Failed to extract video sources');
}
|
|
863
|
-
/**
 * Video Player Extractor - describe the video players found on the page.
 *
 * Two passes run inside the page:
 *   1. Containers matched by a fixed list of "player" selectors that hold a
 *      <video> and/or an <iframe>.
 *   2. Iframes whose src contains "embed", "player", "video" or "stream", or
 *      whose `allow` value mentions autoplay / encrypted-media, unless an
 *      entry with the same iframe src was already recorded.
 * The selectors in pass 1 overlap, so one container may be reported once per
 * selector it matches.
 *
 * @param args Tool arguments (not read by this handler).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   that holds the JSON-formatted player list.
 */
export async function handleVideoPlayerExtractor(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('video_player_extractor', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const players = await page.evaluate(() => {
            const found = [];
            // Common video player classes/IDs
            const containerSelectors = [
                '[class*="video-player"]',
                '[class*="player"]',
                '[id*="player"]',
                '[data-player]',
            ];
            for (const selector of containerSelectors) {
                const containers = Array.from(document.querySelectorAll(selector));
                for (const [index, el] of containers.entries()) {
                    const videoEl = el.querySelector('video');
                    const iframeEl = el.querySelector('iframe');
                    if (!videoEl && !iframeEl) {
                        continue;
                    }
                    const playerInfo = {
                        selector,
                        index,
                        hasVideo: !!videoEl,
                        hasIframe: !!iframeEl,
                        className: el.className,
                        id: el.id,
                    };
                    // Video element info
                    if (videoEl) {
                        Object.assign(playerInfo, {
                            videoSrc: videoEl.src || videoEl.currentSrc || '',
                            videoPoster: videoEl.poster || '',
                            videoType: 'direct',
                        });
                    }
                    // Iframe element info
                    if (iframeEl) {
                        Object.assign(playerInfo, {
                            iframeSrc: iframeEl.src || '',
                            iframeTitle: iframeEl.title || '',
                            iframeAllow: iframeEl.getAttribute('allow') || '',
                            videoType: videoEl ? 'hybrid' : 'iframe',
                        });
                    }
                    found.push(playerInfo);
                }
            }
            // Also check standalone iframes (ALL iframes that might be video players)
            const frames = Array.from(document.querySelectorAll('iframe'));
            for (const [index, frame] of frames.entries()) {
                const loweredSrc = (frame.src || '').toLowerCase();
                // Check if iframe is likely a video player
                const looksLikePlayer = loweredSrc.includes('embed') ||
                    loweredSrc.includes('player') ||
                    loweredSrc.includes('video') ||
                    loweredSrc.includes('stream') ||
                    frame.allow?.includes('autoplay') ||
                    frame.allow?.includes('encrypted-media');
                if (!(frame.src && looksLikePlayer)) {
                    continue;
                }
                // Skip iframes already recorded by the container pass
                if (found.some((entry) => entry.iframeSrc === frame.src)) {
                    continue;
                }
                found.push({
                    selector: frame.id ? `#${frame.id}` : `iframe:nth-of-type(${index + 1})`,
                    index,
                    hasVideo: false,
                    hasIframe: true,
                    iframeSrc: frame.src,
                    iframeTitle: frame.title || '',
                    iframeAllow: frame.getAttribute('allow') || '',
                    className: frame.className,
                    id: frame.id,
                    videoType: 'standalone_iframe',
                    isVisible: frame.offsetWidth > 0 && frame.offsetHeight > 0
                });
            }
            return found;
        });
        const listing = JSON.stringify(players, null, 2);
        return {
            content: [{
                    type: 'text',
                    text: `✅ Found ${players.length} video players\n\n${listing}`,
                }],
        };
    }, 'Failed to extract video players');
}
|
|
953
|
-
/**
 * Video Player Hoster Finder - identify the hosting platform behind each
 * iframe on the page.
 *
 * Each iframe's lower-cased src is checked against a fixed table of host
 * fragments. The first fragment contained in the src (in table order) decides
 * the platform, and iframes matching nothing are left out.
 *
 * @param args Tool arguments (not read by this handler).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   that holds the JSON-formatted list of `{ index, platform, src, title }`.
 */
export async function handleVideoPlayerHosterFinder(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('video_player_hoster_finder', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const hosters = await page.evaluate(() => {
            const platforms = {
                // Popular platforms
                'youtube.com': 'YouTube',
                'youtu.be': 'YouTube',
                'vimeo.com': 'Vimeo',
                'dailymotion.com': 'Dailymotion',
                'facebook.com': 'Facebook',
                'twitter.com': 'Twitter',
                'twitch.tv': 'Twitch',
                'streamable.com': 'Streamable',
                // Custom video hosting platforms
                'gdmirrorbot': 'GD Mirror Bot',
                'multimoviesshg.com': 'MultiMovies StreamHG',
                'streamhg.com': 'StreamHG',
                'techinmind.space': 'Tech In Mind Player',
                'premilkyway.com': 'Premium Milky Way CDN',
                'p2pplay.pro': 'P2P Play',
                'rpmhub.site': 'RPM Share',
                'uns.bio': 'UpnShare',
                'smoothpre.com': 'EarnVids/SmoothPre',
                'doodstream.com': 'DoodStream',
                'streamtape.com': 'StreamTape',
                'mixdrop.co': 'MixDrop',
                'upstream.to': 'UpStream',
                'vidcloud': 'VidCloud',
                'fembed': 'Fembed',
                'mp4upload': 'MP4Upload',
            };
            const knownHosts = Object.entries(platforms);
            const matches = [];
            const frames = Array.from(document.querySelectorAll('iframe'));
            for (const [index, frame] of frames.entries()) {
                const loweredSrc = frame.src.toLowerCase();
                // First table entry contained in the src wins.
                const hit = knownHosts.find(([domain]) => loweredSrc.includes(domain));
                if (hit) {
                    matches.push({
                        index,
                        platform: hit[1],
                        src: frame.src,
                        title: frame.title || '',
                    });
                }
            }
            return matches;
        });
        const listing = JSON.stringify(hosters, null, 2);
        return {
            content: [{
                    type: 'text',
                    text: `✅ Found ${hosters.length} video hosting platforms\n\n${listing}`,
                }],
        };
    }, 'Failed to find video hosters');
}
|
|
1018
|
-
/**
 * Original Video Hoster Finder - list where the page's video appears to come
 * from.
 *
 * Collects the src of every <video> (and of its <source> children) and of
 * every <iframe>. It then listens to network responses for `captureDuration`
 * ms and records the hostnames of any .m3u8 / .mpd URLs seen. The centre of
 * the first iframe is clicked once beforehand as a best-effort way to start
 * playback.
 *
 * @param args Tool arguments:
 *   - `captureDuration`: capture window in milliseconds (default 6000; used
 *     only when a number is supplied).
 * @returns Result of withErrorHandling wrapping a single text content item
 *   with `{ directVideos, iframeVideos, possibleSources }` as JSON.
 */
export async function handleOriginalVideoHosterFinder(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('original_video_hoster_finder', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const captureDuration = typeof args.captureDuration === 'number' ? args.captureDuration : 6000;
        const videoData = await page.evaluate(() => {
            // Direct video elements
            const directVideos = [];
            for (const video of Array.from(document.querySelectorAll('video'))) {
                const ownSrc = video.src || video.currentSrc;
                if (ownSrc) {
                    directVideos.push({ src: ownSrc, type: 'direct', poster: video.poster });
                }
                for (const sourceEl of Array.from(video.querySelectorAll('source'))) {
                    if (sourceEl.src) {
                        directVideos.push({
                            src: sourceEl.src,
                            type: sourceEl.type,
                            quality: sourceEl.dataset.quality || 'unknown'
                        });
                    }
                }
            }
            // Iframe videos
            const iframeVideos = Array.from(document.querySelectorAll('iframe'))
                .filter((frame) => frame.src)
                .map((frame) => ({ src: frame.src, type: 'iframe' }));
            return { directVideos, iframeVideos, possibleSources: [] };
        });
        // Network-derived hosts (m3u8/mpd)
        const hosts = new Set();
        const manifestUrlPattern = /\.m3u8(\?|$)|\.mpd(\?|$)/i;
        const respHandler = (response) => {
            try {
                const url = response.url();
                if (!manifestUrlPattern.test(url)) {
                    return;
                }
                try {
                    hosts.add(new URL(url).hostname);
                }
                catch { }
            }
            catch { }
        };
        page.on('response', respHandler);
        // Kick the player once
        try {
            const centre = await page.evaluate(() => {
                const firstFrame = document.querySelector('iframe');
                if (!firstFrame) {
                    return null;
                }
                const box = firstFrame.getBoundingClientRect();
                return { x: box.left + box.width / 2, y: box.top + box.height / 2 };
            });
            if (centre) {
                await page.mouse.click(centre.x, centre.y);
            }
        }
        catch { }
        await sleep(captureDuration);
        page.off('response', respHandler);
        const possibleSources = [...hosts].map((host) => ({ host }));
        const enriched = { ...videoData, possibleSources };
        return {
            content: [{
                    type: 'text',
                    text: `✅ Video sources found\n\n${JSON.stringify(enriched, null, 2)}`,
                }],
        };
    }, 'Failed to find original video hoster');
}
|
|
1095
620
|
/**
|
|
1096
621
|
* URL Redirect Tracer - Trace URL redirects
|
|
1097
622
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,79 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import '
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import { logDebug } from './debug-logger.js';
|
|
6
|
+
// CRITICAL: Patch console.log immediately.
// From here on, every console.log call is recorded in the debug log file and
// re-emitted on stderr instead of stdout.
// NOTE(review): presumably stdout must stay clean for the stdio transport -
// confirm. The original function is kept in `originalConsoleLog`.
const originalConsoleLog = console.log;
console.log = (...logArgs) => {
    logDebug('Captured stdout log:', logArgs);
    console.error(...logArgs);
};
|
|
12
|
+
// Robust .env loading (Manual & Silent)
// The project root is taken to be the parent of the directory holding this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const projectRoot = path.resolve(__dirname, '..');
const envPath = path.join(projectRoot, '.env');
/**
 * Manual .env parser to avoid stdout pollution from dotenv package.
 *
 * Reads `KEY=VALUE` lines from `filePath` and copies them into process.env.
 *   - Blank lines, `#` comment lines, lines without `=` and lines with an
 *     empty key are ignored.
 *   - Only the first `=` separates key from value, so values may contain `=`.
 *   - Whitespace around key and value is dropped, then surrounding quotes are
 *     removed: a leading and/or trailing double quote, or a matching pair of
 *     single quotes.
 *   - A variable that already has a non-empty value in process.env is never
 *     overwritten.
 *
 * @param {string} filePath Path of the .env file to read.
 * @returns {boolean} `true` when the file existed and was processed, `false`
 *   when it is missing or reading it failed (the failure is sent to logDebug).
 */
const loadEnvFile = (filePath) => {
    try {
        if (!fs.existsSync(filePath))
            return false;
        const content = fs.readFileSync(filePath, 'utf-8');
        for (const rawLine of content.split('\n')) {
            const line = rawLine.trim();
            if (!line || line.startsWith('#')) {
                continue;
            }
            const separatorIndex = line.indexOf('=');
            if (separatorIndex <= 0) {
                // No "=" at all, or nothing in front of it.
                continue;
            }
            const key = line.slice(0, separatorIndex).trim();
            // Trim BEFORE stripping quotes; otherwise `KEY = "value"` keeps its
            // leading quote because the value still starts with a space.
            let value = line.slice(separatorIndex + 1).trim().replace(/(^"|"$)/g, '');
            if (value.length >= 2 && value.startsWith("'") && value.endsWith("'")) {
                value = value.slice(1, -1);
            }
            value = value.trim();
            if (!process.env[key]) {
                process.env[key] = value;
            }
        }
        return true;
    }
    catch (e) {
        logDebug('Error loading .env file', e);
        return false;
    }
};
|
|
42
|
+
// Load .env from the project root first; fall back to the current working
// directory. Either outcome is only recorded in the debug log.
if (loadEnvFile(envPath)) {
    logDebug(`Loaded .env manually from: ${envPath}`);
}
else {
    // Try CWD
    const fallbackEnvPath = path.join(process.cwd(), '.env');
    const loadedFromCwd = loadEnvFile(fallbackEnvPath);
    logDebug(loadedFromCwd
        ? `Loaded .env manually from CWD: ${fallbackEnvPath}`
        : `Warning: No .env found at ${envPath} or CWD`);
}
// Startup snapshot of the runtime environment.
const startupInfo = {
    cwd: process.cwd(),
    nodeVersion: process.version,
    projectRoot,
    bravePath: process.env.BRAVE_PATH || 'Not Set'
};
logDebug('Server Starting...', startupInfo);
|
|
3
61
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
4
62
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
5
63
|
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ListPromptsRequestSchema, InitializeRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
64
|
+
// Log uncaught exceptions to the debug log and stderr, then exit with code 1.
process.on('uncaughtException', (error) => {
    logDebug('CRITICAL: Uncaught Exception', {
        message: error.message,
        stack: error.stack
    });
    console.error('CRITICAL: Uncaught Exception', error);
    process.exit(1);
});
// Log unhandled promise rejections; the process keeps running.
process.on('unhandledRejection', (reason) => {
    // A rejection reason is usually an Error, whose message and stack are
    // non-enumerable and would be rendered as "{}" by JSON.stringify. Pull
    // them out explicitly, mirroring the uncaughtException handler above.
    const details = reason instanceof Error
        ? { message: reason.message, stack: reason.stack }
        : reason;
    logDebug('CRITICAL: Unhandled Rejection', details);
    console.error('CRITICAL: Unhandled Rejection', reason);
});
|
|
6
77
|
import { TOOLS, SERVER_INFO, CAPABILITIES, TOOL_NAMES, } from "./tool-definitions.js";
|
|
7
78
|
import { withErrorHandling } from "./system-utils.js";
|
|
8
79
|
import { validateMCPResponse } from "./mcp-response-validator.js";
|
|
@@ -17,7 +88,7 @@ import { handleSaveContentAsMarkdown } from "./handlers/file-handlers.js";
|
|
|
17
88
|
// Import new data extraction handlers
|
|
18
89
|
import { handleExtractJSON, handleScrapeMetaTags, handleExtractSchema, } from "./handlers/data-extraction-handlers.js";
|
|
19
90
|
// Import multi-element handlers
|
|
20
|
-
import { handleBatchElementScraper,
|
|
91
|
+
import { handleBatchElementScraper, handleLinkHarvester, handleMediaExtractor, } from "./handlers/multi-element-handlers.js";
|
|
21
92
|
// Import pagination handlers
|
|
22
93
|
import { handleBreadcrumbNavigator, } from "./handlers/navigation-handlers.js";
|
|
23
94
|
// Import AI-powered handlers
|
|
@@ -31,15 +102,13 @@ import { handleOCREngine, handleAudioCaptchaSolver, handlePuzzleCaptchaHandler,
|
|
|
31
102
|
// Import visual tools handlers
|
|
32
103
|
import { handleElementScreenshot, handleVideoRecording, } from "./handlers/visual-tools-handlers.js";
|
|
33
104
|
// Import smart data extractors
|
|
34
|
-
import {
|
|
105
|
+
import { handleNetworkRecorder, handleImageExtractorAdvanced, handleUrlRedirectTracer, handleApiFinder, } from "./handlers/smart-data-extractors.js";
|
|
35
106
|
// Import dynamic session handlers
|
|
36
107
|
import { handleAjaxContentWaiter, } from "./handlers/dynamic-session-handlers.js";
|
|
37
108
|
// Import monitoring & reporting handlers
|
|
38
109
|
import { handleProgressTracker, } from "./handlers/monitoring-reporting-handlers.js";
|
|
39
|
-
// Import advanced video & media handlers
|
|
40
|
-
import { handleVideoPlayerFinder, handleStreamDetector, handleVideoDownloadLinkFinder, } from "./handlers/advanced-video-media-handlers.js";
|
|
41
110
|
// Import advanced extraction handlers (Ad-bypass & Obfuscation)
|
|
42
|
-
import { handleAdvancedVideoExtraction,
|
|
111
|
+
import { handleAdvancedVideoExtraction, handleMultiLayerRedirectTrace, handleAdProtectionDetector, } from "./handlers/advanced-extraction-handlers.js";
|
|
43
112
|
// Initialize MCP server
|
|
44
113
|
const server = new Server(SERVER_INFO, { capabilities: CAPABILITIES });
|
|
45
114
|
// Register initialize handler (CRITICAL - missing handler can cause crash)
|
|
@@ -115,9 +184,6 @@ export async function executeToolByName(name, args) {
|
|
|
115
184
|
break;
|
|
116
185
|
// Smart Data Extractors
|
|
117
186
|
// DOM & HTML Extraction
|
|
118
|
-
case TOOL_NAMES.HTML_ELEMENTS_EXTRACTOR:
|
|
119
|
-
result = await handleHtmlElementsExtractor(args || {});
|
|
120
|
-
break;
|
|
121
187
|
case TOOL_NAMES.EXTRACT_JSON:
|
|
122
188
|
result = await handleExtractJSON(args || {});
|
|
123
189
|
break;
|
|
@@ -131,9 +197,6 @@ export async function executeToolByName(name, args) {
|
|
|
131
197
|
case TOOL_NAMES.BATCH_ELEMENT_SCRAPER:
|
|
132
198
|
result = await handleBatchElementScraper(args);
|
|
133
199
|
break;
|
|
134
|
-
case TOOL_NAMES.ATTRIBUTE_HARVESTER:
|
|
135
|
-
result = await handleAttributeHarvester(args);
|
|
136
|
-
break;
|
|
137
200
|
// Content Type Specific
|
|
138
201
|
case TOOL_NAMES.LINK_HARVESTER:
|
|
139
202
|
result = await handleLinkHarvester(args || {});
|
|
@@ -191,9 +254,6 @@ export async function executeToolByName(name, args) {
|
|
|
191
254
|
result = await handleVideoRecording(args);
|
|
192
255
|
break;
|
|
193
256
|
// Smart Data Extractors (Advanced)
|
|
194
|
-
case "fetch_xhr":
|
|
195
|
-
result = await handleFetchXHR(args || {});
|
|
196
|
-
break;
|
|
197
257
|
case "network_recorder":
|
|
198
258
|
result = await handleNetworkRecorder(args || {});
|
|
199
259
|
break;
|
|
@@ -203,9 +263,6 @@ export async function executeToolByName(name, args) {
|
|
|
203
263
|
case "image_extractor_advanced":
|
|
204
264
|
result = await handleImageExtractorAdvanced(args || {});
|
|
205
265
|
break;
|
|
206
|
-
case "video_source_extractor":
|
|
207
|
-
result = await handleVideoSourceExtractor(args || {});
|
|
208
|
-
break;
|
|
209
266
|
case "url_redirect_tracer":
|
|
210
267
|
result = await handleUrlRedirectTracer(args);
|
|
211
268
|
break;
|
|
@@ -218,22 +275,10 @@ export async function executeToolByName(name, args) {
|
|
|
218
275
|
result = await handleProgressTracker(args || {});
|
|
219
276
|
break;
|
|
220
277
|
// Advanced Video & Media Download Tools
|
|
221
|
-
case "video_player_finder":
|
|
222
|
-
result = await handleVideoPlayerFinder(args || {});
|
|
223
|
-
break;
|
|
224
|
-
case "stream_detector":
|
|
225
|
-
result = await handleStreamDetector(args || {});
|
|
226
|
-
break;
|
|
227
|
-
case "video_download_link_finder":
|
|
228
|
-
result = await handleVideoDownloadLinkFinder(args || {});
|
|
229
|
-
break;
|
|
230
278
|
// Advanced Extraction Tools (Ad-Bypass & Obfuscation)
|
|
231
279
|
case "advanced_video_extraction":
|
|
232
280
|
result = await handleAdvancedVideoExtraction(args || {});
|
|
233
281
|
break;
|
|
234
|
-
case "deobfuscate_js":
|
|
235
|
-
result = await handleDeobfuscateJS(args || {});
|
|
236
|
-
break;
|
|
237
282
|
case "multi_layer_redirect_trace":
|
|
238
283
|
result = await handleMultiLayerRedirectTrace(args);
|
|
239
284
|
break;
|
package/dist/tool-definitions.js
CHANGED
|
@@ -336,18 +336,6 @@ export const TOOLS = [
|
|
|
336
336
|
},
|
|
337
337
|
// Smart Data Extractors
|
|
338
338
|
// DOM & HTML Extraction (Phase 1)
|
|
339
|
-
{
|
|
340
|
-
name: 'html_elements_extractor',
|
|
341
|
-
description: 'Extract detailed information about HTML elements matching a selector',
|
|
342
|
-
inputSchema: {
|
|
343
|
-
type: 'object',
|
|
344
|
-
properties: {
|
|
345
|
-
selector: { type: 'string', default: '*' },
|
|
346
|
-
maxElements: { type: 'number', default: 100 },
|
|
347
|
-
includeStyles: { type: 'boolean', default: false },
|
|
348
|
-
},
|
|
349
|
-
},
|
|
350
|
-
},
|
|
351
339
|
{
|
|
352
340
|
name: 'extract_json',
|
|
353
341
|
description: 'Extract embedded JSON/API data from the page',
|
|
@@ -398,19 +386,6 @@ export const TOOLS = [
|
|
|
398
386
|
required: ['selector'],
|
|
399
387
|
},
|
|
400
388
|
},
|
|
401
|
-
{
|
|
402
|
-
name: 'attribute_harvester',
|
|
403
|
-
description: 'Collect attributes (href, src, data-*) from elements',
|
|
404
|
-
inputSchema: {
|
|
405
|
-
type: 'object',
|
|
406
|
-
properties: {
|
|
407
|
-
selector: { type: 'string' },
|
|
408
|
-
attributes: { type: 'array', items: { type: 'string' } },
|
|
409
|
-
maxElements: { type: 'number', default: 100 },
|
|
410
|
-
},
|
|
411
|
-
required: ['selector'],
|
|
412
|
-
},
|
|
413
|
-
},
|
|
414
389
|
// Content Type Specific Extractors
|
|
415
390
|
{
|
|
416
391
|
name: 'link_harvester',
|
|
@@ -616,16 +591,6 @@ export const TOOLS = [
|
|
|
616
591
|
},
|
|
617
592
|
},
|
|
618
593
|
// Smart Data Extractors (Advanced)
|
|
619
|
-
{
|
|
620
|
-
name: 'fetch_xhr',
|
|
621
|
-
description: 'Capture fetch and XHR requests with responses',
|
|
622
|
-
inputSchema: {
|
|
623
|
-
type: 'object',
|
|
624
|
-
properties: {
|
|
625
|
-
duration: { type: 'number', default: 15000 },
|
|
626
|
-
},
|
|
627
|
-
},
|
|
628
|
-
},
|
|
629
594
|
{
|
|
630
595
|
name: 'network_recorder',
|
|
631
596
|
description: 'Record all network activity',
|
|
@@ -713,14 +678,6 @@ export const TOOLS = [
|
|
|
713
678
|
},
|
|
714
679
|
},
|
|
715
680
|
},
|
|
716
|
-
{
|
|
717
|
-
name: 'deobfuscate_js',
|
|
718
|
-
description: 'Deobfuscate JavaScript code and extract hidden URLs, domains, and base64-encoded content. Detects eval, atob, hex encoding, and identifier obfuscation.',
|
|
719
|
-
inputSchema: {
|
|
720
|
-
type: 'object',
|
|
721
|
-
properties: {},
|
|
722
|
-
},
|
|
723
|
-
},
|
|
724
681
|
{
|
|
725
682
|
name: 'multi_layer_redirect_trace',
|
|
726
683
|
description: 'Follow multiple layers of redirects (URL redirects and iframe chains) to find final video source. Traces up to specified depth.',
|
|
@@ -754,46 +711,6 @@ export const TOOLS = [
|
|
|
754
711
|
},
|
|
755
712
|
},
|
|
756
713
|
// Phase 3: Media & Video Tools
|
|
757
|
-
{
|
|
758
|
-
name: 'video_source_extractor',
|
|
759
|
-
description: 'Extract raw video sources from video tags and sources',
|
|
760
|
-
inputSchema: {
|
|
761
|
-
type: 'object',
|
|
762
|
-
properties: {
|
|
763
|
-
url: { type: 'string' }
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
},
|
|
767
|
-
{
|
|
768
|
-
name: 'video_player_finder',
|
|
769
|
-
description: 'Identify video players (JWPlayer, VideoJS, etc) and extract config',
|
|
770
|
-
inputSchema: {
|
|
771
|
-
type: 'object',
|
|
772
|
-
properties: {
|
|
773
|
-
url: { type: 'string' }
|
|
774
|
-
}
|
|
775
|
-
}
|
|
776
|
-
},
|
|
777
|
-
{
|
|
778
|
-
name: 'stream_detector',
|
|
779
|
-
description: 'Detects HLS (m3u8) and DASH (mpd) streams from network traffic',
|
|
780
|
-
inputSchema: {
|
|
781
|
-
type: 'object',
|
|
782
|
-
properties: {
|
|
783
|
-
duration: { type: 'number', description: 'Monitoring duration in ms' }
|
|
784
|
-
}
|
|
785
|
-
}
|
|
786
|
-
},
|
|
787
|
-
{
|
|
788
|
-
name: 'video_download_link_finder',
|
|
789
|
-
description: 'Find direct download links for video files',
|
|
790
|
-
inputSchema: {
|
|
791
|
-
type: 'object',
|
|
792
|
-
properties: {
|
|
793
|
-
extensions: { type: 'array', items: { type: 'string' } }
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
|
-
}
|
|
797
714
|
];
|
|
798
715
|
// Tool name constants for type safety
|
|
799
716
|
export const TOOL_NAMES = {
|
|
@@ -814,14 +731,11 @@ export const TOOL_NAMES = {
|
|
|
814
731
|
EXTRACT_SCHEMA: 'extract_schema',
|
|
815
732
|
// Multi-Element Extractors
|
|
816
733
|
BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
|
|
817
|
-
ATTRIBUTE_HARVESTER: 'attribute_harvester',
|
|
818
734
|
// Content Type Specific
|
|
819
735
|
LINK_HARVESTER: 'link_harvester',
|
|
820
736
|
MEDIA_EXTRACTOR: 'media_extractor',
|
|
821
737
|
// DOM & HTML Extraction (Phase 1)
|
|
822
|
-
HTML_ELEMENTS_EXTRACTOR: 'html_elements_extractor',
|
|
823
738
|
// Network Tools (Phase 1)
|
|
824
|
-
FETCH_XHR: 'fetch_xhr',
|
|
825
739
|
NETWORK_RECORDER: 'network_recorder',
|
|
826
740
|
API_FINDER: 'api_finder',
|
|
827
741
|
// Pagination Tools
|
|
@@ -831,9 +745,6 @@ export const TOOL_NAMES = {
|
|
|
831
745
|
SMART_SELECTOR_GENERATOR: 'smart_selector_generator',
|
|
832
746
|
CONTENT_CLASSIFICATION: 'content_classification',
|
|
833
747
|
// Phase 3: Media & Video
|
|
834
|
-
VIDEO_SOURCE_EXTRACTOR: 'video_source_extractor',
|
|
835
|
-
VIDEO_PLAYER_FINDER: 'video_player_finder',
|
|
836
|
-
STREAM_DETECTOR: 'stream_detector',
|
|
837
748
|
// Search & Filter Tools
|
|
838
749
|
KEYWORD_SEARCH: 'keyword_search',
|
|
839
750
|
REGEX_PATTERN_MATCHER: 'regex_pattern_matcher',
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { handleBrowserInit } from '../handlers/browser-handlers.js';
|
|
2
2
|
import { handleNavigate } from '../handlers/navigation-handlers.js';
|
|
3
|
-
import { handleAdProtectionDetector, handleAdvancedVideoExtraction
|
|
4
|
-
import { handleVideoDownloadLinkFinder } from '../handlers/advanced-video-media-handlers.js';
|
|
3
|
+
import { handleAdProtectionDetector, handleAdvancedVideoExtraction } from '../handlers/advanced-extraction-handlers.js';
|
|
5
4
|
import { handleSmartSelectorGenerator } from '../handlers/ai-powered-handlers.js';
|
|
6
5
|
import { handleNetworkRecorder, handleApiFinder } from '../handlers/smart-data-extractors.js';
|
|
7
6
|
import { handleRegexPatternMatcher } from '../handlers/search-filter-handlers.js';
|
|
@@ -48,15 +47,7 @@ async function main() {
|
|
|
48
47
|
}
|
|
49
48
|
// Logic Extraction (looking for player config)
|
|
50
49
|
try {
|
|
51
|
-
|
|
52
|
-
// Filter for relevant keys
|
|
53
|
-
if (deobfuscated && deobfuscated.content) {
|
|
54
|
-
const keyTerms = ['player', 'token', 'm3u8', 'mp4', 'config'];
|
|
55
|
-
const relevantScripts = deobfuscated.content
|
|
56
|
-
.filter((c) => c.text && keyTerms.some(term => c.text.includes(term)))
|
|
57
|
-
.map((c) => c.text.substring(0, 200) + "...");
|
|
58
|
-
report.infrastructure.event_handlers = relevantScripts;
|
|
59
|
-
}
|
|
50
|
+
console.log("Deobfuscation is now handled by Advanced Video Extraction.");
|
|
60
51
|
}
|
|
61
52
|
catch (e) {
|
|
62
53
|
console.log("Deobfuscation skipped");
|
|
@@ -91,8 +82,7 @@ async function main() {
|
|
|
91
82
|
// Assuming videoAssets structure, we'd loop and add to report.targets
|
|
92
83
|
// For now, dump the raw result
|
|
93
84
|
report.targets_raw = videoAssets;
|
|
94
|
-
|
|
95
|
-
report.download_links = downloadLinks;
|
|
85
|
+
report.download_links = "Handled by Advanced Video Extraction";
|
|
96
86
|
console.log("--- MASTER REPORT JSON ---");
|
|
97
87
|
console.log(JSON.stringify(report, null, 2));
|
|
98
88
|
console.log("--- END REPORT ---");
|
|
@@ -3,8 +3,7 @@ import { handleNavigate } from '../handlers/navigation-handlers.js';
|
|
|
3
3
|
import { handleRandomScroll } from '../handlers/interaction-handlers.js';
|
|
4
4
|
import { handleElementScreenshot } from '../handlers/visual-tools-handlers.js';
|
|
5
5
|
import { handleNetworkRecorder } from '../handlers/smart-data-extractors.js';
|
|
6
|
-
import { handleAdvancedVideoExtraction,
|
|
7
|
-
import { handleVideoDownloadLinkFinder } from '../handlers/advanced-video-media-handlers.js';
|
|
6
|
+
import { handleAdvancedVideoExtraction, handleAdProtectionDetector } from '../handlers/advanced-extraction-handlers.js';
|
|
8
7
|
async function main() {
|
|
9
8
|
const targetUrl = process.argv[2];
|
|
10
9
|
if (!targetUrl) {
|
|
@@ -45,9 +44,7 @@ async function main() {
|
|
|
45
44
|
}
|
|
46
45
|
// 6. Obfuscation Bypass
|
|
47
46
|
if (!foundMedia) {
|
|
48
|
-
console.log("Deep Scan empty.
|
|
49
|
-
const deobfuscated = await handleDeobfuscateJS({});
|
|
50
|
-
console.log("Deobfuscation Results:", deobfuscated);
|
|
47
|
+
console.log("Deep Scan empty. Deobfuscation is now handled by Advanced Video Extraction.");
|
|
51
48
|
}
|
|
52
49
|
// 7. Network Traffic
|
|
53
50
|
if (!foundMedia) {
|
|
@@ -57,9 +54,7 @@ async function main() {
|
|
|
57
54
|
}
|
|
58
55
|
// Phase 3: Redirect & Link Validation
|
|
59
56
|
console.log("\n--- Phase 3: Redirect & Link Validation ---");
|
|
60
|
-
console.log("
|
|
61
|
-
const links = await handleVideoDownloadLinkFinder({});
|
|
62
|
-
console.log("Page Video Links:", links);
|
|
57
|
+
console.log("Direct download link finding is now integrated into Advanced Video Extraction.");
|
|
63
58
|
// Phase 4: Fallback
|
|
64
59
|
console.log("\n--- Phase 4: Fallback / Manual Aid ---");
|
|
65
60
|
if (!foundMedia) {
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.17.
|
|
3
|
+
"version": "2.17.11",
|
|
4
4
|
"description": "Universal AI IDE MCP Server - Auto-detects and supports all AI IDEs (Claude Desktop, Cursor, Windsurf, Cline, Zed, VSCode, Qoder AI, etc.) with Brave browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"scripts": {
|
|
8
|
-
"preinstall": "npm
|
|
8
|
+
"preinstall": "npm update && node scripts/check-version.js",
|
|
9
9
|
"postinstall": "node scripts/patch-puppeteer-screen-recorder.cjs",
|
|
10
10
|
"clean": "rimraf dist",
|
|
11
11
|
"clean:cache": "npm cache clean --force",
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { exec } from 'child_process';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
|
|
6
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
const __dirname = path.dirname(__filename);
|
|
8
|
+
|
|
9
|
+
const packageName = 'brave-real-puppeteer-core';
|
|
10
|
+
const outputFile = path.join(process.cwd(), '.latest-version');
|
|
11
|
+
|
|
12
|
+
console.log(`Checking latest version for ${packageName}...`);
|
|
13
|
+
|
|
14
|
+
exec(`npm view ${packageName} version`, (error, stdout, stderr) => {
|
|
15
|
+
if (error) {
|
|
16
|
+
console.error(`Error checking version: ${error.message}`);
|
|
17
|
+
console.log('Version check skipped');
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
const version = stdout.trim();
|
|
22
|
+
if (version) {
|
|
23
|
+
console.log(`Latest version found: ${version}`);
|
|
24
|
+
try {
|
|
25
|
+
fs.writeFileSync(outputFile, version);
|
|
26
|
+
console.log(`Saved to ${outputFile}`);
|
|
27
|
+
} catch (err) {
|
|
28
|
+
console.error(`Failed to write .latest-version: ${err.message}`);
|
|
29
|
+
}
|
|
30
|
+
} else {
|
|
31
|
+
console.log('Version check returned empty. Skipped.');
|
|
32
|
+
}
|
|
33
|
+
});
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
-
/**
|
|
3
|
-
* Extract raw video sources from <video> tags and <source> elements
|
|
4
|
-
*/
|
|
5
|
-
export async function handleVideoSourceExtractor(args) {
|
|
6
|
-
const { url } = args;
|
|
7
|
-
const page = getPageInstance();
|
|
8
|
-
if (!page)
|
|
9
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
10
|
-
if (url && page.url() !== url)
|
|
11
|
-
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
12
|
-
const sources = await page.evaluate(() => {
|
|
13
|
-
return Array.from(document.querySelectorAll('video')).map((v, i) => ({
|
|
14
|
-
index: i,
|
|
15
|
-
src: v.src,
|
|
16
|
-
currentSrc: v.currentSrc,
|
|
17
|
-
sources: Array.from(v.querySelectorAll('source')).map(s => ({ src: s.src, type: s.type })),
|
|
18
|
-
poster: v.poster
|
|
19
|
-
}));
|
|
20
|
-
});
|
|
21
|
-
return { content: [{ type: 'text', text: JSON.stringify(sources, null, 2) }] };
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Identify common video players and configuration
|
|
25
|
-
*/
|
|
26
|
-
export async function handleVideoPlayerFinder(args) {
|
|
27
|
-
const { url } = args;
|
|
28
|
-
const page = getPageInstance();
|
|
29
|
-
if (!page)
|
|
30
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
31
|
-
if (url && page.url() !== url)
|
|
32
|
-
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
33
|
-
const players = await page.evaluate(() => {
|
|
34
|
-
const detected = [];
|
|
35
|
-
// @ts-ignore
|
|
36
|
-
if (window.jwplayer)
|
|
37
|
-
detected.push('JWPlayer');
|
|
38
|
-
// @ts-ignore
|
|
39
|
-
if (window.videojs)
|
|
40
|
-
detected.push('VideoJS');
|
|
41
|
-
// Check for iframes
|
|
42
|
-
document.querySelectorAll('iframe').forEach(f => {
|
|
43
|
-
if (f.src.includes('youtube.com/embed'))
|
|
44
|
-
detected.push('YouTube Embed');
|
|
45
|
-
if (f.src.includes('vimeo.com'))
|
|
46
|
-
detected.push('Vimeo Embed');
|
|
47
|
-
});
|
|
48
|
-
return [...new Set(detected)];
|
|
49
|
-
});
|
|
50
|
-
return { content: [{ type: 'text', text: `Detected Players: ${players.join(', ') || 'None found'}` }] };
|
|
51
|
-
}
|
|
52
|
-
/**
|
|
53
|
-
* Detect HLS (m3u8) / DASH (mpd) streams in network traffic
|
|
54
|
-
*/
|
|
55
|
-
export async function handleStreamDetector(args) {
|
|
56
|
-
const page = getPageInstance();
|
|
57
|
-
if (!page)
|
|
58
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
59
|
-
const duration = args.duration || 10000;
|
|
60
|
-
const streams = [];
|
|
61
|
-
const handler = (response) => {
|
|
62
|
-
const url = response.url();
|
|
63
|
-
if (url.includes('.m3u8') || url.includes('.mpd')) {
|
|
64
|
-
streams.push({ url, type: url.includes('.m3u8') ? 'HLS' : 'DASH', status: response.status() });
|
|
65
|
-
}
|
|
66
|
-
};
|
|
67
|
-
page.on('response', handler);
|
|
68
|
-
await new Promise(resolve => setTimeout(resolve, duration));
|
|
69
|
-
page.off('response', handler);
|
|
70
|
-
return { content: [{ type: 'text', text: JSON.stringify(streams, null, 2) }] };
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Trace URL redirects
|
|
74
|
-
*/
|
|
75
|
-
export async function handleRedirectTracer(args) {
|
|
76
|
-
const page = getPageInstance();
|
|
77
|
-
if (!page)
|
|
78
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
79
|
-
const chain = [];
|
|
80
|
-
const handler = (response) => {
|
|
81
|
-
if ([301, 302, 303, 307, 308].includes(response.status())) {
|
|
82
|
-
chain.push(`${response.url()} -> ${response.headers()['location']}`);
|
|
83
|
-
}
|
|
84
|
-
};
|
|
85
|
-
page.on('response', handler);
|
|
86
|
-
await page.goto(args.url, { waitUntil: 'networkidle2' });
|
|
87
|
-
page.off('response', handler);
|
|
88
|
-
return { content: [{ type: 'text', text: JSON.stringify({ finalUrl: page.url(), redirectChain: chain }, null, 2) }] };
|
|
89
|
-
}
|
|
90
|
-
/**
|
|
91
|
-
* Find direct video download links
|
|
92
|
-
*/
|
|
93
|
-
export async function handleVideoDownloadLinkFinder(args) {
|
|
94
|
-
const page = getPageInstance();
|
|
95
|
-
if (!page)
|
|
96
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
97
|
-
const exts = args.extensions || ['.mp4', '.mkv', '.avi', '.mov', '.webm'];
|
|
98
|
-
const links = await page.evaluate((extensions) => {
|
|
99
|
-
return Array.from(document.querySelectorAll('a'))
|
|
100
|
-
.filter(a => extensions.some(ext => a.href.toLowerCase().endsWith(ext)))
|
|
101
|
-
.map(a => ({ text: a.textContent, href: a.href }));
|
|
102
|
-
}, exts);
|
|
103
|
-
return { content: [{ type: 'text', text: JSON.stringify(links, null, 2) }] };
|
|
104
|
-
}
|
|
105
|
-
// --- Implementation of missing "Ghost" handlers required by index.ts ---
|
|
106
|
-
// Aliases or specific implementations
|
|
107
|
-
export const handleVideoLinkFinder = handleVideoDownloadLinkFinder;
|
|
108
|
-
export async function handleVideoDownloadButton(args) {
|
|
109
|
-
// Basic implementation trying to find "Download" buttons contextually
|
|
110
|
-
const page = getPageInstance();
|
|
111
|
-
if (!page)
|
|
112
|
-
throw new Error('Browser not initialized');
|
|
113
|
-
const downloadProbability = await page.evaluate(() => {
|
|
114
|
-
const buttons = Array.from(document.querySelectorAll('button, a'));
|
|
115
|
-
return buttons.filter(b => b.textContent?.toLowerCase().includes('download')).map(b => ({
|
|
116
|
-
text: b.textContent,
|
|
117
|
-
outerHTML: b.outerHTML.substring(0, 100)
|
|
118
|
-
}));
|
|
119
|
-
});
|
|
120
|
-
return { content: [{ type: 'text', text: JSON.stringify(downloadProbability, null, 2) }] };
|
|
121
|
-
}
|
|
122
|
-
export async function handleVideoPlayPushSource(args) {
|
|
123
|
-
return { content: [{ type: 'text', text: "Video Play Push Source detected (Simulated)" }] };
|
|
124
|
-
}
|
|
125
|
-
export async function handleVideoPlayButtonClick(args) {
|
|
126
|
-
const page = getPageInstance();
|
|
127
|
-
if (!page)
|
|
128
|
-
throw new Error('Browser not initialized');
|
|
129
|
-
// Try to click the first play button found
|
|
130
|
-
const clicked = await page.evaluate(() => {
|
|
131
|
-
const playBtn = document.querySelector('button[aria-label="Play"], .vjs-big-play-button, .ytp-play-button');
|
|
132
|
-
if (playBtn instanceof HTMLElement) {
|
|
133
|
-
playBtn.click();
|
|
134
|
-
return true;
|
|
135
|
-
}
|
|
136
|
-
return false;
|
|
137
|
-
});
|
|
138
|
-
return { content: [{ type: 'text', text: clicked ? "Clicked Play Button" : "No Play Button Found" }] };
|
|
139
|
-
}
|