brave-real-browser-mcp-server 2.24.5 → 2.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/advanced-tools.js +366 -0
- package/dist/index.js +8 -1
- package/dist/tool-definitions.js +123 -0
- package/package.json +4 -2
|
@@ -2876,3 +2876,369 @@ export async function handleStreamExtractor(page, args) {
|
|
|
2876
2876
|
: 'No direct URLs found',
|
|
2877
2877
|
};
|
|
2878
2878
|
}
|
|
2879
|
+
/**
 * Advanced web crawler with Crawlee + brave-real-launcher integration.
 * Features: URL queue, proxy rotation, rate limiting, data extraction.
 *
 * Optional packages are imported lazily: when `crawlee` cannot be imported a
 * structured failure object is returned; when `brave-real-launcher` or
 * `brave-real-puppeteer-core` cannot be imported the crawler falls back to
 * Crawlee's defaults.
 *
 * @param {object} page - Active browser page supplied by the tool dispatcher.
 *   It is not used here; the crawler launches its own browser instances.
 * @param {object} args - Tool arguments.
 * @param {string[]} args.startUrls - Seed URLs, queued at depth 0.
 * @param {number} [args.maxDepth=3] - Links are followed only from pages whose
 *   depth is below this value (0 = crawl the start URLs only).
 * @param {number} [args.maxPages=50] - Upper bound on recorded page results.
 * @param {number} [args.concurrency=3] - Passed to Crawlee as `maxConcurrency`.
 * @param {number} [args.rateLimit=2] - Maximum navigations per second
 *   (a value <= 0 disables throttling).
 * @param {number} [args.retryCount=3] - Passed to Crawlee as `maxRequestRetries`.
 * @param {number} [args.timeout=30000] - Request handler timeout in ms.
 * @param {string} [args.includePattern] - Case-insensitive regex a URL must match.
 * @param {string} [args.excludePattern] - Case-insensitive regex a URL must not match.
 * @param {string[]} [args.proxyList] - Proxy URLs rotated by Crawlee.
 * @param {string} [args.userAgent] - Custom User-Agent for every page.
 * @param {object} [args.headers] - Extra HTTP headers for every page.
 * @param {boolean} [args.blockPopups=true] - Neutralise window.open/alert/confirm/prompt.
 * @param {boolean} [args.blockOverlayAds=true] - Hide fixed / high z-index ad-like elements.
 * @param {object} [args.extractSelectors] - Map of result key -> CSS selector.
 * @param {boolean} [args.extractVideoLinks=true] - Collect video URLs found in the page.
 * @param {boolean} [args.followLinks=true] - Enqueue links discovered on each page.
 * @param {boolean} [args.downloadMedia] - Together with `savePath`, records media
 *   URLs under `extractedData.mediaUrls` (nothing is written to disk here).
 * @param {string} [args.savePath] - See `downloadMedia`.
 * @returns {Promise<{success: boolean, crawledPages: number, results: object[], errors: string[], message: string}>}
 * @throws {SyntaxError} When `includePattern` / `excludePattern` is not a valid regex.
 */
export async function handleWebCrawler(page, args) {
    // Import Crawlee dynamically to avoid load-time errors if not installed
    let crawlee;
    try {
        crawlee = await import('crawlee');
    }
    catch (e) {
        return {
            success: false,
            crawledPages: 0,
            results: [],
            errors: ['Crawlee not installed. Run: npm install crawlee'],
            message: '❌ Crawlee package not found',
        };
    }
    const { PuppeteerCrawler, RequestQueue, Configuration, ProxyConfiguration } = crawlee;
    // Import brave-real-launcher for browser launch
    let getBravePath;
    try {
        const launcher = await import('brave-real-launcher');
        getBravePath = launcher.getBravePath;
    }
    catch (e) {
        // Fallback - will use default Chromium
    }
    // Import brave-real-puppeteer-core for stealth features
    let braveRealPuppeteerCore;
    try {
        braveRealPuppeteerCore = await import('brave-real-puppeteer-core');
    }
    catch (e) {
        // Will use default puppeteer
    }
    const results = [];
    const errors = [];
    const visited = new Set();
    // Number of pages accepted for processing. Each accepted page pushes exactly
    // one entry to `results`, so reserving the slot synchronously (before any
    // await) keeps concurrent handlers from overshooting maxPages.
    let acceptedPages = 0;
    // Configuration
    const maxDepth = args.maxDepth ?? 3;
    const maxPages = args.maxPages ?? 50;
    const concurrency = args.concurrency ?? 3;
    const rateLimit = args.rateLimit ?? 2;
    const retryCount = args.retryCount ?? 3;
    const timeout = args.timeout ?? 30000;
    // URL filtering patterns
    const includePattern = args.includePattern ? new RegExp(args.includePattern, 'i') : null;
    const excludePattern = args.excludePattern ? new RegExp(args.excludePattern, 'i') : null;
    // Rate limiting: every caller synchronously reserves the next free time slot,
    // so concurrent navigations are spaced out instead of all waking together.
    const rateLimitDelay = rateLimit > 0 ? 1000 / rateLimit : 0;
    let nextSlot = 0;
    const enforceRateLimit = async () => {
        const now = Date.now();
        const scheduled = Math.max(now, nextSlot);
        nextSlot = scheduled + rateLimitDelay;
        if (scheduled > now) {
            await new Promise(r => setTimeout(r, scheduled - now));
        }
    };
    let requestQueue;
    try {
        // Configure Crawlee to use memory storage (no disk)
        Configuration.getGlobalConfig().set('persistStorage', false);
        // Create request queue with start URLs. A uniquely named queue is used
        // because the default queue is shared by the whole process: URLs handled
        // by an earlier call would otherwise be treated as already done.
        const queueName = `web-crawler-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
        requestQueue = await RequestQueue.open(queueName);
        for (const url of args.startUrls) {
            await requestQueue.addRequest({
                url,
                userData: { depth: 0 },
            });
        }
        // Proxy rotation is delegated to Crawlee's ProxyConfiguration
        const proxyUrls = Array.isArray(args.proxyList) ? args.proxyList.filter(Boolean) : [];
        const proxyConfiguration = proxyUrls.length > 0 && ProxyConfiguration
            ? new ProxyConfiguration({ proxyUrls })
            : undefined;
        // Get Brave executable path if available
        let executablePath;
        try {
            if (getBravePath) {
                executablePath = getBravePath();
            }
        }
        catch (e) {
            // Use default
        }
        const crawler = new PuppeteerCrawler({
            requestQueue,
            maxConcurrency: concurrency,
            maxRequestRetries: retryCount,
            requestHandlerTimeoutSecs: timeout / 1000,
            // Only pass the option when proxies were actually supplied
            ...(proxyConfiguration ? { proxyConfiguration } : {}),
            // Use brave-real-puppeteer-core with all stealth features
            launchContext: {
                // Use brave-real-puppeteer-core as custom launcher for 50+ stealth features
                launcher: braveRealPuppeteerCore || undefined,
                launchOptions: {
                    headless: true,
                    executablePath,
                    args: [
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-dev-shm-usage',
                        '--disable-accelerated-2d-canvas',
                        '--disable-gpu',
                    ],
                },
            },
            // Browser pool configuration
            browserPoolOptions: {
                maxOpenPagesPerBrowser: 1,
            },
            // Pre-navigation hook for rate limiting, popup blocking, and movie streaming optimizations
            preNavigationHooks: [
                async (crawlingContext) => {
                    await enforceRateLimit();
                    const pg = crawlingContext.page;
                    // Set custom user agent if provided
                    if (args.userAgent) {
                        await pg.setUserAgent(args.userAgent);
                    }
                    // Set custom headers if provided
                    if (args.headers) {
                        await pg.setExtraHTTPHeaders(args.headers);
                    }
                    // Block popups and overlay ads (default: true for movie streaming)
                    if (args.blockPopups !== false) {
                        await pg.evaluateOnNewDocument(() => {
                            // Block window.open popups
                            window.open = () => null;
                            // Block alert, confirm, prompt
                            window.alert = () => { };
                            window.confirm = () => true;
                            window.prompt = () => null;
                            // Block popups opened through script-created anchors: the element
                            // is created normally (all arguments forwarded), but a programmatic
                            // click() is ignored while the anchor targets a new window.
                            const origCreate = document.createElement.bind(document);
                            document.createElement = (tag, ...rest) => {
                                const el = origCreate(tag, ...rest);
                                if (String(tag).toLowerCase() === 'a' && typeof el.click === 'function') {
                                    const origClick = el.click.bind(el);
                                    el.click = () => {
                                        if (el.target !== '_blank') {
                                            origClick();
                                        }
                                    };
                                }
                                return el;
                            };
                        });
                    }
                    // Block overlay ads and floating elements
                    if (args.blockOverlayAds !== false) {
                        await pg.evaluateOnNewDocument(() => {
                            // Remove overlay ads after DOM load
                            const removeOverlays = () => {
                                const selectors = [
                                    '[class*="popup"]', '[class*="modal"]', '[class*="overlay"]',
                                    '[id*="popup"]', '[id*="modal"]', '[id*="overlay"]',
                                    '[class*="ad-"]', '[class*="-ad"]', '[class*="advert"]',
                                    '[class*="banner"]', '[class*="sticky"]', '[class*="float"]',
                                    'div[style*="position: fixed"]', 'div[style*="z-index: 9"]',
                                ];
                                selectors.forEach(sel => {
                                    document.querySelectorAll(sel).forEach(el => {
                                        const style = window.getComputedStyle(el);
                                        // zIndex is a string ('auto', '999', ...); compare numerically.
                                        // 'auto' parses to NaN, which never exceeds the threshold.
                                        const zIndex = Number.parseInt(style.zIndex, 10);
                                        if (style.position === 'fixed' || zIndex > 1000) {
                                            el.style.display = 'none';
                                        }
                                    });
                                });
                            };
                            document.addEventListener('DOMContentLoaded', removeOverlays);
                            setInterval(removeOverlays, 2000);
                        });
                    }
                },
            ],
            // Main request handler
            requestHandler: async ({ request, page: crawlerPage }) => {
                const depth = request.userData.depth || 0;
                const url = request.url;
                // Skip if already visited or max pages reached
                if (visited.has(url) || acceptedPages >= maxPages) {
                    return;
                }
                visited.add(url);
                // URL filtering
                if (includePattern && !includePattern.test(url))
                    return;
                if (excludePattern && excludePattern.test(url))
                    return;
                // Reserve the result slot; no await has happened since the limit check above
                acceptedPages += 1;
                const result = {
                    url,
                    depth,
                };
                try {
                    // Get page title
                    result.title = await crawlerPage.title();
                    // Extract data using selectors
                    if (args.extractSelectors) {
                        result.extractedData = {};
                        for (const [key, selector] of Object.entries(args.extractSelectors)) {
                            try {
                                const elements = await crawlerPage.$$(selector);
                                if (elements.length === 1) {
                                    result.extractedData[key] = await crawlerPage.$eval(selector, (el) => el.textContent?.trim() || el.getAttribute('href') || el.getAttribute('src'));
                                }
                                else if (elements.length > 1) {
                                    result.extractedData[key] = await crawlerPage.$$eval(selector, (els) => els.map(el => el.textContent?.trim() || el.getAttribute('href') || el.getAttribute('src')).filter(Boolean));
                                }
                            }
                            catch (e) {
                                // Best-effort: a selector that fails to evaluate leaves its key unset
                            }
                        }
                    }
                    // Extract video links (JWPlayer, DooPlayer, iframes, ajax sources)
                    if (args.extractVideoLinks !== false) {
                        result.videoLinks = await crawlerPage.evaluate(() => {
                            const videoLinks = [];
                            const videoPatterns = /\.(m3u8|mp4|mkv|webm|avi|mov|flv|wmv|ts)(\?|$)/i;
                            // 1. JWPlayer detection
                            if (window.jwplayer) {
                                try {
                                    const players = document.querySelectorAll('.jwplayer, [id*="jwplayer"]');
                                    players.forEach((_, idx) => {
                                        try {
                                            const player = window.jwplayer(idx);
                                            if (player && player.getPlaylistItem) {
                                                const item = player.getPlaylistItem();
                                                if (item?.file) {
                                                    videoLinks.push({ url: item.file, type: item.file.includes('.m3u8') ? 'm3u8' : 'mp4', source: 'jwplayer' });
                                                }
                                                if (item?.sources) {
                                                    item.sources.forEach((s) => {
                                                        if (s.file)
                                                            videoLinks.push({ url: s.file, type: s.type || 'mp4', source: 'jwplayer' });
                                                    });
                                                }
                                            }
                                        }
                                        catch { }
                                    });
                                }
                                catch { }
                            }
                            // 2. DooPlayer detection (common in movie sites)
                            if (window.dooPlayer || document.querySelector('[id*="doo"]')) {
                                try {
                                    const dooConfig = window.dooPlayer?.config || window.player_config;
                                    if (dooConfig?.source) {
                                        videoLinks.push({ url: dooConfig.source, type: 'm3u8', source: 'dooplayer' });
                                    }
                                }
                                catch { }
                            }
                            // 3. Iframe video sources
                            document.querySelectorAll('iframe').forEach(iframe => {
                                const src = iframe.src || iframe.getAttribute('data-src') || '';
                                if (src && (src.includes('embed') || src.includes('player') || src.includes('stream'))) {
                                    videoLinks.push({ url: src, type: 'iframe', source: 'iframe' });
                                }
                            });
                            // 4. Video tags
                            document.querySelectorAll('video source, video').forEach(el => {
                                const src = el.getAttribute('src') || el.src;
                                if (src && videoPatterns.test(src)) {
                                    const ext = src.match(videoPatterns)?.[1] || 'mp4';
                                    videoLinks.push({ url: src, type: ext, source: 'video-tag' });
                                }
                            });
                            // 5. Hidden links in scripts (ajax pattern)
                            document.querySelectorAll('script:not([src])').forEach(script => {
                                const content = script.textContent || '';
                                // m3u8/mp4 in script
                                const matches = content.match(/https?:\/\/[^\s"'<>]+\.(m3u8|mp4|mkv)[^\s"'<>]*/gi);
                                if (matches) {
                                    matches.forEach(url => {
                                        const ext = url.match(videoPatterns)?.[1] || 'mp4';
                                        videoLinks.push({ url, type: ext, source: 'ajax-script' });
                                    });
                                }
                            });
                            // 6. Data attributes with video URLs
                            document.querySelectorAll('[data-file], [data-source], [data-video], [data-stream]').forEach(el => {
                                const url = el.getAttribute('data-file') || el.getAttribute('data-source') ||
                                    el.getAttribute('data-video') || el.getAttribute('data-stream');
                                if (url && (videoPatterns.test(url) || url.includes('m3u8'))) {
                                    videoLinks.push({ url, type: url.includes('m3u8') ? 'm3u8' : 'mp4', source: 'data-attr' });
                                }
                            });
                            // Deduplicate by URL (the last entry for a URL wins)
                            return [...new Map(videoLinks.map(v => [v.url, v])).values()];
                        });
                    }
                    // Follow links if enabled and depth allows
                    if (args.followLinks !== false && depth < maxDepth && results.length < maxPages) {
                        // Get all absolute http(s) links
                        const pageLinks = await crawlerPage.$$eval('a[href]', (anchors) => anchors.map(a => a.href).filter(href => href.startsWith('http')));
                        result.links = pageLinks.slice(0, 100); // Limit stored links
                        // Filter out visited links and links rejected by the URL patterns
                        const linksToEnqueue = pageLinks.filter((link) => {
                            if (visited.has(link))
                                return false;
                            if (includePattern && !includePattern.test(link))
                                return false;
                            if (excludePattern && excludePattern.test(link))
                                return false;
                            return true;
                        });
                        // Add at most 50 links per page straight to the request queue,
                        // one level deeper than the current page
                        for (const link of linksToEnqueue.slice(0, 50)) {
                            try {
                                await requestQueue.addRequest({
                                    url: link,
                                    userData: { depth: depth + 1 },
                                });
                            }
                            catch (e) {
                                // Best-effort: skip links the queue refuses to accept
                            }
                        }
                    }
                    // Record media URLs if enabled (the files themselves are not fetched here)
                    if (args.downloadMedia && args.savePath) {
                        const mediaUrls = await crawlerPage.$$eval('img[src], video source[src], a[href$=".pdf"], a[href$=".jpg"], a[href$=".png"]', (els) => els.map(el => el.getAttribute('src') || el.getAttribute('href')).filter(Boolean));
                        result.extractedData = result.extractedData || {};
                        result.extractedData.mediaUrls = mediaUrls;
                    }
                    results.push(result);
                }
                catch (error) {
                    // Keep the partial result and report the failure instead of rethrowing
                    result.error = error instanceof Error ? error.message : String(error);
                    errors.push(`${url}: ${result.error}`);
                    results.push(result);
                }
            },
            // Failed request handler
            failedRequestHandler: async ({ request }, error) => {
                errors.push(`Failed: ${request.url} - ${error.message}`);
            },
        });
        // Run the crawler
        await crawler.run();
        return {
            success: results.length > 0,
            crawledPages: results.length,
            results,
            errors,
            message: `🕷️ Crawled ${results.length} pages (depth: ${maxDepth}, errors: ${errors.length})`,
        };
    }
    catch (error) {
        return {
            success: false,
            crawledPages: results.length,
            results,
            errors: [...errors, error instanceof Error ? error.message : String(error)],
            message: `❌ Crawler error: ${error instanceof Error ? error.message : String(error)}`,
        };
    }
    finally {
        // Release the per-call queue so repeated crawls do not accumulate state
        if (requestQueue) {
            try {
                await requestQueue.drop();
            }
            catch (e) {
                // Best-effort cleanup; the crawl outcome has already been determined
            }
        }
    }
}
|
package/dist/index.js
CHANGED
|
@@ -61,7 +61,9 @@ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent
|
|
|
61
61
|
// Download tools
|
|
62
62
|
handleFileDownloader,
|
|
63
63
|
// Enhanced streaming/download tools
|
|
64
|
-
handleIframeHandler, handleStreamExtractor,
|
|
64
|
+
handleIframeHandler, handleStreamExtractor,
|
|
65
|
+
// Web crawler
|
|
66
|
+
handleWebCrawler, } from './handlers/advanced-tools.js';
|
|
65
67
|
// State for video recording
|
|
66
68
|
const recorderState = new Map();
|
|
67
69
|
debug('All modules loaded successfully');
|
|
@@ -254,6 +256,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
254
256
|
if (!page)
|
|
255
257
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
256
258
|
return { content: [{ type: 'text', text: JSON.stringify(await handleStreamExtractor(page, args)) }] };
|
|
259
|
+
// Web Crawler (Crawlee + brave-real-launcher)
|
|
260
|
+
case TOOL_NAMES.WEB_CRAWLER:
|
|
261
|
+
if (!page)
|
|
262
|
+
throw new Error('Browser not initialized. Call browser_init first.');
|
|
263
|
+
return { content: [{ type: 'text', text: JSON.stringify(await handleWebCrawler(page, args)) }] };
|
|
257
264
|
default:
|
|
258
265
|
throw new Error(`Unknown tool: ${name}`);
|
|
259
266
|
}
|
package/dist/tool-definitions.js
CHANGED
|
@@ -622,6 +622,127 @@ export const TOOLS = [
|
|
|
622
622
|
},
|
|
623
623
|
},
|
|
624
624
|
},
|
|
625
|
+
// ============================================================
|
|
626
|
+
// WEB CRAWLER TOOL (Movie Streaming Optimized)
|
|
627
|
+
// ============================================================
|
|
628
|
+
{
|
|
629
|
+
name: 'web_crawler',
|
|
630
|
+
description: 'Advanced web crawler optimized for movie downloading and streaming websites. Features: URL queue (breadth/depth-first), proxy rotation, auto-retry, rate limiting, JavaScript popup blocking, overlay ads blocking, and video link extraction. Uses brave-real-puppeteer-core with 50+ stealth features.',
|
|
631
|
+
inputSchema: {
|
|
632
|
+
type: 'object',
|
|
633
|
+
additionalProperties: false,
|
|
634
|
+
properties: {
|
|
635
|
+
startUrls: {
|
|
636
|
+
type: 'array',
|
|
637
|
+
items: { type: 'string' },
|
|
638
|
+
description: 'Initial URLs to start crawling from (movie/streaming pages)'
|
|
639
|
+
},
|
|
640
|
+
maxDepth: {
|
|
641
|
+
type: 'number',
|
|
642
|
+
description: 'Maximum crawl depth (1 = only start URLs)',
|
|
643
|
+
default: 3
|
|
644
|
+
},
|
|
645
|
+
maxPages: {
|
|
646
|
+
type: 'number',
|
|
647
|
+
description: 'Maximum pages to crawl',
|
|
648
|
+
default: 50
|
|
649
|
+
},
|
|
650
|
+
concurrency: {
|
|
651
|
+
type: 'number',
|
|
652
|
+
description: 'Number of concurrent requests',
|
|
653
|
+
default: 3
|
|
654
|
+
},
|
|
655
|
+
rateLimit: {
|
|
656
|
+
type: 'number',
|
|
657
|
+
description: 'Maximum requests per second',
|
|
658
|
+
default: 2
|
|
659
|
+
},
|
|
660
|
+
crawlStrategy: {
|
|
661
|
+
type: 'string',
|
|
662
|
+
enum: ['breadth-first', 'depth-first'],
|
|
663
|
+
description: 'URL queue strategy',
|
|
664
|
+
default: 'breadth-first'
|
|
665
|
+
},
|
|
666
|
+
includePattern: {
|
|
667
|
+
type: 'string',
|
|
668
|
+
description: 'Regex pattern for URLs to include'
|
|
669
|
+
},
|
|
670
|
+
excludePattern: {
|
|
671
|
+
type: 'string',
|
|
672
|
+
description: 'Regex pattern for URLs to exclude'
|
|
673
|
+
},
|
|
674
|
+
extractSelectors: {
|
|
675
|
+
type: 'object',
|
|
676
|
+
description: 'CSS selectors for data extraction (e.g., {"title": "h1", "links": "a[href]"})'
|
|
677
|
+
},
|
|
678
|
+
followLinks: {
|
|
679
|
+
type: 'boolean',
|
|
680
|
+
description: 'Follow discovered links',
|
|
681
|
+
default: true
|
|
682
|
+
},
|
|
683
|
+
// Movie streaming specific options
|
|
684
|
+
blockPopups: {
|
|
685
|
+
type: 'boolean',
|
|
686
|
+
description: 'Block JavaScript popup ads and window.open calls',
|
|
687
|
+
default: true
|
|
688
|
+
},
|
|
689
|
+
blockOverlayAds: {
|
|
690
|
+
type: 'boolean',
|
|
691
|
+
description: 'Block overlay ads, modal popups, and floating elements',
|
|
692
|
+
default: true
|
|
693
|
+
},
|
|
694
|
+
extractVideoLinks: {
|
|
695
|
+
type: 'boolean',
|
|
696
|
+
description: 'Auto-extract m3u8, mp4, mkv video links from pages',
|
|
697
|
+
default: true
|
|
698
|
+
},
|
|
699
|
+
downloadMedia: {
|
|
700
|
+
type: 'boolean',
|
|
701
|
+
description: 'Download video/audio files',
|
|
702
|
+
default: false
|
|
703
|
+
},
|
|
704
|
+
savePath: {
|
|
705
|
+
type: 'string',
|
|
706
|
+
description: 'Path to save downloaded files'
|
|
707
|
+
},
|
|
708
|
+
proxyList: {
|
|
709
|
+
type: 'array',
|
|
710
|
+
items: { type: 'string' },
|
|
711
|
+
description: 'Proxy URLs for rotation (format: protocol://host:port)'
|
|
712
|
+
},
|
|
713
|
+
retryCount: {
|
|
714
|
+
type: 'number',
|
|
715
|
+
description: 'Number of retries for failed requests',
|
|
716
|
+
default: 3
|
|
717
|
+
},
|
|
718
|
+
retryDelayMs: {
|
|
719
|
+
type: 'number',
|
|
720
|
+
description: 'Delay between retries in ms (exponential backoff)',
|
|
721
|
+
default: 1000
|
|
722
|
+
},
|
|
723
|
+
timeout: {
|
|
724
|
+
type: 'number',
|
|
725
|
+
description: 'Request timeout in ms',
|
|
726
|
+
default: 30000
|
|
727
|
+
},
|
|
728
|
+
mode: {
|
|
729
|
+
type: 'string',
|
|
730
|
+
enum: ['browser', 'http'],
|
|
731
|
+
description: 'Crawl mode (browser = Puppeteer, http = fast HTTP)',
|
|
732
|
+
default: 'browser'
|
|
733
|
+
},
|
|
734
|
+
userAgent: {
|
|
735
|
+
type: 'string',
|
|
736
|
+
description: 'Custom User-Agent string'
|
|
737
|
+
},
|
|
738
|
+
headers: {
|
|
739
|
+
type: 'object',
|
|
740
|
+
description: 'Custom headers for all requests'
|
|
741
|
+
},
|
|
742
|
+
},
|
|
743
|
+
required: ['startUrls'],
|
|
744
|
+
},
|
|
745
|
+
},
|
|
625
746
|
];
|
|
626
747
|
// Tool name constants for type safety
|
|
627
748
|
export const TOOL_NAMES = {
|
|
@@ -659,6 +780,8 @@ export const TOOL_NAMES = {
|
|
|
659
780
|
// Enhanced tools
|
|
660
781
|
IFRAME_HANDLER: 'iframe_handler',
|
|
661
782
|
STREAM_EXTRACTOR: 'stream_extractor',
|
|
783
|
+
// Crawler tool
|
|
784
|
+
WEB_CRAWLER: 'web_crawler',
|
|
662
785
|
};
|
|
663
786
|
// Tool categories for organization
|
|
664
787
|
export const TOOL_CATEGORIES = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.26.0",
|
|
4
4
|
"description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -50,7 +50,9 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "latest",
|
|
52
52
|
"@types/turndown": "latest",
|
|
53
|
-
"brave-real-browser": "^2.
|
|
53
|
+
"brave-real-browser": "^2.7.0",
|
|
54
|
+
"crawlee": "^3.15.3",
|
|
55
|
+
"puppeteer-core": "^24.35.0",
|
|
54
56
|
"turndown": "latest",
|
|
55
57
|
"vscode-languageserver": "^9.0.1",
|
|
56
58
|
"vscode-languageserver-textdocument": "^1.0.12"
|