brave-real-browser-mcp-server 2.26.3 → 2.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,10 +11,14 @@
11
11
 
12
12
  ```json
13
13
  {
14
+ {
14
15
  "mcpServers": {
15
16
  "brave-real-browser": {
16
17
  "command": "npx",
17
- "args": ["brave-real-browser-mcp-server@latest"]
18
+ "args": ["brave-real-browser-mcp-server@latest"],
19
+ "env": {
20
+ "headless": "false"
21
+ }
18
22
  }
19
23
  }
20
24
  }
@@ -2876,369 +2876,3 @@ export async function handleStreamExtractor(page, args) {
2876
2876
  : 'No direct URLs found',
2877
2877
  };
2878
2878
  }
2879
- /**
2880
- * Advanced web crawler with Crawlee + brave-real-launcher integration
2881
- * Features: URL queue, proxy rotation, rate limiting, data extraction
2882
- */
2883
- export async function handleWebCrawler(page, args) {
2884
- // Import Crawlee dynamically to avoid load-time errors if not installed
2885
- let PuppeteerCrawler;
2886
- let RequestQueue;
2887
- let Configuration;
2888
- try {
2889
- const crawlee = await import('crawlee');
2890
- PuppeteerCrawler = crawlee.PuppeteerCrawler;
2891
- RequestQueue = crawlee.RequestQueue;
2892
- Configuration = crawlee.Configuration;
2893
- }
2894
- catch (e) {
2895
- return {
2896
- success: false,
2897
- crawledPages: 0,
2898
- results: [],
2899
- errors: ['Crawlee not installed. Run: npm install crawlee'],
2900
- message: '❌ Crawlee package not found',
2901
- };
2902
- }
2903
- // Import brave-real-launcher for browser launch
2904
- let getBravePath;
2905
- let braveRealPuppeteerCore;
2906
- try {
2907
- const launcher = await import('brave-real-launcher');
2908
- getBravePath = launcher.getBravePath;
2909
- }
2910
- catch (e) {
2911
- // Fallback - will use default Chromium
2912
- }
2913
- // Import brave-real-puppeteer-core for stealth features
2914
- try {
2915
- braveRealPuppeteerCore = await import('brave-real-puppeteer-core');
2916
- }
2917
- catch (e) {
2918
- // Will use default puppeteer
2919
- }
2920
- const results = [];
2921
- const errors = [];
2922
- const visited = new Set();
2923
- // Configuration
2924
- const maxDepth = args.maxDepth ?? 3;
2925
- const maxPages = args.maxPages ?? 50;
2926
- const concurrency = args.concurrency ?? 3;
2927
- const rateLimit = args.rateLimit ?? 2;
2928
- const retryCount = args.retryCount ?? 3;
2929
- const timeout = args.timeout ?? 30000;
2930
- // URL filtering patterns
2931
- const includePattern = args.includePattern ? new RegExp(args.includePattern, 'i') : null;
2932
- const excludePattern = args.excludePattern ? new RegExp(args.excludePattern, 'i') : null;
2933
- // Proxy rotation
2934
- let proxyIndex = 0;
2935
- const getNextProxy = () => {
2936
- if (!args.proxyList || args.proxyList.length === 0)
2937
- return undefined;
2938
- const proxy = args.proxyList[proxyIndex % args.proxyList.length];
2939
- proxyIndex++;
2940
- return proxy;
2941
- };
2942
- // Rate limiting
2943
- let lastRequestTime = 0;
2944
- const rateLimitDelay = 1000 / rateLimit;
2945
- const enforceRateLimit = async () => {
2946
- const now = Date.now();
2947
- const elapsed = now - lastRequestTime;
2948
- if (elapsed < rateLimitDelay) {
2949
- await new Promise(r => setTimeout(r, rateLimitDelay - elapsed));
2950
- }
2951
- lastRequestTime = Date.now();
2952
- };
2953
- try {
2954
- // Configure Crawlee to use memory storage (no disk)
2955
- Configuration.getGlobalConfig().set('persistStorage', false);
2956
- // Create request queue with start URLs
2957
- const requestQueue = await RequestQueue.open();
2958
- for (const url of args.startUrls) {
2959
- await requestQueue.addRequest({
2960
- url,
2961
- userData: { depth: 0 },
2962
- });
2963
- }
2964
- // Get Brave executable path if available
2965
- let executablePath;
2966
- try {
2967
- if (getBravePath) {
2968
- executablePath = getBravePath();
2969
- }
2970
- }
2971
- catch (e) {
2972
- // Use default
2973
- }
2974
- // Create crawler based on mode
2975
- const crawler = new PuppeteerCrawler({
2976
- requestQueue,
2977
- maxConcurrency: concurrency,
2978
- maxRequestRetries: retryCount,
2979
- requestHandlerTimeoutSecs: timeout / 1000,
2980
- // Use brave-real-puppeteer-core with all stealth features
2981
- launchContext: {
2982
- // Use brave-real-puppeteer-core as custom launcher for 50+ stealth features
2983
- launcher: braveRealPuppeteerCore || undefined,
2984
- launchOptions: {
2985
- headless: true,
2986
- executablePath,
2987
- args: [
2988
- '--no-sandbox',
2989
- '--disable-setuid-sandbox',
2990
- '--disable-blink-features=AutomationControlled',
2991
- '--disable-dev-shm-usage',
2992
- '--disable-accelerated-2d-canvas',
2993
- '--disable-gpu',
2994
- ],
2995
- },
2996
- },
2997
- // Browser pool configuration
2998
- browserPoolOptions: {
2999
- maxOpenPagesPerBrowser: 1,
3000
- },
3001
- // Pre-navigation hook for rate limiting, popup blocking, and movie streaming optimizations
3002
- preNavigationHooks: [
3003
- async (crawlingContext) => {
3004
- await enforceRateLimit();
3005
- const pg = crawlingContext.page;
3006
- // Set custom user agent if provided
3007
- if (args.userAgent) {
3008
- await pg.setUserAgent(args.userAgent);
3009
- }
3010
- // Set custom headers if provided
3011
- if (args.headers) {
3012
- await pg.setExtraHTTPHeaders(args.headers);
3013
- }
3014
- // Block popups and overlay ads (default: true for movie streaming)
3015
- if (args.blockPopups !== false) {
3016
- await pg.evaluateOnNewDocument(() => {
3017
- // Block window.open popups
3018
- window.open = () => null;
3019
- // Block alert, confirm, prompt
3020
- window.alert = () => { };
3021
- window.confirm = () => true;
3022
- window.prompt = () => null;
3023
- // Block popup via createElement
3024
- const origCreate = document.createElement.bind(document);
3025
- document.createElement = (tag) => {
3026
- if (tag.toLowerCase() === 'a' && arguments[1]?.target === '_blank') {
3027
- return origCreate('span');
3028
- }
3029
- return origCreate(tag);
3030
- };
3031
- });
3032
- }
3033
- // Block overlay ads and floating elements
3034
- if (args.blockOverlayAds !== false) {
3035
- await pg.evaluateOnNewDocument(() => {
3036
- // Remove overlay ads after DOM load
3037
- const removeOverlays = () => {
3038
- const selectors = [
3039
- '[class*="popup"]', '[class*="modal"]', '[class*="overlay"]',
3040
- '[id*="popup"]', '[id*="modal"]', '[id*="overlay"]',
3041
- '[class*="ad-"]', '[class*="-ad"]', '[class*="advert"]',
3042
- '[class*="banner"]', '[class*="sticky"]', '[class*="float"]',
3043
- 'div[style*="position: fixed"]', 'div[style*="z-index: 9"]',
3044
- ];
3045
- selectors.forEach(sel => {
3046
- document.querySelectorAll(sel).forEach(el => {
3047
- const style = window.getComputedStyle(el);
3048
- if (style.position === 'fixed' || style.zIndex > '1000') {
3049
- el.style.display = 'none';
3050
- }
3051
- });
3052
- });
3053
- };
3054
- document.addEventListener('DOMContentLoaded', removeOverlays);
3055
- setInterval(removeOverlays, 2000);
3056
- });
3057
- }
3058
- },
3059
- ],
3060
- // Main request handler
3061
- requestHandler: async ({ request, page: crawlerPage, enqueueLinks }) => {
3062
- const depth = request.userData.depth || 0;
3063
- const url = request.url;
3064
- // Skip if already visited or max pages reached
3065
- if (visited.has(url) || results.length >= maxPages) {
3066
- return;
3067
- }
3068
- visited.add(url);
3069
- // URL filtering
3070
- if (includePattern && !includePattern.test(url))
3071
- return;
3072
- if (excludePattern && excludePattern.test(url))
3073
- return;
3074
- const result = {
3075
- url,
3076
- depth,
3077
- };
3078
- try {
3079
- // Get page title
3080
- result.title = await crawlerPage.title();
3081
- // Extract data using selectors
3082
- if (args.extractSelectors) {
3083
- result.extractedData = {};
3084
- for (const [key, selector] of Object.entries(args.extractSelectors)) {
3085
- try {
3086
- const elements = await crawlerPage.$$(selector);
3087
- if (elements.length === 1) {
3088
- result.extractedData[key] = await crawlerPage.$eval(selector, (el) => el.textContent?.trim() || el.getAttribute('href') || el.getAttribute('src'));
3089
- }
3090
- else if (elements.length > 1) {
3091
- result.extractedData[key] = await crawlerPage.$$eval(selector, (els) => els.map(el => el.textContent?.trim() || el.getAttribute('href') || el.getAttribute('src')).filter(Boolean));
3092
- }
3093
- }
3094
- catch (e) {
3095
- // Selector not found
3096
- }
3097
- }
3098
- }
3099
- // Extract video links (JWPlayer, DooPlayer, iframes, ajax sources)
3100
- if (args.extractVideoLinks !== false) {
3101
- result.videoLinks = await crawlerPage.evaluate(() => {
3102
- const videoLinks = [];
3103
- const videoPatterns = /\.(m3u8|mp4|mkv|webm|avi|mov|flv|wmv|ts)(\?|$)/i;
3104
- // 1. JWPlayer detection
3105
- if (window.jwplayer) {
3106
- try {
3107
- const players = document.querySelectorAll('.jwplayer, [id*="jwplayer"]');
3108
- players.forEach((_, idx) => {
3109
- try {
3110
- const player = window.jwplayer(idx);
3111
- if (player && player.getPlaylistItem) {
3112
- const item = player.getPlaylistItem();
3113
- if (item?.file) {
3114
- videoLinks.push({ url: item.file, type: item.file.includes('.m3u8') ? 'm3u8' : 'mp4', source: 'jwplayer' });
3115
- }
3116
- if (item?.sources) {
3117
- item.sources.forEach((s) => {
3118
- if (s.file)
3119
- videoLinks.push({ url: s.file, type: s.type || 'mp4', source: 'jwplayer' });
3120
- });
3121
- }
3122
- }
3123
- }
3124
- catch { }
3125
- });
3126
- }
3127
- catch { }
3128
- }
3129
- // 2. DooPlayer detection (common in movie sites)
3130
- if (window.dooPlayer || document.querySelector('[id*="doo"]')) {
3131
- try {
3132
- const dooConfig = window.dooPlayer?.config || window.player_config;
3133
- if (dooConfig?.source) {
3134
- videoLinks.push({ url: dooConfig.source, type: 'm3u8', source: 'dooplayer' });
3135
- }
3136
- }
3137
- catch { }
3138
- }
3139
- // 3. Iframe video sources
3140
- document.querySelectorAll('iframe').forEach(iframe => {
3141
- const src = iframe.src || iframe.getAttribute('data-src') || '';
3142
- if (src && (src.includes('embed') || src.includes('player') || src.includes('stream'))) {
3143
- videoLinks.push({ url: src, type: 'iframe', source: 'iframe' });
3144
- }
3145
- });
3146
- // 4. Video tags
3147
- document.querySelectorAll('video source, video').forEach(el => {
3148
- const src = el.getAttribute('src') || el.src;
3149
- if (src && videoPatterns.test(src)) {
3150
- const ext = src.match(videoPatterns)?.[1] || 'mp4';
3151
- videoLinks.push({ url: src, type: ext, source: 'video-tag' });
3152
- }
3153
- });
3154
- // 5. Hidden links in scripts (ajax pattern)
3155
- document.querySelectorAll('script:not([src])').forEach(script => {
3156
- const content = script.textContent || '';
3157
- // m3u8/mp4 in script
3158
- const matches = content.match(/https?:\/\/[^\s"'<>]+\.(m3u8|mp4|mkv)[^\s"'<>]*/gi);
3159
- if (matches) {
3160
- matches.forEach(url => {
3161
- const ext = url.match(videoPatterns)?.[1] || 'mp4';
3162
- videoLinks.push({ url, type: ext, source: 'ajax-script' });
3163
- });
3164
- }
3165
- });
3166
- // 6. Data attributes with video URLs
3167
- document.querySelectorAll('[data-file], [data-source], [data-video], [data-stream]').forEach(el => {
3168
- const url = el.getAttribute('data-file') || el.getAttribute('data-source') ||
3169
- el.getAttribute('data-video') || el.getAttribute('data-stream');
3170
- if (url && (videoPatterns.test(url) || url.includes('m3u8'))) {
3171
- videoLinks.push({ url, type: url.includes('m3u8') ? 'm3u8' : 'mp4', source: 'data-attr' });
3172
- }
3173
- });
3174
- // Deduplicate
3175
- return [...new Map(videoLinks.map(v => [v.url, v])).values()];
3176
- });
3177
- }
3178
- // Follow links if enabled and depth allows
3179
- if (args.followLinks !== false && depth < maxDepth && results.length < maxPages) {
3180
- // Get all links
3181
- const pageLinks = await crawlerPage.$$eval('a[href]', (anchors) => anchors.map(a => a.href).filter(href => href.startsWith('http')));
3182
- result.links = pageLinks.slice(0, 100); // Limit stored links
3183
- // Filter and enqueue links
3184
- const linksToEnqueue = pageLinks.filter((link) => {
3185
- if (visited.has(link))
3186
- return false;
3187
- if (includePattern && !includePattern.test(link))
3188
- return false;
3189
- if (excludePattern && excludePattern.test(link))
3190
- return false;
3191
- return true;
3192
- });
3193
- // Add filtered links using Crawlee's enqueueLinks
3194
- for (const link of linksToEnqueue.slice(0, 50)) {
3195
- try {
3196
- await requestQueue.addRequest({
3197
- url: link,
3198
- userData: { depth: depth + 1 },
3199
- });
3200
- }
3201
- catch (e) {
3202
- // Link already in queue
3203
- }
3204
- }
3205
- }
3206
- // Download media if enabled
3207
- if (args.downloadMedia && args.savePath) {
3208
- const mediaUrls = await crawlerPage.$$eval('img[src], video source[src], a[href$=".pdf"], a[href$=".jpg"], a[href$=".png"]', (els) => els.map(el => el.getAttribute('src') || el.getAttribute('href')).filter(Boolean));
3209
- result.extractedData = result.extractedData || {};
3210
- result.extractedData.mediaUrls = mediaUrls;
3211
- }
3212
- results.push(result);
3213
- }
3214
- catch (error) {
3215
- result.error = error instanceof Error ? error.message : String(error);
3216
- errors.push(`${url}: ${result.error}`);
3217
- results.push(result);
3218
- }
3219
- },
3220
- // Failed request handler
3221
- failedRequestHandler: async ({ request }, error) => {
3222
- errors.push(`Failed: ${request.url} - ${error.message}`);
3223
- },
3224
- });
3225
- // Run the crawler
3226
- await crawler.run();
3227
- return {
3228
- success: results.length > 0,
3229
- crawledPages: results.length,
3230
- results,
3231
- errors,
3232
- message: `🕷️ Crawled ${results.length} pages (depth: ${maxDepth}, errors: ${errors.length})`,
3233
- };
3234
- }
3235
- catch (error) {
3236
- return {
3237
- success: false,
3238
- crawledPages: results.length,
3239
- results,
3240
- errors: [...errors, error instanceof Error ? error.message : String(error)],
3241
- message: `❌ Crawler error: ${error instanceof Error ? error.message : String(error)}`,
3242
- };
3243
- }
3244
- }
@@ -126,6 +126,167 @@ export async function handleClick(args) {
126
126
  }, 'Failed to click element');
127
127
  });
128
128
  }
129
+ export async function handleSelect(args) {
130
+ const progressNotifier = getProgressNotifier();
131
+ const progressToken = `select-${Date.now()}`;
132
+ const tracker = progressNotifier.createTracker(progressToken);
133
+ return await withWorkflowValidation('select', args, async () => {
134
+ return await withErrorHandling(async () => {
135
+ tracker.start(100, '📋 Starting select operation...');
136
+ const pageInstance = getPageInstance();
137
+ if (!pageInstance) {
138
+ tracker.fail('Browser not initialized');
139
+ throw new Error('Browser not initialized. Call browser_init first.');
140
+ }
141
+ const { selector, value, text, index, searchText, waitForOptions = true, clickToOpen = false, optionSelector } = args;
142
+ tracker.setProgress(10, `🔍 Finding select element: ${selector}`);
143
+ // Find the select/dropdown element
144
+ const elementResult = await selfHealingLocators.findElementWithFallbacks(pageInstance, selector);
145
+ if (!elementResult) {
146
+ tracker.fail('Select element not found');
147
+ throw new Error(`Select element not found: ${selector}`);
148
+ }
149
+ const { element, usedSelector } = elementResult;
150
+ tracker.setProgress(25, '✅ Select element found, detecting type...');
151
+ // Detect select type: native <select> or custom dropdown
152
+ const selectType = await pageInstance.evaluate((sel) => {
153
+ const el = document.querySelector(sel);
154
+ if (!el)
155
+ return 'not-found';
156
+ if (el.tagName.toLowerCase() === 'select')
157
+ return 'native';
158
+ if (el.getAttribute('role') === 'listbox' || el.getAttribute('role') === 'combobox')
159
+ return 'aria';
160
+ if (el.classList.toString().match(/select|dropdown|combo/i))
161
+ return 'custom';
162
+ return 'custom';
163
+ }, usedSelector);
164
+ tracker.setProgress(35, `🎯 Detected type: ${selectType}`);
165
+ // Handle native <select> elements
166
+ if (selectType === 'native') {
167
+ tracker.setProgress(50, '📋 Handling native <select> element...');
168
+ let selectResult = [];
169
+ if (value !== undefined) {
170
+ selectResult = await pageInstance.select(usedSelector, value);
171
+ }
172
+ else if (text !== undefined) {
173
+ // Select by visible text
174
+ const optionValue = await pageInstance.evaluate((sel, targetText) => {
175
+ const select = document.querySelector(sel);
176
+ if (!select)
177
+ return null;
178
+ const option = Array.from(select.options).find(o => o.text.trim() === targetText.trim() ||
179
+ o.text.toLowerCase().includes(targetText.toLowerCase()));
180
+ return option?.value || null;
181
+ }, usedSelector, text);
182
+ if (optionValue) {
183
+ selectResult = await pageInstance.select(usedSelector, optionValue);
184
+ }
185
+ else {
186
+ throw new Error(`Option with text "${text}" not found`);
187
+ }
188
+ }
189
+ else if (index !== undefined) {
190
+ // Select by index
191
+ const optionValue = await pageInstance.evaluate((sel, idx) => {
192
+ const select = document.querySelector(sel);
193
+ return select?.options[idx]?.value || null;
194
+ }, usedSelector, index);
195
+ if (optionValue) {
196
+ selectResult = await pageInstance.select(usedSelector, optionValue);
197
+ }
198
+ else {
199
+ throw new Error(`Option at index ${index} not found`);
200
+ }
201
+ }
202
+ tracker.complete('🎉 Native select completed successfully');
203
+ return {
204
+ content: [{
205
+ type: 'text',
206
+ text: `✅ Selected value in native dropdown: ${selector}\nSelected: ${selectResult.join(', ') || 'success'}`,
207
+ }],
208
+ };
209
+ }
210
+ // Handle custom dropdowns (React, Vue, etc.)
211
+ tracker.setProgress(50, '📋 Handling custom dropdown...');
212
+ // Click to open if needed
213
+ if (clickToOpen) {
214
+ tracker.setProgress(55, '🖱️ Opening dropdown...');
215
+ await element.click();
216
+ await sleep(300); // Wait for animation
217
+ }
218
+ // Wait for options to load (AJAX dropdowns)
219
+ if (waitForOptions) {
220
+ tracker.setProgress(60, '⏳ Waiting for options to load...');
221
+ await sleep(500);
222
+ }
223
+ // Handle searchable dropdowns
224
+ if (searchText) {
225
+ tracker.setProgress(65, `🔍 Searching: ${searchText}...`);
226
+ // Type search text into the input
227
+ await element.type(searchText, { delay: 50 });
228
+ await sleep(500); // Wait for search results
229
+ }
230
+ // Find and click the option
231
+ tracker.setProgress(75, '🎯 Selecting option...');
232
+ const finalOptionSelector = optionSelector ||
233
+ '[role="option"],' +
234
+ ' [class*="option"]:not([class*="disabled"]),' +
235
+ ' li[data-value],' +
236
+ ' li:not(.disabled),' +
237
+ ' .dropdown-item';
238
+ const targetValue = value || text;
239
+ const optionClicked = await pageInstance.evaluate((optSel, targetVal, targetIndex) => {
240
+ const options = Array.from(document.querySelectorAll(optSel));
241
+ if (targetVal !== undefined) {
242
+ // Find by value or text
243
+ const option = options.find(opt => opt.getAttribute('value') === targetVal ||
244
+ opt.textContent?.trim() === targetVal ||
245
+ opt.textContent?.toLowerCase().includes(targetVal.toLowerCase()));
246
+ if (option) {
247
+ option.click();
248
+ return true;
249
+ }
250
+ }
251
+ else if (targetIndex !== undefined && options[targetIndex]) {
252
+ options[targetIndex].click();
253
+ return true;
254
+ }
255
+ return false;
256
+ }, finalOptionSelector, targetValue, index);
257
+ if (!optionClicked) {
258
+ // Fallback: try clicking by text content
259
+ const textToFind = text || value || searchText;
260
+ if (textToFind) {
261
+ const clicked = await pageInstance.evaluate((searchText) => {
262
+ const allElements = Array.from(document.querySelectorAll('*'));
263
+ for (const el of allElements) {
264
+ if (el.textContent?.trim() === searchText ||
265
+ el.textContent?.toLowerCase().includes(searchText.toLowerCase())) {
266
+ const style = window.getComputedStyle(el);
267
+ if (style.display !== 'none' && style.visibility !== 'hidden') {
268
+ el.click();
269
+ return true;
270
+ }
271
+ }
272
+ }
273
+ return false;
274
+ }, textToFind);
275
+ if (!clicked) {
276
+ throw new Error(`Could not find or click option: ${textToFind}`);
277
+ }
278
+ }
279
+ }
280
+ tracker.complete('🎉 Custom dropdown selection completed');
281
+ return {
282
+ content: [{
283
+ type: 'text',
284
+ text: `✅ Selected option in custom dropdown: ${selector}\nType: ${selectType}\nTarget: ${value || text || `index:${index}`}`,
285
+ }],
286
+ };
287
+ }, 'Failed to select option');
288
+ });
289
+ }
129
290
  // Type handler with real-time progress
130
291
  export async function handleType(args) {
131
292
  const progressNotifier = getProgressNotifier();
package/dist/index.js CHANGED
@@ -53,7 +53,7 @@ import { setupProcessCleanup } from './core-infrastructure.js';
53
53
  debug('Loading handlers...');
54
54
  import { handleBrowserInit, handleBrowserClose } from './handlers/browser-handlers.js';
55
55
  import { handleNavigate, handleWait } from './handlers/navigation-handlers.js';
56
- import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from './handlers/interaction-handlers.js';
56
+ import { handleClick, handleSelect, handleType, handleSolveCaptcha, handleRandomScroll } from './handlers/interaction-handlers.js';
57
57
  import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
58
58
  import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
59
59
  // Import advanced tools handlers
@@ -61,9 +61,7 @@ import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent
61
61
  // Download tools
62
62
  handleFileDownloader,
63
63
  // Enhanced streaming/download tools
64
- handleIframeHandler, handleStreamExtractor,
65
- // Web crawler
66
- handleWebCrawler, } from './handlers/advanced-tools.js';
64
+ handleIframeHandler, handleStreamExtractor, } from './handlers/advanced-tools.js';
67
65
  // State for video recording
68
66
  const recorderState = new Map();
69
67
  debug('All modules loaded successfully');
@@ -161,6 +159,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
161
159
  return await handleGetContent(args || {});
162
160
  case TOOL_NAMES.CLICK:
163
161
  return await handleClick(args);
162
+ case TOOL_NAMES.DROPDOWN_SELECT:
163
+ return await handleSelect(args);
164
164
  case TOOL_NAMES.TYPE:
165
165
  return await handleType(args);
166
166
  case TOOL_NAMES.WAIT:
@@ -256,11 +256,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
256
256
  if (!page)
257
257
  throw new Error('Browser not initialized. Call browser_init first.');
258
258
  return { content: [{ type: 'text', text: JSON.stringify(await handleStreamExtractor(page, args)) }] };
259
- // Web Crawler (Crawlee + brave-real-launcher)
260
- case TOOL_NAMES.WEB_CRAWLER:
261
- if (!page)
262
- throw new Error('Browser not initialized. Call browser_init first.');
263
- return { content: [{ type: 'text', text: JSON.stringify(await handleWebCrawler(page, args)) }] };
264
259
  default:
265
260
  throw new Error(`Unknown tool: ${name}`);
266
261
  }
@@ -7,9 +7,103 @@ export const SERVER_INFO = {
7
7
  // MCP capabilities with LSP-compatible streaming support
8
8
  export const CAPABILITIES = {
9
9
  tools: {},
10
- resources: {},
11
- prompts: {},
10
+ resources: { subscribe: true, listChanged: true },
11
+ prompts: { listChanged: true },
12
12
  };
13
+ // MCP Resources - Dynamic browser state resources
14
+ export const RESOURCES = [
15
+ {
16
+ uri: 'browser://state',
17
+ name: 'Browser State',
18
+ description: 'Current browser instance state including page URL, title, and session info',
19
+ mimeType: 'application/json',
20
+ },
21
+ {
22
+ uri: 'browser://page/content',
23
+ name: 'Page Content',
24
+ description: 'Current page HTML content',
25
+ mimeType: 'text/html',
26
+ },
27
+ {
28
+ uri: 'browser://page/text',
29
+ name: 'Page Text',
30
+ description: 'Current page text content (no HTML)',
31
+ mimeType: 'text/plain',
32
+ },
33
+ {
34
+ uri: 'browser://cookies',
35
+ name: 'Browser Cookies',
36
+ description: 'All cookies in current browser session',
37
+ mimeType: 'application/json',
38
+ },
39
+ {
40
+ uri: 'browser://network/requests',
41
+ name: 'Network Requests',
42
+ description: 'Recent network requests captured during browsing',
43
+ mimeType: 'application/json',
44
+ },
45
+ {
46
+ uri: 'browser://console/logs',
47
+ name: 'Console Logs',
48
+ description: 'Browser console log messages',
49
+ mimeType: 'application/json',
50
+ },
51
+ ];
52
+ // MCP Prompts - Reusable automation workflows
53
+ export const PROMPTS = [
54
+ {
55
+ name: 'scrape_website',
56
+ description: 'Scrape content from a website with automatic navigation and extraction',
57
+ arguments: [
58
+ { name: 'url', description: 'URL to scrape', required: true },
59
+ { name: 'selector', description: 'CSS selector for target content', required: false },
60
+ { name: 'outputFormat', description: 'Output format (json, markdown, text)', required: false },
61
+ ],
62
+ },
63
+ {
64
+ name: 'extract_download_links',
65
+ description: 'Extract all download links from a page with quality and size info',
66
+ arguments: [
67
+ { name: 'url', description: 'Page URL to extract from', required: true },
68
+ { name: 'fileTypes', description: 'File types to extract (mp4, mkv, pdf, etc.)', required: false },
69
+ ],
70
+ },
71
+ {
72
+ name: 'monitor_page_changes',
73
+ description: 'Monitor a page for changes and notify when content updates',
74
+ arguments: [
75
+ { name: 'url', description: 'URL to monitor', required: true },
76
+ { name: 'selector', description: 'Element to watch for changes', required: true },
77
+ { name: 'interval', description: 'Check interval in seconds', required: false },
78
+ ],
79
+ },
80
+ {
81
+ name: 'automate_login',
82
+ description: 'Automate login to a website with credentials',
83
+ arguments: [
84
+ { name: 'url', description: 'Login page URL', required: true },
85
+ { name: 'usernameSelector', description: 'Username field selector', required: true },
86
+ { name: 'passwordSelector', description: 'Password field selector', required: true },
87
+ { name: 'submitSelector', description: 'Submit button selector', required: true },
88
+ ],
89
+ },
90
+ {
91
+ name: 'batch_screenshot',
92
+ description: 'Take screenshots of multiple URLs',
93
+ arguments: [
94
+ { name: 'urls', description: 'Comma-separated list of URLs', required: true },
95
+ { name: 'outputDir', description: 'Directory to save screenshots', required: true },
96
+ ],
97
+ },
98
+ {
99
+ name: 'extract_video_stream',
100
+ description: 'Extract video streaming URL from a page (m3u8, mp4)',
101
+ arguments: [
102
+ { name: 'url', description: 'Video page URL', required: true },
103
+ { name: 'quality', description: 'Preferred quality (highest, 1080p, 720p)', required: false },
104
+ ],
105
+ },
106
+ ];
13
107
  // Extended capabilities for streaming and auto-sync (for documentation/client info)
14
108
  export const EXTENDED_CAPABILITIES = {
15
109
  streaming: true,
@@ -140,6 +234,76 @@ export const TOOLS = [
140
234
  },
141
235
  },
142
236
  },
237
+ {
238
+ name: 'wait',
239
+ description: 'Wait for various conditions',
240
+ inputSchema: {
241
+ type: 'object',
242
+ additionalProperties: false,
243
+ properties: {
244
+ type: {
245
+ type: 'string',
246
+ enum: ['selector', 'navigation', 'timeout'],
247
+ description: 'Type of wait condition',
248
+ },
249
+ value: {
250
+ type: 'string',
251
+ description: 'Selector to wait for or timeout in ms',
252
+ },
253
+ timeout: {
254
+ type: 'number',
255
+ description: 'Maximum wait time in ms',
256
+ default: 30000,
257
+ },
258
+ },
259
+ required: ['type', 'value'],
260
+ },
261
+ },
262
+ {
263
+ name: 'dropdown_select',
264
+ description: 'Intelligent dropdown/select element handler. Supports native HTML select, custom dropdowns, autocomplete, and searchable selects with smart fallback strategies.',
265
+ inputSchema: {
266
+ type: 'object',
267
+ additionalProperties: false,
268
+ properties: {
269
+ selector: {
270
+ type: 'string',
271
+ description: 'CSS selector for the select/dropdown element',
272
+ },
273
+ value: {
274
+ type: 'string',
275
+ description: 'Value to select (option value attribute)',
276
+ },
277
+ text: {
278
+ type: 'string',
279
+ description: 'Visible text of option to select (alternative to value)',
280
+ },
281
+ index: {
282
+ type: 'number',
283
+ description: 'Index of option to select (0-based, alternative to value/text)',
284
+ },
285
+ searchText: {
286
+ type: 'string',
287
+ description: 'For searchable dropdowns: text to type before selecting',
288
+ },
289
+ waitForOptions: {
290
+ type: 'boolean',
291
+ description: 'Wait for dropdown options to load (useful for AJAX dropdowns)',
292
+ default: true,
293
+ },
294
+ clickToOpen: {
295
+ type: 'boolean',
296
+ description: 'Click to open dropdown before selecting (for custom dropdowns)',
297
+ default: false,
298
+ },
299
+ optionSelector: {
300
+ type: 'string',
301
+ description: 'CSS selector for dropdown options (for custom dropdowns)',
302
+ },
303
+ },
304
+ required: ['selector'],
305
+ },
306
+ },
143
307
  {
144
308
  name: 'click',
145
309
  description: 'Click on an element',
@@ -184,31 +348,6 @@ export const TOOLS = [
184
348
  required: ['selector', 'text'],
185
349
  },
186
350
  },
187
- {
188
- name: 'wait',
189
- description: 'Wait for various conditions',
190
- inputSchema: {
191
- type: 'object',
192
- additionalProperties: false,
193
- properties: {
194
- type: {
195
- type: 'string',
196
- enum: ['selector', 'navigation', 'timeout'],
197
- description: 'Type of wait condition',
198
- },
199
- value: {
200
- type: 'string',
201
- description: 'Selector to wait for or timeout in ms',
202
- },
203
- timeout: {
204
- type: 'number',
205
- description: 'Maximum wait time in ms',
206
- default: 30000,
207
- },
208
- },
209
- required: ['type', 'value'],
210
- },
211
- },
212
351
  {
213
352
  name: 'browser_close',
214
353
  description: 'Close the browser instance',
@@ -622,127 +761,6 @@ export const TOOLS = [
622
761
  },
623
762
  },
624
763
  },
625
- // ============================================================
626
- // WEB CRAWLER TOOL (Movie Streaming Optimized)
627
- // ============================================================
628
- {
629
- name: 'web_crawler',
630
- description: 'Advanced web crawler optimized for movie downloading and streaming websites. Features: URL queue (breadth/depth-first), proxy rotation, auto-retry, rate limiting, JavaScript popup blocking, overlay ads blocking, and video link extraction. Uses brave-real-puppeteer-core with 50+ stealth features.',
631
- inputSchema: {
632
- type: 'object',
633
- additionalProperties: false,
634
- properties: {
635
- startUrls: {
636
- type: 'array',
637
- items: { type: 'string' },
638
- description: 'Initial URLs to start crawling from (movie/streaming pages)'
639
- },
640
- maxDepth: {
641
- type: 'number',
642
- description: 'Maximum crawl depth (1 = only start URLs)',
643
- default: 3
644
- },
645
- maxPages: {
646
- type: 'number',
647
- description: 'Maximum pages to crawl',
648
- default: 50
649
- },
650
- concurrency: {
651
- type: 'number',
652
- description: 'Number of concurrent requests',
653
- default: 3
654
- },
655
- rateLimit: {
656
- type: 'number',
657
- description: 'Maximum requests per second',
658
- default: 2
659
- },
660
- crawlStrategy: {
661
- type: 'string',
662
- enum: ['breadth-first', 'depth-first'],
663
- description: 'URL queue strategy',
664
- default: 'breadth-first'
665
- },
666
- includePattern: {
667
- type: 'string',
668
- description: 'Regex pattern for URLs to include'
669
- },
670
- excludePattern: {
671
- type: 'string',
672
- description: 'Regex pattern for URLs to exclude'
673
- },
674
- extractSelectors: {
675
- type: 'object',
676
- description: 'CSS selectors for data extraction (e.g., {"title": "h1", "links": "a[href]"})'
677
- },
678
- followLinks: {
679
- type: 'boolean',
680
- description: 'Follow discovered links',
681
- default: true
682
- },
683
- // Movie streaming specific options
684
- blockPopups: {
685
- type: 'boolean',
686
- description: 'Block JavaScript popup ads and window.open calls',
687
- default: true
688
- },
689
- blockOverlayAds: {
690
- type: 'boolean',
691
- description: 'Block overlay ads, modal popups, and floating elements',
692
- default: true
693
- },
694
- extractVideoLinks: {
695
- type: 'boolean',
696
- description: 'Auto-extract m3u8, mp4, mkv video links from pages',
697
- default: true
698
- },
699
- downloadMedia: {
700
- type: 'boolean',
701
- description: 'Download video/audio files',
702
- default: false
703
- },
704
- savePath: {
705
- type: 'string',
706
- description: 'Path to save downloaded files'
707
- },
708
- proxyList: {
709
- type: 'array',
710
- items: { type: 'string' },
711
- description: 'Proxy URLs for rotation (format: protocol://host:port)'
712
- },
713
- retryCount: {
714
- type: 'number',
715
- description: 'Number of retries for failed requests',
716
- default: 3
717
- },
718
- retryDelayMs: {
719
- type: 'number',
720
- description: 'Delay between retries in ms (exponential backoff)',
721
- default: 1000
722
- },
723
- timeout: {
724
- type: 'number',
725
- description: 'Request timeout in ms',
726
- default: 30000
727
- },
728
- mode: {
729
- type: 'string',
730
- enum: ['browser', 'http'],
731
- description: 'Crawl mode (browser = Puppeteer, http = fast HTTP)',
732
- default: 'browser'
733
- },
734
- userAgent: {
735
- type: 'string',
736
- description: 'Custom User-Agent string'
737
- },
738
- headers: {
739
- type: 'object',
740
- description: 'Custom headers for all requests'
741
- },
742
- },
743
- required: ['startUrls'],
744
- },
745
- },
746
764
  ];
747
765
  // Tool name constants for type safety
748
766
  export const TOOL_NAMES = {
@@ -750,6 +768,7 @@ export const TOOL_NAMES = {
750
768
  NAVIGATE: 'navigate',
751
769
  GET_CONTENT: 'get_content',
752
770
  CLICK: 'click',
771
+ DROPDOWN_SELECT: 'dropdown_select',
753
772
  TYPE: 'type',
754
773
  WAIT: 'wait',
755
774
  BROWSER_CLOSE: 'browser_close',
@@ -780,8 +799,6 @@ export const TOOL_NAMES = {
780
799
  // Enhanced tools
781
800
  IFRAME_HANDLER: 'iframe_handler',
782
801
  STREAM_EXTRACTOR: 'stream_extractor',
783
- // Crawler tool
784
- WEB_CRAWLER: 'web_crawler',
785
802
  };
786
803
  // Tool categories for organization
787
804
  export const TOOL_CATEGORIES = {
@@ -17,10 +17,10 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
17
17
  import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
18
18
  import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
19
19
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
20
- import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ListPromptsRequestSchema, InitializeRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
20
+ import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, ListPromptsRequestSchema, GetPromptRequestSchema, InitializeRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
21
21
  import { randomUUID } from 'crypto';
22
22
  // Import core modules
23
- import { TOOLS, SERVER_INFO, CAPABILITIES, TOOL_NAMES } from './tool-definitions.js';
23
+ import { TOOLS, SERVER_INFO, CAPABILITIES, TOOL_NAMES, RESOURCES, PROMPTS } from './tool-definitions.js';
24
24
  import { getSharedEventBus } from './shared/event-bus.js';
25
25
  import { getProgressNotifier } from './transport/progress-notifier.js';
26
26
  import { getSessionManager } from './transport/session-manager.js';
@@ -69,8 +69,109 @@ mcpServer.setRequestHandler(InitializeRequestSchema, async (request) => {
69
69
  };
70
70
  });
71
71
  mcpServer.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
72
- mcpServer.setRequestHandler(ListResourcesRequestSchema, async () => ({ resources: [] }));
73
- mcpServer.setRequestHandler(ListPromptsRequestSchema, async () => ({ prompts: [] }));
72
+ mcpServer.setRequestHandler(ListResourcesRequestSchema, async () => ({ resources: RESOURCES }));
73
+ mcpServer.setRequestHandler(ListPromptsRequestSchema, async () => ({ prompts: PROMPTS }));
74
+ // Read Resource Handler
75
/**
 * Read Resource handler: resolves one of the `browser://` resource URIs.
 *
 * Every result is a single-entry `contents` array. Errors raised while
 * resolving a resource (including an unknown URI) are not rethrown; they are
 * returned as a `text/plain` entry whose text is `Error: <message>`.
 */
mcpServer.setRequestHandler(ReadResourceRequestSchema, async (request) => {
    const { uri } = request.params;
    const page = getPageInstance();
    debug(`ReadResource: ${uri}`);

    // Wraps a payload in the single-entry `contents` envelope for this URI.
    const respond = (mimeType, text) => ({ contents: [{ uri, mimeType, text }] });
    // Same envelope, with the value pretty-printed as JSON.
    const respondJson = (value) => respond('application/json', JSON.stringify(value, null, 2));
    // Returns the current page, or throws when no page is available.
    const requirePage = () => {
        if (!page) {
            throw new Error('Browser not initialized');
        }
        return page;
    };
    // Last `limit` event-bus history entries whose `type` contains `keyword`.
    const latestEvents = (keyword, limit) => {
        const matching = eventBus.getHistory().filter((entry) => entry.type.includes(keyword));
        return matching.slice(-limit);
    };

    try {
        if (uri === 'browser://state') {
            // This is the only resource that answers normally without a page.
            if (!page) {
                return respondJson({ status: 'Browser not initialized' });
            }
            const url = page.url();
            const title = await page.title();
            const isConnected = page.browser()?.connected ?? false;
            const viewport = page.viewport();
            return respondJson({ url, title, isConnected, viewport });
        }
        if (uri === 'browser://page/content') {
            return respond('text/html', await requirePage().content());
        }
        if (uri === 'browser://page/text') {
            const bodyText = await requirePage().evaluate(() => document.body.innerText);
            return respond('text/plain', bodyText);
        }
        if (uri === 'browser://cookies') {
            return respondJson(await requirePage().cookies());
        }
        if (uri === 'browser://network/requests') {
            // Served from the event bus history, capped at the 50 most recent entries.
            return respondJson(latestEvents('network', 50));
        }
        if (uri === 'browser://console/logs') {
            // Served from the event bus history, capped at the 100 most recent entries.
            return respondJson(latestEvents('console', 100));
        }
        throw new Error(`Unknown resource: ${uri}`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return respond('text/plain', `Error: ${reason}`);
    }
});
126
+ // Get Prompt Handler
127
/**
 * Get Prompt handler: looks the requested prompt up in PROMPTS and returns its
 * description together with a single user message rendered from the request
 * arguments.
 *
 * Throws `Unknown prompt: <name>` when PROMPTS has no entry with that name.
 * A prompt that is listed in PROMPTS but has no template here gets the
 * generic `Execute prompt: <name>` message.
 */
mcpServer.setRequestHandler(GetPromptRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;
    debug(`GetPrompt: ${name}`);

    const prompt = PROMPTS.find((entry) => entry.name === name);
    if (!prompt) {
        throw new Error(`Unknown prompt: ${name}`);
    }

    // One text renderer per known prompt name; missing arguments fall back to
    // the placeholder or default value written into each template.
    const renderers = new Map([
        ['scrape_website', () => `Scrape the website at ${args?.url || '[URL]'}. ${args?.selector ? `Focus on elements matching: ${args.selector}` : 'Extract main content.'}`],
        ['extract_download_links', () => `Extract all download links from ${args?.url || '[URL]'}. ${args?.fileTypes ? `Filter for: ${args.fileTypes}` : 'Include all file types.'}`],
        ['monitor_page_changes', () => `Monitor ${args?.url || '[URL]'} for changes in element: ${args?.selector || '[SELECTOR]'}. Check every ${args?.interval || 60} seconds.`],
        ['automate_login', () => `Automate login to ${args?.url || '[URL]'}. Use ${args?.usernameSelector || '#username'} for username, ${args?.passwordSelector || '#password'} for password, and ${args?.submitSelector || 'button[type=submit]'} to submit.`],
        ['batch_screenshot', () => `Take screenshots of: ${args?.urls || '[URLs]'}. Save to: ${args?.outputDir || './screenshots'}`],
        ['extract_video_stream', () => `Extract video streaming URL from ${args?.url || '[URL]'}. Prefer quality: ${args?.quality || 'highest'}`],
    ]);

    const render = renderers.get(name);
    const text = render ? render() : `Execute prompt: ${name}`;

    return {
        description: prompt.description,
        messages: [{ role: 'user', content: { type: 'text', text } }],
    };
});
74
175
  mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
75
176
  const { name, arguments: args } = request.params;
76
177
  debug(`Tool call: ${name}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.26.3",
3
+ "version": "2.27.1",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,8 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.7.2",
54
- "crawlee": "^3.15.3",
53
+ "brave-real-browser": "^2.8.1",
55
54
  "puppeteer-core": "^24.35.0",
56
55
  "turndown": "latest",
57
56
  "vscode-languageserver": "^9.0.1",