brave-real-browser-mcp-server 2.17.10 → 2.17.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +0 -8
- package/dist/debug-logger.js +28 -0
- package/dist/handlers/advanced-extraction-handlers.js +0 -80
- package/dist/handlers/deep-analysis-handler.js +119 -0
- package/dist/handlers/multi-element-handlers.js +0 -60
- package/dist/handlers/smart-data-extractors.js +0 -475
- package/dist/handlers/unified-captcha-handler.js +137 -0
- package/dist/handlers/unified-search-handler.js +137 -0
- package/dist/index.js +87 -63
- package/dist/tool-definitions.js +58 -186
- package/dist/workflows/forensic-media-extractor.js +5 -15
- package/dist/workflows/media-extraction-workflow.js +3 -8
- package/package.json +1 -1
- package/dist/handlers/advanced-video-media-handlers.js +0 -139
- package/dist/handlers/captcha-handlers.js +0 -257
- package/dist/handlers/data-quality-handlers.js +0 -82
- package/dist/handlers/search-filter-handlers.js +0 -264
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
import { getPageInstance } from '../browser-manager.js';
|
|
3
|
+
import { withErrorHandling } from '../system-utils.js';
|
|
4
|
+
import { validateWorkflow } from '../workflow-validation.js';
|
|
5
|
+
/**
 * Entry point for the `search_content` tool.
 *
 * Validates the workflow state (a browser and a page are required) and then
 * delegates to one of two internal searchers:
 *   - `args.type === 'regex'` -> handleRegexPatternMatcher
 *   - anything else           -> handleKeywordSearch
 *
 * The whole operation runs inside withErrorHandling, labelled
 * 'Search Content Failed'.
 *
 * @param {object} args - Tool arguments; `type` selects the search mode and
 *   the remaining fields are forwarded untouched to the chosen searcher.
 * @returns {Promise<*>} Whatever withErrorHandling resolves to for the
 *   delegated call.
 */
export async function handleSearchContent(args) {
    const runSearch = async () => {
        validateWorkflow('search_content', { requireBrowser: true, requirePage: true });
        // Pick the implementation from the requested search mode.
        const searchImpl = args.type === 'regex' ? handleRegexPatternMatcher : handleKeywordSearch;
        return await searchImpl(args);
    };
    return await withErrorHandling(runSearch, 'Search Content Failed');
}
|
|
21
|
+
/**
 * Entry point for the `find_element_advanced` tool.
 *
 * Validates the workflow state (a browser and a page are required) and then
 * delegates to one of two internal finders:
 *   - `args.type === 'xpath'` -> handleXPathSupport
 *   - anything else           -> handleAdvancedCSSSelectors
 *
 * The whole operation runs inside withErrorHandling, labelled
 * 'Find Element Advanced Failed'.
 *
 * @param {object} args - Tool arguments; `type` selects the lookup strategy
 *   and the remaining fields are forwarded untouched to the chosen finder.
 * @returns {Promise<*>} Whatever withErrorHandling resolves to for the
 *   delegated call.
 */
export async function handleFindElementAdvanced(args) {
    const runLookup = async () => {
        validateWorkflow('find_element_advanced', { requireBrowser: true, requirePage: true });
        // Pick the implementation from the requested lookup strategy.
        const finderImpl = args.type === 'xpath' ? handleXPathSupport : handleAdvancedCSSSelectors;
        return await finderImpl(args);
    };
    return await withErrorHandling(runLookup, 'Find Element Advanced Failed');
}
|
|
36
|
+
// --- Internal Sub-Handlers (Preserved Logic) ---
|
|
37
|
+
/**
 * Literal keyword search over the text nodes of the current page.
 *
 * @param {object} args
 * @param {string} [args.url] - If given and different from the page's current
 *   URL, the page is navigated there first (networkidle2, 30 s timeout).
 * @param {string|string[]} args.query - One keyword, or an array of keywords
 *   (kept for callers that still pass an array).
 * @param {boolean} [args.caseSensitive=false] - Match case exactly.
 * @param {boolean} [args.wholeWord=false] - Wrap the keyword in `\b...\b`.
 * @param {number} [args.context=50] - Characters of surrounding text to keep
 *   on each side of a match.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A text
 *   response holding the total match count and at most 100 match records
 *   (`keyword`, `match`, `context`).
 */
async function handleKeywordSearch(args) {
    const { url, query, caseSensitive = false, wholeWord = false, context = 50 } = args;
    // Drop null/empty keywords: an empty pattern matches the empty string at
    // every position without advancing lastIndex, so the exec() loop below
    // would never terminate.
    const keywords = (Array.isArray(query) ? query : [query])
        .filter((kw) => kw != null && String(kw) !== '');
    const page = getPageInstance();
    if (url && page.url() !== url) {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    }
    const results = await page.evaluate((kws, caseSens, whole, ctx) => {
        const MAX_RETURNED = 100;
        const flags = caseSens ? 'g' : 'gi';
        // Keywords are literals, not patterns: neutralise regex metacharacters
        // so inputs such as "C++" or "a.b" neither throw nor over-match.
        const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const matches = [];
        let totalMatches = 0;
        for (const keyword of kws) {
            const literal = escapeRegExp(String(keyword));
            // Built once per keyword instead of once per text node.
            const regex = new RegExp(whole ? `\\b${literal}\\b` : literal, flags);
            const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
            let node;
            while ((node = walker.nextNode())) {
                const text = node.textContent || '';
                // The regex is global and reused across nodes; start each node at 0.
                regex.lastIndex = 0;
                let match;
                while ((match = regex.exec(text)) !== null) {
                    totalMatches++;
                    // Count every hit but only retain what will be reported.
                    if (matches.length < MAX_RETURNED) {
                        matches.push({
                            keyword,
                            match: match[0],
                            context: text.substring(Math.max(0, match.index - ctx), Math.min(text.length, match.index + match[0].length + ctx))
                        });
                    }
                }
            }
        }
        return { totalMatches, matches };
    }, keywords, caseSensitive, wholeWord, context);
    return {
        content: [{ type: 'text', text: `Keyword Search Results (${results.totalMatches}):\n${JSON.stringify(results.matches, null, 2)}` }]
    };
}
|
|
69
|
+
/**
 * Runs a caller-supplied regular expression against page text.
 *
 * @param {object} args
 * @param {string} [args.url] - If given and different from the page's current
 *   URL, the page is navigated there first (networkidle2, 30 s timeout).
 * @param {string} args.query - Regular-expression source.
 * @param {string} [args.flags='g'] - Regular-expression flags. The `g` flag is
 *   added when missing, because the scan below relies on exec() advancing.
 * @param {string} [args.selector] - When set, only the textContent of the
 *   first element matching this CSS selector is searched (empty string if
 *   nothing matches); otherwise `document.body.innerText` is searched.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A text
 *   response holding the number of matches collected (the scan stops after
 *   1000) and at most 100 records of `{ match, index, groups }`.
 */
async function handleRegexPatternMatcher(args) {
    const { url, query, flags = 'g', selector } = args;
    const page = getPageInstance();
    if (url && page.url() !== url) {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    }
    const results = await page.evaluate((pat, flgs, sel) => {
        const content = sel ? document.querySelector(sel)?.textContent || '' : document.body.innerText;
        // Without `g`, exec() ignores lastIndex and keeps returning the first
        // match, so the loop would record the same hit until the 1000 cap.
        const requested = typeof flgs === 'string' ? flgs : '';
        const effectiveFlags = requested.includes('g') ? requested : `${requested}g`;
        const regex = new RegExp(pat, effectiveFlags);
        const matches = [];
        let match;
        let count = 0;
        while ((match = regex.exec(content)) !== null && count < 1000) {
            count++;
            matches.push({ match: match[0], index: match.index, groups: match.slice(1) });
            // A zero-length match leaves lastIndex where it was; step past it.
            if (match.index === regex.lastIndex) {
                regex.lastIndex++;
            }
        }
        return { totalMatches: matches.length, matches: matches.slice(0, 100) };
    }, query, flags, selector || '');
    return { content: [{ type: 'text', text: `Regex Results (${results.totalMatches}):\n${JSON.stringify(results.matches, null, 2)}` }] };
}
|
|
90
|
+
/**
 * Evaluates an XPath expression against the current document.
 *
 * @param {object} args
 * @param {string} [args.url] - If given and different from the page's current
 *   URL, the page is navigated there first (networkidle2, 30 s timeout).
 * @param {string} args.query - XPath expression.
 *   (`args.returnType` is accepted by callers but is not consulted here.)
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A text
 *   response. For node-set expressions it lists each matched element as
 *   `{ tagName, text (first 100 chars), attributes }`; non-element nodes are
 *   skipped. For expressions that evaluate to a number, string or boolean it
 *   lists a single `{ type, value }` record.
 */
async function handleXPathSupport(args) {
    const { url, query } = args;
    const page = getPageInstance();
    if (url && page.url() !== url) {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    }
    const results = await page.evaluate((xp) => {
        const xpathResult = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
        // ANY_TYPE only produces a node iterator for node-set expressions.
        // Expressions such as count(//a) or string(//title) produce a
        // primitive result, on which iterateNext() throws.
        switch (xpathResult.resultType) {
            case XPathResult.NUMBER_TYPE:
                return { count: 1, elements: [{ type: 'number', value: xpathResult.numberValue }] };
            case XPathResult.STRING_TYPE:
                return { count: 1, elements: [{ type: 'string', value: xpathResult.stringValue }] };
            case XPathResult.BOOLEAN_TYPE:
                return { count: 1, elements: [{ type: 'boolean', value: xpathResult.booleanValue }] };
            default:
                break;
        }
        const elements = [];
        for (let node = xpathResult.iterateNext(); node; node = xpathResult.iterateNext()) {
            if (node.nodeType !== Node.ELEMENT_NODE) {
                continue;
            }
            elements.push({
                tagName: node.tagName.toLowerCase(),
                text: node.textContent?.substring(0, 100),
                attributes: Array.from(node.attributes).reduce((acc, a) => { acc[a.name] = a.value; return acc; }, {})
            });
        }
        return { count: elements.length, elements };
    }, query);
    return { content: [{ type: 'text', text: `XPath Results (${results.count}):\n${JSON.stringify(results.elements, null, 2)}` }] };
}
|
|
114
|
+
/**
 * Looks up elements on the current page with a CSS selector.
 *
 * @param {object} args
 * @param {string} [args.url] - If given and different from the page's current
 *   URL, the page is navigated there first (networkidle2, 30 s timeout).
 * @param {string} args.query - CSS selector.
 * @param {string} [args.operation='query'] - Lookup mode:
 *   - 'closest': the first element matching the selector, passed through
 *     `closest(selector)`;
 *   - 'matches': every element in the document for which
 *     `matches(selector)` is true;
 *   - anything else: `querySelectorAll(selector)`.
 *   (`args.returnType` is accepted by callers but is not consulted here.)
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A text
 *   response holding the total number of elements found and, for at most the
 *   first 50 of them, `{ tagName, className, text (first 100 chars) }`.
 */
async function handleAdvancedCSSSelectors(args) {
    const { url, query, operation = 'query' } = args;
    const page = getPageInstance();
    if (url && page.url() !== url) {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    }
    const results = await page.evaluate((sel, op) => {
        const summarize = (el) => ({
            tagName: el.tagName.toLowerCase(),
            className: el.className,
            text: el.textContent?.substring(0, 100)
        });
        let found;
        switch (op) {
            case 'closest': {
                const first = document.querySelector(sel);
                found = first ? [first.closest(sel)].filter(Boolean) : [];
                break;
            }
            case 'matches':
                found = Array.from(document.querySelectorAll('*')).filter((el) => el.matches(sel));
                break;
            default:
                found = Array.from(document.querySelectorAll(sel));
                break;
        }
        // `count` reflects everything found; only the first 50 are described.
        return {
            count: found.length,
            elements: found.slice(0, 50).map(summarize)
        };
    }, query, operation);
    return { content: [{ type: 'text', text: `CSS Results (${results.count}):\n${JSON.stringify(results.elements, null, 2)}` }] };
}
|
package/dist/index.js
CHANGED
|
@@ -1,14 +1,83 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import '
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import { logDebug } from './debug-logger.js';
|
|
6
|
+
// Reroute console.log: every call is recorded through logDebug and then
// written with console.error instead of the original console.log.
// Presumably this keeps stdout free for the stdio transport - confirm.
// NOTE(review): static `import` declarations are hoisted and evaluated before
// any statement in this module, so modules imported by this file can still
// reach the unpatched console.log while they load.
const originalConsoleLog = console.log;
console.log = (...logArgs) => {
    logDebug('Captured stdout log:', logArgs);
    console.error(...logArgs);
};
|
|
12
|
+
// Robust .env loading (Manual & Silent)
|
|
13
|
+
// Import unified handlers
|
|
14
|
+
import { handleUnifiedCaptcha } from './handlers/unified-captcha-handler.js';
|
|
15
|
+
import { handleSearchContent, handleFindElementAdvanced } from './handlers/unified-search-handler.js';
|
|
16
|
+
import { handleDeepAnalysis } from './handlers/deep-analysis-handler.js';
|
|
17
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
+
const __dirname = path.dirname(__filename);
|
|
19
|
+
const projectRoot = path.resolve(__dirname, '..');
|
|
20
|
+
const envPath = path.join(projectRoot, '.env');
|
|
21
|
+
/**
 * Minimal `.env` loader, used instead of the dotenv package to avoid stdout
 * pollution.
 *
 * Each non-blank line that does not start with `#` and contains `=` after a
 * non-empty key is treated as `KEY=VALUE`; everything after the first `=`
 * belongs to the value. Whitespace around key and value is ignored, and
 * surrounding double quotes or a matching pair of single quotes are removed
 * from the value. A variable is only assigned when `process.env` does not
 * already hold a truthy value for it.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {boolean} true if the file existed and was processed; false if it
 *   does not exist or reading it failed (the failure is passed to logDebug).
 */
const loadEnvFile = (filePath) => {
    try {
        if (!fs.existsSync(filePath)) {
            return false;
        }
        const content = fs.readFileSync(filePath, 'utf-8');
        for (const line of content.split(/\r?\n/)) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith('#')) {
                continue;
            }
            const eq = trimmed.indexOf('=');
            // Need a non-empty key and an '=' separator.
            if (eq <= 0) {
                continue;
            }
            const key = trimmed.slice(0, eq).trim();
            // Trim BEFORE unquoting so `KEY = "value"` loses both quotes;
            // unquoting first left the leading quote behind the space.
            let value = trimmed.slice(eq + 1).trim();
            if (value.length >= 2 && value.startsWith("'") && value.endsWith("'")) {
                value = value.slice(1, -1);
            }
            else {
                value = value.replace(/(^"|"$)/g, '');
            }
            value = value.trim();
            // Values already present in the environment take precedence.
            if (!process.env[key]) {
                process.env[key] = value;
            }
        }
        return true;
    }
    catch (e) {
        logDebug('Error loading .env file', e);
        return false;
    }
};
|
|
46
|
+
// Look for a .env file next to the project root first, then in the current
// working directory, and record which one (if any) was loaded.
{
    let envStatus;
    if (loadEnvFile(envPath)) {
        envStatus = `Loaded .env manually from: ${envPath}`;
    }
    else {
        const cwdEnv = path.join(process.cwd(), '.env');
        envStatus = loadEnvFile(cwdEnv)
            ? `Loaded .env manually from CWD: ${cwdEnv}`
            : `Warning: No .env found at ${envPath} or CWD`;
    }
    logDebug(envStatus);
}
// Startup snapshot: working directory, Node version, project root and the
// configured BRAVE_PATH (or 'Not Set').
logDebug('Server Starting...', {
    cwd: process.cwd(),
    nodeVersion: process.version,
    projectRoot,
    bravePath: process.env.BRAVE_PATH || 'Not Set'
});
|
|
9
65
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
10
66
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
11
67
|
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ListPromptsRequestSchema, InitializeRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
68
|
+
// Last-resort diagnostics for errors nothing else caught.
// An uncaught exception is logged (message and stack) and then terminates the
// process with exit code 1.
process.on('uncaughtException', (error) => {
    const { message, stack } = error;
    logDebug('CRITICAL: Uncaught Exception', { message, stack });
    console.error('CRITICAL: Uncaught Exception', error);
    process.exit(1);
});
// An unhandled promise rejection is only logged; the process keeps running.
process.on('unhandledRejection', (reason) => {
    const label = 'CRITICAL: Unhandled Rejection';
    logDebug(label, reason);
    console.error(label, reason);
});
|
|
12
81
|
import { TOOLS, SERVER_INFO, CAPABILITIES, TOOL_NAMES, } from "./tool-definitions.js";
|
|
13
82
|
import { withErrorHandling } from "./system-utils.js";
|
|
14
83
|
import { validateMCPResponse } from "./mcp-response-validator.js";
|
|
@@ -23,29 +92,23 @@ import { handleSaveContentAsMarkdown } from "./handlers/file-handlers.js";
|
|
|
23
92
|
// Import new data extraction handlers
|
|
24
93
|
import { handleExtractJSON, handleScrapeMetaTags, handleExtractSchema, } from "./handlers/data-extraction-handlers.js";
|
|
25
94
|
// Import multi-element handlers
|
|
26
|
-
import { handleBatchElementScraper,
|
|
95
|
+
import { handleBatchElementScraper, handleLinkHarvester, handleMediaExtractor, } from "./handlers/multi-element-handlers.js";
|
|
27
96
|
// Import pagination handlers
|
|
28
97
|
import { handleBreadcrumbNavigator, } from "./handlers/navigation-handlers.js";
|
|
29
98
|
// Import AI-powered handlers
|
|
30
99
|
import { handleSmartSelectorGenerator, handleContentClassification, } from "./handlers/ai-powered-handlers.js";
|
|
31
100
|
// Import search & filter handlers
|
|
32
|
-
|
|
33
|
-
// Import data quality handlers
|
|
34
|
-
import { handleDataTypeValidator, } from "./handlers/data-quality-handlers.js";
|
|
35
|
-
// Import captcha handlers
|
|
36
|
-
import { handleOCREngine, handleAudioCaptchaSolver, handlePuzzleCaptchaHandler, } from "./handlers/captcha-handlers.js";
|
|
101
|
+
// Import visual tools handlers
|
|
37
102
|
// Import visual tools handlers
|
|
38
103
|
import { handleElementScreenshot, handleVideoRecording, } from "./handlers/visual-tools-handlers.js";
|
|
39
104
|
// Import smart data extractors
|
|
40
|
-
import {
|
|
105
|
+
import { handleNetworkRecorder, handleImageExtractorAdvanced, handleUrlRedirectTracer, handleApiFinder, } from "./handlers/smart-data-extractors.js";
|
|
41
106
|
// Import dynamic session handlers
|
|
42
107
|
import { handleAjaxContentWaiter, } from "./handlers/dynamic-session-handlers.js";
|
|
43
108
|
// Import monitoring & reporting handlers
|
|
44
109
|
import { handleProgressTracker, } from "./handlers/monitoring-reporting-handlers.js";
|
|
45
|
-
// Import advanced video & media handlers
|
|
46
|
-
import { handleVideoPlayerFinder, handleStreamDetector, handleVideoDownloadLinkFinder, } from "./handlers/advanced-video-media-handlers.js";
|
|
47
110
|
// Import advanced extraction handlers (Ad-bypass & Obfuscation)
|
|
48
|
-
import { handleAdvancedVideoExtraction,
|
|
111
|
+
import { handleAdvancedVideoExtraction, handleMultiLayerRedirectTrace, handleAdProtectionDetector, } from "./handlers/advanced-extraction-handlers.js";
|
|
49
112
|
// Initialize MCP server
|
|
50
113
|
const server = new Server(SERVER_INFO, { capabilities: CAPABILITIES });
|
|
51
114
|
// Register initialize handler (CRITICAL - missing handler can cause crash)
|
|
@@ -121,9 +184,6 @@ export async function executeToolByName(name, args) {
|
|
|
121
184
|
break;
|
|
122
185
|
// Smart Data Extractors
|
|
123
186
|
// DOM & HTML Extraction
|
|
124
|
-
case TOOL_NAMES.HTML_ELEMENTS_EXTRACTOR:
|
|
125
|
-
result = await handleHtmlElementsExtractor(args || {});
|
|
126
|
-
break;
|
|
127
187
|
case TOOL_NAMES.EXTRACT_JSON:
|
|
128
188
|
result = await handleExtractJSON(args || {});
|
|
129
189
|
break;
|
|
@@ -137,9 +197,6 @@ export async function executeToolByName(name, args) {
|
|
|
137
197
|
case TOOL_NAMES.BATCH_ELEMENT_SCRAPER:
|
|
138
198
|
result = await handleBatchElementScraper(args);
|
|
139
199
|
break;
|
|
140
|
-
case TOOL_NAMES.ATTRIBUTE_HARVESTER:
|
|
141
|
-
result = await handleAttributeHarvester(args);
|
|
142
|
-
break;
|
|
143
200
|
// Content Type Specific
|
|
144
201
|
case TOOL_NAMES.LINK_HARVESTER:
|
|
145
202
|
result = await handleLinkHarvester(args || {});
|
|
@@ -163,32 +220,17 @@ export async function executeToolByName(name, args) {
|
|
|
163
220
|
result = await handleContentClassification(args);
|
|
164
221
|
break;
|
|
165
222
|
// Search & Filter Tools
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
case TOOL_NAMES.
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
case TOOL_NAMES.
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
case TOOL_NAMES.
|
|
176
|
-
|
|
177
|
-
break;
|
|
178
|
-
// Data Quality & Validation
|
|
179
|
-
case TOOL_NAMES.DATA_TYPE_VALIDATOR:
|
|
180
|
-
result = await handleDataTypeValidator(args);
|
|
181
|
-
break;
|
|
182
|
-
// Advanced Captcha Handling
|
|
183
|
-
case TOOL_NAMES.OCR_ENGINE:
|
|
184
|
-
result = await handleOCREngine(args);
|
|
185
|
-
break;
|
|
186
|
-
case TOOL_NAMES.AUDIO_CAPTCHA_SOLVER:
|
|
187
|
-
result = await handleAudioCaptchaSolver(args);
|
|
188
|
-
break;
|
|
189
|
-
case TOOL_NAMES.PUZZLE_CAPTCHA_HANDLER:
|
|
190
|
-
result = await handlePuzzleCaptchaHandler(args);
|
|
191
|
-
break;
|
|
223
|
+
// --- Search & Filter (Consolidated) ---
|
|
224
|
+
case TOOL_NAMES.SEARCH_CONTENT:
|
|
225
|
+
return await handleSearchContent(args);
|
|
226
|
+
case TOOL_NAMES.FIND_ELEMENT_ADVANCED:
|
|
227
|
+
return await handleFindElementAdvanced(args);
|
|
228
|
+
// --- Deep Analysis ---
|
|
229
|
+
case TOOL_NAMES.DEEP_ANALYSIS:
|
|
230
|
+
return await handleDeepAnalysis(args);
|
|
231
|
+
// --- Advanced Captcha Handling (Consolidated) ---
|
|
232
|
+
case TOOL_NAMES.SOLVE_CAPTCHA:
|
|
233
|
+
return await handleUnifiedCaptcha({ strategy: 'auto', ...args });
|
|
192
234
|
// Screenshot & Visual Tools
|
|
193
235
|
case TOOL_NAMES.ELEMENT_SCREENSHOT:
|
|
194
236
|
result = await handleElementScreenshot(args);
|
|
@@ -197,9 +239,6 @@ export async function executeToolByName(name, args) {
|
|
|
197
239
|
result = await handleVideoRecording(args);
|
|
198
240
|
break;
|
|
199
241
|
// Smart Data Extractors (Advanced)
|
|
200
|
-
case "fetch_xhr":
|
|
201
|
-
result = await handleFetchXHR(args || {});
|
|
202
|
-
break;
|
|
203
242
|
case "network_recorder":
|
|
204
243
|
result = await handleNetworkRecorder(args || {});
|
|
205
244
|
break;
|
|
@@ -209,9 +248,6 @@ export async function executeToolByName(name, args) {
|
|
|
209
248
|
case "image_extractor_advanced":
|
|
210
249
|
result = await handleImageExtractorAdvanced(args || {});
|
|
211
250
|
break;
|
|
212
|
-
case "video_source_extractor":
|
|
213
|
-
result = await handleVideoSourceExtractor(args || {});
|
|
214
|
-
break;
|
|
215
251
|
case "url_redirect_tracer":
|
|
216
252
|
result = await handleUrlRedirectTracer(args);
|
|
217
253
|
break;
|
|
@@ -224,22 +260,10 @@ export async function executeToolByName(name, args) {
|
|
|
224
260
|
result = await handleProgressTracker(args || {});
|
|
225
261
|
break;
|
|
226
262
|
// Advanced Video & Media Download Tools
|
|
227
|
-
case "video_player_finder":
|
|
228
|
-
result = await handleVideoPlayerFinder(args || {});
|
|
229
|
-
break;
|
|
230
|
-
case "stream_detector":
|
|
231
|
-
result = await handleStreamDetector(args || {});
|
|
232
|
-
break;
|
|
233
|
-
case "video_download_link_finder":
|
|
234
|
-
result = await handleVideoDownloadLinkFinder(args || {});
|
|
235
|
-
break;
|
|
236
263
|
// Advanced Extraction Tools (Ad-Bypass & Obfuscation)
|
|
237
264
|
case "advanced_video_extraction":
|
|
238
265
|
result = await handleAdvancedVideoExtraction(args || {});
|
|
239
266
|
break;
|
|
240
|
-
case "deobfuscate_js":
|
|
241
|
-
result = await handleDeobfuscateJS(args || {});
|
|
242
|
-
break;
|
|
243
267
|
case "multi_layer_redirect_trace":
|
|
244
268
|
result = await handleMultiLayerRedirectTrace(args);
|
|
245
269
|
break;
|