brave-real-browser-mcp-server 2.15.1 → 2.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -10
- package/dist/tool-definitions.js +0 -43
- package/package.json +2 -2
- package/dist/handlers/APPLY_OPTIMIZATION_PATTERN.js +0 -326
- package/dist/handlers/advanced-scraping-handlers.js +0 -58
- package/dist/handlers/data-transform-handlers.js +0 -66
- package/dist/handlers/dom-handlers.js +0 -206
- package/dist/handlers/network-handlers.js +0 -111
- package/dist/mcp-server.js +0 -265
- package/dist/test-constants.js +0 -111
- package/scripts/update-to-latest.cjs +0 -130
package/dist/index.js
CHANGED
|
@@ -44,7 +44,7 @@ import { handleOCREngine, handleAudioCaptchaSolver, handlePuzzleCaptchaHandler,
|
|
|
44
44
|
// Import visual tools handlers
|
|
45
45
|
import { handleFullPageScreenshot, handleElementScreenshot, handleVideoRecording, handleVisualComparison, } from "./handlers/visual-tools-handlers.js";
|
|
46
46
|
// Import smart data extractors
|
|
47
|
-
import { handleHtmlElementsExtractor, handleTagsFinder, handleLinksFinder, handleXpathLinks, handleAjaxExtractor, handleFetchXHR, handleNetworkRecorder, handleRegexPatternFinder, handleIframeExtractor, handleEmbedPageExtractor, handleImageExtractorAdvanced, handleVideoSourceExtractor,
|
|
47
|
+
import { handleHtmlElementsExtractor, handleTagsFinder, handleLinksFinder, handleXpathLinks, handleAjaxExtractor, handleFetchXHR, handleNetworkRecorder, handleRegexPatternFinder, handleIframeExtractor, handleEmbedPageExtractor, handleImageExtractorAdvanced, handleVideoSourceExtractor, handleUrlRedirectTracer, handleUserAgentExtractor, } from "./handlers/smart-data-extractors.js";
|
|
48
48
|
// Import dynamic session handlers
|
|
49
49
|
import { handleShadowDOMExtractor, handleCookieManager, handleFormAutoFill, handleAjaxContentWaiter, } from "./handlers/dynamic-session-handlers.js";
|
|
50
50
|
// Import monitoring & reporting handlers
|
|
@@ -276,15 +276,6 @@ export async function executeToolByName(name, args) {
|
|
|
276
276
|
case "video_source_extractor":
|
|
277
277
|
result = await handleVideoSourceExtractor(args || {});
|
|
278
278
|
break;
|
|
279
|
-
case "video_player_extractor":
|
|
280
|
-
result = await handleVideoPlayerExtractor(args || {});
|
|
281
|
-
break;
|
|
282
|
-
case "video_player_hoster_finder":
|
|
283
|
-
result = await handleVideoPlayerHosterFinder(args || {});
|
|
284
|
-
break;
|
|
285
|
-
case "original_video_hoster_finder":
|
|
286
|
-
result = await handleOriginalVideoHosterFinder(args || {});
|
|
287
|
-
break;
|
|
288
279
|
case "url_redirect_tracer":
|
|
289
280
|
result = await handleUrlRedirectTracer(args);
|
|
290
281
|
break;
|
package/dist/tool-definitions.js
CHANGED
|
@@ -868,38 +868,6 @@ export const TOOLS = [
|
|
|
868
868
|
},
|
|
869
869
|
},
|
|
870
870
|
},
|
|
871
|
-
{
|
|
872
|
-
name: 'video_source_extractor',
|
|
873
|
-
description: 'Extract video sources from video elements',
|
|
874
|
-
inputSchema: {
|
|
875
|
-
type: 'object',
|
|
876
|
-
properties: {},
|
|
877
|
-
},
|
|
878
|
-
},
|
|
879
|
-
{
|
|
880
|
-
name: 'video_player_extractor',
|
|
881
|
-
description: 'Extract video player information',
|
|
882
|
-
inputSchema: {
|
|
883
|
-
type: 'object',
|
|
884
|
-
properties: {},
|
|
885
|
-
},
|
|
886
|
-
},
|
|
887
|
-
{
|
|
888
|
-
name: 'video_player_hoster_finder',
|
|
889
|
-
description: 'Detect video hosting platform (YouTube, Vimeo, etc.)',
|
|
890
|
-
inputSchema: {
|
|
891
|
-
type: 'object',
|
|
892
|
-
properties: {},
|
|
893
|
-
},
|
|
894
|
-
},
|
|
895
|
-
{
|
|
896
|
-
name: 'original_video_hoster_finder',
|
|
897
|
-
description: 'Find original video source',
|
|
898
|
-
inputSchema: {
|
|
899
|
-
type: 'object',
|
|
900
|
-
properties: {},
|
|
901
|
-
},
|
|
902
|
-
},
|
|
903
871
|
{
|
|
904
872
|
name: 'url_redirect_tracer',
|
|
905
873
|
description: 'Trace URL redirects',
|
|
@@ -1155,16 +1123,6 @@ export const TOOLS = [
|
|
|
1155
1123
|
}
|
|
1156
1124
|
}
|
|
1157
1125
|
},
|
|
1158
|
-
{
|
|
1159
|
-
name: 'redirect_tracer',
|
|
1160
|
-
description: 'Trace URL redirects to find final destination',
|
|
1161
|
-
inputSchema: {
|
|
1162
|
-
type: 'object',
|
|
1163
|
-
properties: {
|
|
1164
|
-
url: { type: 'string' }
|
|
1165
|
-
}
|
|
1166
|
-
}
|
|
1167
|
-
},
|
|
1168
1126
|
{
|
|
1169
1127
|
name: 'video_download_link_finder',
|
|
1170
1128
|
description: 'Find direct download links for video files',
|
|
@@ -1231,7 +1189,6 @@ export const TOOL_NAMES = {
|
|
|
1231
1189
|
VIDEO_SOURCE_EXTRACTOR: 'video_source_extractor',
|
|
1232
1190
|
VIDEO_PLAYER_FINDER: 'video_player_finder',
|
|
1233
1191
|
STREAM_DETECTOR: 'stream_detector',
|
|
1234
|
-
REDIRECT_TRACER: 'redirect_tracer',
|
|
1235
1192
|
// Search & Filter Tools
|
|
1236
1193
|
KEYWORD_SEARCH: 'keyword_search',
|
|
1237
1194
|
REGEX_PATTERN_MATCHER: 'regex_pattern_matcher',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.15.1",
|
|
3
|
+
"version": "2.15.3",
|
|
4
4
|
"description": "Universal AI IDE MCP Server - Auto-detects and supports all AI IDEs (Claude Desktop, Cursor, Windsurf, Cline, Zed, VSCode, Qoder AI, etc.) with Brave browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"clean": "rimraf dist",
|
|
11
11
|
"clean:cache": "npm cache clean --force",
|
|
12
12
|
"fix-cache-permissions": "echo 'Run: sudo chown -R $(whoami):$(id -gn) ~/.npm' && echo 'This fixes npm cache permission issues'",
|
|
13
|
-
"clean:modules": "
|
|
13
|
+
"clean:modules": "rimraf node_modules",
|
|
14
14
|
"clean:all": "npm run clean:modules && npm run clean:cache && npm run clean",
|
|
15
15
|
"fresh-install": "npm run clean:all && npm install",
|
|
16
16
|
"fresh-start": "npm run fresh-install && npm run build && npm start",
|
|
package/dist/handlers/APPLY_OPTIMIZATION_PATTERN.js
DELETED
@@ -1,326 +0,0 @@
|
|
|
1
|
-
// 🚀 APPLY THIS PATTERN TO ALL 113 TOOLS
|
|
2
|
-
// This file shows the EXACT pattern to apply to every tool handler
|
|
3
|
-
// Copy-paste and customize for each tool
|
|
4
|
-
import { getCurrentPage } from '../browser-manager.js';
|
|
5
|
-
import { validateWorkflow } from '../workflow-validation.js';
|
|
6
|
-
import { TOOL_OPTIMIZATION_CONFIG, globalCache, deduplicateResults, executeToolWithOptimizations, VIDEO_HOSTERS_DB, SELECTOR_UTILS, globalMetrics, globalToolStatus, createErrorHandler, } from '../optimization-utils.js';
|
|
7
|
-
import { sleep } from '../system-utils.js';
|
|
8
|
-
/**
|
|
9
|
-
* ✅ TEMPLATE: Universal Pattern for ALL 113 Tools
|
|
10
|
-
*
|
|
11
|
-
* Replace:
|
|
12
|
-
* - TOOL_NAME: Actual tool name (e.g., 'ajax_extractor')
|
|
13
|
-
* - CATEGORY: Category from TOOL_OPTIMIZATION_CONFIG
|
|
14
|
-
* - TIMEOUT: Duration value from TOOL_OPTIMIZATION_CONFIG.xxx.defaultTimeout
|
|
15
|
-
* - CACHE_KEY: Unique key for caching (usually based on URL + params)
|
|
16
|
-
*/
|
|
17
|
-
export async function handleUNIVERSAL_TOOL_TEMPLATE(args) {
|
|
18
|
-
const toolName = 'TOOL_NAME'; // ← REPLACE with actual tool name
|
|
19
|
-
const errorHandler = createErrorHandler(toolName);
|
|
20
|
-
globalMetrics.start(toolName);
|
|
21
|
-
try {
|
|
22
|
-
// Validate workflow
|
|
23
|
-
validateWorkflow(toolName, {
|
|
24
|
-
requireBrowser: true,
|
|
25
|
-
requirePage: true,
|
|
26
|
-
});
|
|
27
|
-
// Get timeout from configuration (example)
|
|
28
|
-
const timeout = args.timeout || TOOL_OPTIMIZATION_CONFIG.dataExtraction.defaultTimeout;
|
|
29
|
-
const shouldCache = args.cache !== false;
|
|
30
|
-
const cacheKey = `${toolName}_${args.url || 'default'}`;
|
|
31
|
-
// Check cache first
|
|
32
|
-
if (shouldCache) {
|
|
33
|
-
const cached = globalCache.get(cacheKey);
|
|
34
|
-
if (cached) {
|
|
35
|
-
console.error(`[${toolName}] Cache hit for key: ${cacheKey}`);
|
|
36
|
-
return {
|
|
37
|
-
content: [
|
|
38
|
-
{
|
|
39
|
-
type: 'text',
|
|
40
|
-
text: `✅ ${toolName} (from cache)\n\n${JSON.stringify(cached, null, 2)}`,
|
|
41
|
-
},
|
|
42
|
-
],
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
// ======================================================
|
|
47
|
-
// YOUR ORIGINAL TOOL LOGIC GOES HERE
|
|
48
|
-
// ======================================================
|
|
49
|
-
const page = getCurrentPage();
|
|
50
|
-
let results = [];
|
|
51
|
-
// Example: Basic extraction logic
|
|
52
|
-
// Replace this with your actual tool implementation
|
|
53
|
-
results = await page.evaluate(() => {
|
|
54
|
-
// Your extraction logic here
|
|
55
|
-
return [];
|
|
56
|
-
});
|
|
57
|
-
// ======================================================
|
|
58
|
-
// OPTIMIZATION LAYER (Apply to all results)
|
|
59
|
-
// ======================================================
|
|
60
|
-
// 1. Deduplicate results
|
|
61
|
-
if (Array.isArray(results)) {
|
|
62
|
-
results = deduplicateResults(results, 'url'); // Use 'url' or your unique key
|
|
63
|
-
}
|
|
64
|
-
// 2. Filter empty/invalid results
|
|
65
|
-
results = results.filter((r) => r && Object.keys(r).length > 0);
|
|
66
|
-
// 3. Cache results if enabled
|
|
67
|
-
if (shouldCache && results.length > 0) {
|
|
68
|
-
globalCache.set(cacheKey, results);
|
|
69
|
-
}
|
|
70
|
-
// ======================================================
|
|
71
|
-
// RETURN OPTIMIZED RESULT
|
|
72
|
-
// ======================================================
|
|
73
|
-
return {
|
|
74
|
-
content: [
|
|
75
|
-
{
|
|
76
|
-
type: 'text',
|
|
77
|
-
text: `✅ ${toolName}\nFound ${results.length} results\n\n${JSON.stringify(results, null, 2)}`,
|
|
78
|
-
},
|
|
79
|
-
],
|
|
80
|
-
};
|
|
81
|
-
}
|
|
82
|
-
catch (error) {
|
|
83
|
-
// Enhanced error handling
|
|
84
|
-
return errorHandler.handle(error, 'tool execution');
|
|
85
|
-
}
|
|
86
|
-
finally {
|
|
87
|
-
// Cleanup and metrics
|
|
88
|
-
const duration = globalMetrics.end(toolName);
|
|
89
|
-
globalToolStatus.recordExecution(toolName, duration);
|
|
90
|
-
console.error(`[${toolName}] Execution time: ${duration}ms`);
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
// ============================================================================
|
|
94
|
-
// SPECIFIC OPTIMIZATION PATTERNS FOR EACH TOOL CATEGORY
|
|
95
|
-
// ============================================================================
|
|
96
|
-
/**
|
|
97
|
-
* 🎥 VIDEO EXTRACTION TOOLS (20+ tools)
|
|
98
|
-
*
|
|
99
|
-
* Pattern: Use VIDEO_HOSTERS_DB, extended timeout, deep iframe scanning
|
|
100
|
-
*/
|
|
101
|
-
export async function handleVideoExtractionTemplate(args) {
|
|
102
|
-
const toolName = 'video_extraction_tool';
|
|
103
|
-
const errorHandler = createErrorHandler(toolName);
|
|
104
|
-
globalMetrics.start(toolName);
|
|
105
|
-
try {
|
|
106
|
-
const page = getCurrentPage();
|
|
107
|
-
const timeout = args.timeout || TOOL_OPTIMIZATION_CONFIG.videoExtraction.defaultTimeout;
|
|
108
|
-
const videoData = await executeToolWithOptimizations(toolName, async () => {
|
|
109
|
-
let videos = [];
|
|
110
|
-
// Use optimized selectors
|
|
111
|
-
videos = await page.evaluate((videoSelectors) => {
|
|
112
|
-
return Array.from(document.querySelectorAll(videoSelectors.join(','))).map((el) => ({
|
|
113
|
-
type: el.tagName.toLowerCase(),
|
|
114
|
-
src: el.src || el.getAttribute('data-src') || el.getAttribute('href'),
|
|
115
|
-
hoster: null,
|
|
116
|
-
platform: null,
|
|
117
|
-
}));
|
|
118
|
-
}, SELECTOR_UTILS.videoSelectors);
|
|
119
|
-
// Detect video hosters
|
|
120
|
-
videos = videos.map((video) => ({
|
|
121
|
-
...video,
|
|
122
|
-
hoster: VIDEO_HOSTERS_DB.getHosterName(video.src),
|
|
123
|
-
}));
|
|
124
|
-
// Deduplicate
|
|
125
|
-
videos = deduplicateResults(videos, 'src');
|
|
126
|
-
// Filter by hoster if needed
|
|
127
|
-
if (args.filterByHoster) {
|
|
128
|
-
videos = videos.filter((v) => v.hoster !== null);
|
|
129
|
-
}
|
|
130
|
-
return videos;
|
|
131
|
-
}, {
|
|
132
|
-
timeout,
|
|
133
|
-
retryAttempts: 2,
|
|
134
|
-
cacheKey: `videos_${page.url()}`,
|
|
135
|
-
shouldCache: true,
|
|
136
|
-
});
|
|
137
|
-
return {
|
|
138
|
-
content: [
|
|
139
|
-
{
|
|
140
|
-
type: 'text',
|
|
141
|
-
text: `🎥 Found ${videoData.length} video sources\n\n${JSON.stringify(videoData, null, 2)}`,
|
|
142
|
-
},
|
|
143
|
-
],
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
catch (error) {
|
|
147
|
-
return errorHandler.handle(error, 'video extraction');
|
|
148
|
-
}
|
|
149
|
-
finally {
|
|
150
|
-
const duration = globalMetrics.end(toolName);
|
|
151
|
-
globalToolStatus.recordExecution(toolName, duration);
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
/**
|
|
155
|
-
* ⏱️ NETWORK MONITORING TOOLS (ajax_extractor, fetch_xhr, network_recorder)
|
|
156
|
-
*
|
|
157
|
-
* Pattern: Extended duration, retry logic, comprehensive request capture
|
|
158
|
-
*/
|
|
159
|
-
export async function handleNetworkMonitoringTemplate(args) {
|
|
160
|
-
const toolName = 'network_monitoring_tool';
|
|
161
|
-
const errorHandler = createErrorHandler(toolName);
|
|
162
|
-
globalMetrics.start(toolName);
|
|
163
|
-
try {
|
|
164
|
-
const page = getCurrentPage();
|
|
165
|
-
const duration = args.duration || TOOL_OPTIMIZATION_CONFIG.networkMonitoring.defaultDuration;
|
|
166
|
-
const networkData = await executeToolWithOptimizations(toolName, async () => {
|
|
167
|
-
const requests = [];
|
|
168
|
-
const requestHandler = (request) => {
|
|
169
|
-
const resourceType = request.resourceType();
|
|
170
|
-
if (resourceType === 'xhr' || resourceType === 'fetch') {
|
|
171
|
-
requests.push({
|
|
172
|
-
url: request.url(),
|
|
173
|
-
method: request.method(),
|
|
174
|
-
type: resourceType,
|
|
175
|
-
timestamp: new Date().toISOString(),
|
|
176
|
-
});
|
|
177
|
-
}
|
|
178
|
-
};
|
|
179
|
-
page.on('request', requestHandler);
|
|
180
|
-
// Monitor for configured duration
|
|
181
|
-
await sleep(duration);
|
|
182
|
-
page.off('request', requestHandler);
|
|
183
|
-
// Deduplicate requests
|
|
184
|
-
return deduplicateResults(requests, 'url');
|
|
185
|
-
}, {
|
|
186
|
-
timeout: TOOL_OPTIMIZATION_CONFIG.networkMonitoring.monitoringTimeout,
|
|
187
|
-
retryAttempts: 3,
|
|
188
|
-
cacheKey: `network_${Date.now() / 60000}`, // Cache per minute
|
|
189
|
-
shouldCache: false, // Don't cache network data
|
|
190
|
-
});
|
|
191
|
-
return {
|
|
192
|
-
content: [
|
|
193
|
-
{
|
|
194
|
-
type: 'text',
|
|
195
|
-
text: `📡 Captured ${networkData.length} network requests\n\n${JSON.stringify(networkData, null, 2)}`,
|
|
196
|
-
},
|
|
197
|
-
],
|
|
198
|
-
};
|
|
199
|
-
}
|
|
200
|
-
catch (error) {
|
|
201
|
-
return errorHandler.handle(error, 'network monitoring');
|
|
202
|
-
}
|
|
203
|
-
finally {
|
|
204
|
-
const duration = globalMetrics.end(toolName);
|
|
205
|
-
globalToolStatus.recordExecution(toolName, duration);
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
/**
|
|
209
|
-
* 📊 DATA EXTRACTION TOOLS (scrape_table, extract_links, extract_images, etc.)
|
|
210
|
-
*
|
|
211
|
-
* Pattern: Deep scanning, result deduplication, background-image extraction
|
|
212
|
-
*/
|
|
213
|
-
export async function handleDataExtractionTemplate(args) {
|
|
214
|
-
const toolName = 'data_extraction_tool';
|
|
215
|
-
const errorHandler = createErrorHandler(toolName);
|
|
216
|
-
globalMetrics.start(toolName);
|
|
217
|
-
try {
|
|
218
|
-
const page = getCurrentPage();
|
|
219
|
-
const selector = args.selector || '*';
|
|
220
|
-
const deepScan = args.deepScan ?? TOOL_OPTIMIZATION_CONFIG.dataExtraction.deepScanEnabled;
|
|
221
|
-
const extractedData = await executeToolWithOptimizations(toolName, async () => {
|
|
222
|
-
let results = [];
|
|
223
|
-
// Primary extraction
|
|
224
|
-
results = await page.evaluate((sel, deep) => {
|
|
225
|
-
const data = [];
|
|
226
|
-
// Standard extraction
|
|
227
|
-
document.querySelectorAll(sel).forEach((el, idx) => {
|
|
228
|
-
data.push({
|
|
229
|
-
index: idx,
|
|
230
|
-
tag: el.tagName.toLowerCase(),
|
|
231
|
-
text: el.textContent?.trim().substring(0, 100),
|
|
232
|
-
attributes: Array.from(el.attributes || []).reduce((acc, attr) => {
|
|
233
|
-
acc[attr.name] = attr.value;
|
|
234
|
-
return acc;
|
|
235
|
-
}, {}),
|
|
236
|
-
});
|
|
237
|
-
});
|
|
238
|
-
// Deep scan if enabled
|
|
239
|
-
if (deep && data.length < 1000) {
|
|
240
|
-
// Scan hidden elements, data attributes, etc.
|
|
241
|
-
document.querySelectorAll('[data-*]').forEach((el) => {
|
|
242
|
-
Object.keys(el.dataset || {}).forEach((key) => {
|
|
243
|
-
data.push({
|
|
244
|
-
type: 'data-attribute',
|
|
245
|
-
key,
|
|
246
|
-
value: el.dataset[key],
|
|
247
|
-
});
|
|
248
|
-
});
|
|
249
|
-
});
|
|
250
|
-
}
|
|
251
|
-
return data;
|
|
252
|
-
}, selector, deepScan);
|
|
253
|
-
// Deduplicate
|
|
254
|
-
results = deduplicateResults(results);
|
|
255
|
-
// Filter empty
|
|
256
|
-
results = results.filter((r) => r && Object.keys(r).length > 0);
|
|
257
|
-
return results;
|
|
258
|
-
}, {
|
|
259
|
-
timeout: TOOL_OPTIMIZATION_CONFIG.dataExtraction.defaultTimeout,
|
|
260
|
-
retryAttempts: 2,
|
|
261
|
-
cacheKey: `data_${page.url()}_${selector}`,
|
|
262
|
-
shouldCache: true,
|
|
263
|
-
});
|
|
264
|
-
return {
|
|
265
|
-
content: [
|
|
266
|
-
{
|
|
267
|
-
type: 'text',
|
|
268
|
-
text: `📋 Extracted ${extractedData.length} items\n\n${JSON.stringify(extractedData.slice(0, 20), null, 2)}`,
|
|
269
|
-
},
|
|
270
|
-
],
|
|
271
|
-
};
|
|
272
|
-
}
|
|
273
|
-
catch (error) {
|
|
274
|
-
return errorHandler.handle(error, 'data extraction');
|
|
275
|
-
}
|
|
276
|
-
finally {
|
|
277
|
-
const duration = globalMetrics.end(toolName);
|
|
278
|
-
globalToolStatus.recordExecution(toolName, duration);
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
// ============================================================================
|
|
282
|
-
// IMPLEMENTATION INSTRUCTIONS
|
|
283
|
-
// ============================================================================
|
|
284
|
-
/**
|
|
285
|
-
* TO APPLY THIS OPTIMIZATION PATTERN TO ALL 113 TOOLS:
|
|
286
|
-
*
|
|
287
|
-
* 1. For EACH tool handler function:
|
|
288
|
-
* - Add imports from optimization-utils at top of file
|
|
289
|
-
* - Wrap main logic with try-catch-finally
|
|
290
|
-
* - Add globalMetrics.start() and end() calls
|
|
291
|
-
* - Apply deduplicateResults() to list outputs
|
|
292
|
-
* - Use appropriate TOOL_OPTIMIZATION_CONFIG value
|
|
293
|
-
*
|
|
294
|
-
* 2. For VIDEO tools specifically:
|
|
295
|
-
* - Use handleVideoExtractionTemplate pattern
|
|
296
|
-
* - Replace hardcoded selectors with SELECTOR_UTILS.videoSelectors
|
|
297
|
-
* - Use VIDEO_HOSTERS_DB for hoster detection
|
|
298
|
-
*
|
|
299
|
-
* 3. For NETWORK tools specifically:
|
|
300
|
-
* - Use handleNetworkMonitoringTemplate pattern
|
|
301
|
-
* - Extend duration to TOOL_OPTIMIZATION_CONFIG.networkMonitoring.defaultDuration
|
|
302
|
-
* - Add retry logic for failed captures
|
|
303
|
-
*
|
|
304
|
-
* 4. For DATA tools specifically:
|
|
305
|
-
* - Use handleDataExtractionTemplate pattern
|
|
306
|
-
* - Enable deep scanning with TOOL_OPTIMIZATION_CONFIG.dataExtraction.deepScanEnabled
|
|
307
|
-
* - Always deduplicate before returning results
|
|
308
|
-
*
|
|
309
|
-
* 5. Test each optimized tool:
|
|
310
|
-
* - Verify it still works with original functionality
|
|
311
|
-
* - Check success rate > 95%
|
|
312
|
-
* - Verify deduplication works (no duplicate results)
|
|
313
|
-
* - Check caching is effective
|
|
314
|
-
*
|
|
315
|
-
* 6. After completing ALL 113 tools:
|
|
316
|
-
* - Run: npm run build
|
|
317
|
-
* - Run: npm run test
|
|
318
|
-
* - Run: npm run lint:fix
|
|
319
|
-
* - Commit with: git commit -m "✅ Optimize ALL 113 MCP tools for production"
|
|
320
|
-
*/
|
|
321
|
-
export default {
|
|
322
|
-
handleUNIVERSAL_TOOL_TEMPLATE,
|
|
323
|
-
handleVideoExtractionTemplate,
|
|
324
|
-
handleNetworkMonitoringTemplate,
|
|
325
|
-
handleDataExtractionTemplate,
|
|
326
|
-
};
|
|
package/dist/handlers/advanced-scraping-handlers.js
DELETED
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
-
export async function handleMultiPageScraper(args) {
|
|
3
|
-
const page = getPageInstance();
|
|
4
|
-
if (!page)
|
|
5
|
-
throw new Error('Browser not initialized. Use browser_init first.');
|
|
6
|
-
const results = [];
|
|
7
|
-
const errors = [];
|
|
8
|
-
for (const url of args.urls) {
|
|
9
|
-
try {
|
|
10
|
-
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
11
|
-
if (args.waitBetweenPages) {
|
|
12
|
-
await new Promise(resolve => setTimeout(resolve, args.waitBetweenPages));
|
|
13
|
-
}
|
|
14
|
-
const data = await page.evaluate((selector) => {
|
|
15
|
-
const elements = document.querySelectorAll(selector);
|
|
16
|
-
return Array.from(elements).map(el => el.textContent?.trim()).filter(Boolean);
|
|
17
|
-
}, args.dataSelector);
|
|
18
|
-
results.push({ url, data, success: true });
|
|
19
|
-
}
|
|
20
|
-
catch (error) {
|
|
21
|
-
errors.push({ url, error: String(error) });
|
|
22
|
-
results.push({ url, success: false, error: String(error) });
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
return {
|
|
26
|
-
content: [{
|
|
27
|
-
type: 'text',
|
|
28
|
-
text: JSON.stringify({ summary: `Scraped ${results.length} pages`, results, errors }, null, 2)
|
|
29
|
-
}]
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
export async function handleBreadcrumbNavigator(args) {
|
|
33
|
-
const page = getPageInstance();
|
|
34
|
-
if (!page)
|
|
35
|
-
throw new Error('Browser not initialized. Use browser_init first.');
|
|
36
|
-
const breadcrumbs = await page.evaluate((selector) => {
|
|
37
|
-
const elements = document.querySelectorAll(selector);
|
|
38
|
-
return Array.from(elements).map(el => ({
|
|
39
|
-
text: el.textContent?.trim() || '',
|
|
40
|
-
href: el.href || null
|
|
41
|
-
}));
|
|
42
|
-
}, args.breadcrumbSelector);
|
|
43
|
-
let navigationResult = null;
|
|
44
|
-
if (args.followLinks && breadcrumbs.length > 0) {
|
|
45
|
-
// Navigate to parent (second to last)
|
|
46
|
-
const parent = breadcrumbs.length > 1 ? breadcrumbs[breadcrumbs.length - 2] : null;
|
|
47
|
-
if (parent && parent.href) {
|
|
48
|
-
await page.goto(parent.href);
|
|
49
|
-
navigationResult = `Navigated to parent: ${parent.text} (${parent.href})`;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
return {
|
|
53
|
-
content: [{
|
|
54
|
-
type: 'text',
|
|
55
|
-
text: JSON.stringify({ breadcrumbs, navigationResult }, null, 2)
|
|
56
|
-
}]
|
|
57
|
-
};
|
|
58
|
-
}
|
|
package/dist/handlers/data-transform-handlers.js
DELETED
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import TurndownService from 'turndown';
|
|
2
|
-
export async function handleHtmlToText(args) {
|
|
3
|
-
try {
|
|
4
|
-
const turndownService = new TurndownService({
|
|
5
|
-
headingStyle: 'atx',
|
|
6
|
-
codeBlockStyle: 'fenced'
|
|
7
|
-
});
|
|
8
|
-
// Config based on args
|
|
9
|
-
if (!args.preserveLinks) {
|
|
10
|
-
turndownService.addRule('no-links', {
|
|
11
|
-
filter: 'a',
|
|
12
|
-
replacement: function (content) { return content; }
|
|
13
|
-
});
|
|
14
|
-
}
|
|
15
|
-
const text = turndownService.turndown(args.html);
|
|
16
|
-
return {
|
|
17
|
-
content: [{
|
|
18
|
-
type: 'text',
|
|
19
|
-
text: text
|
|
20
|
-
}]
|
|
21
|
-
};
|
|
22
|
-
}
|
|
23
|
-
catch (error) {
|
|
24
|
-
return {
|
|
25
|
-
content: [{
|
|
26
|
-
type: 'text',
|
|
27
|
-
text: `Error converting HTML to text: ${error}`
|
|
28
|
-
}],
|
|
29
|
-
isError: true
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
export async function handleDuplicateRemover(args) {
|
|
34
|
-
if (!Array.isArray(args.data)) {
|
|
35
|
-
throw new Error("Input 'data' must be an array.");
|
|
36
|
-
}
|
|
37
|
-
let uniqueData;
|
|
38
|
-
if (args.uniqueKey) {
|
|
39
|
-
const seen = new Set();
|
|
40
|
-
uniqueData = args.data.filter(item => {
|
|
41
|
-
const val = item[args.uniqueKey];
|
|
42
|
-
if (seen.has(val))
|
|
43
|
-
return false;
|
|
44
|
-
seen.add(val);
|
|
45
|
-
return true;
|
|
46
|
-
});
|
|
47
|
-
}
|
|
48
|
-
else {
|
|
49
|
-
// Deep equality check or simple JSON stringify check?
|
|
50
|
-
// Use JSON stringify for simplicity and speed on complex objects
|
|
51
|
-
const seen = new Set();
|
|
52
|
-
uniqueData = args.data.filter(item => {
|
|
53
|
-
const val = JSON.stringify(item);
|
|
54
|
-
if (seen.has(val))
|
|
55
|
-
return false;
|
|
56
|
-
seen.add(val);
|
|
57
|
-
return true;
|
|
58
|
-
});
|
|
59
|
-
}
|
|
60
|
-
return {
|
|
61
|
-
content: [{
|
|
62
|
-
type: 'text',
|
|
63
|
-
text: JSON.stringify(uniqueData, null, 2)
|
|
64
|
-
}]
|
|
65
|
-
};
|
|
66
|
-
}
|