brave-real-browser-mcp-server 2.15.5 → 2.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/multi-element-handlers.js +0 -67
- package/dist/handlers/navigation-handlers.js +59 -0
- package/dist/handlers/search-filter-handlers.js +0 -121
- package/dist/index.js +5 -17
- package/dist/tool-definitions.js +1 -58
- package/package.json +2 -2
- package/scripts/full-verification.ts +98 -0
- package/scripts/live-verification.ts +61 -0
- package/dist/handlers/data-processing-handlers.js +0 -49
- package/dist/handlers/pagination-handlers.js +0 -115
|
@@ -67,73 +67,6 @@ export async function handleBatchElementScraper(args) {
|
|
|
67
67
|
};
|
|
68
68
|
}, 'Failed to batch scrape elements');
|
|
69
69
|
}
|
|
70
|
-
/**
|
|
71
|
-
* Parent-child relationships maintain करते हुए data निकालता है
|
|
72
|
-
*/
|
|
73
|
-
export async function handleNestedDataExtraction(args) {
|
|
74
|
-
return await withErrorHandling(async () => {
|
|
75
|
-
validateWorkflow('nested_data_extraction', {
|
|
76
|
-
requireBrowser: true,
|
|
77
|
-
requirePage: true,
|
|
78
|
-
});
|
|
79
|
-
const page = getCurrentPage();
|
|
80
|
-
const parentSelector = args.parentSelector;
|
|
81
|
-
const childSelector = args.childSelector;
|
|
82
|
-
const maxParents = args.maxParents || 50;
|
|
83
|
-
const nestedData = await page.evaluate(({ parentSelector, childSelector, maxParents }) => {
|
|
84
|
-
const parents = document.querySelectorAll(parentSelector);
|
|
85
|
-
const results = [];
|
|
86
|
-
let count = 0;
|
|
87
|
-
parents.forEach((parent) => {
|
|
88
|
-
if (count >= maxParents)
|
|
89
|
-
return;
|
|
90
|
-
const parentData = {
|
|
91
|
-
selector: parentSelector,
|
|
92
|
-
text: Array.from(parent.childNodes)
|
|
93
|
-
.filter((node) => node.nodeType === Node.TEXT_NODE)
|
|
94
|
-
.map((node) => node.textContent?.trim())
|
|
95
|
-
.filter((text) => text)
|
|
96
|
-
.join(' '),
|
|
97
|
-
attributes: {},
|
|
98
|
-
};
|
|
99
|
-
// Get parent attributes
|
|
100
|
-
Array.from(parent.attributes).forEach((attr) => {
|
|
101
|
-
parentData.attributes[attr.name] = attr.value;
|
|
102
|
-
});
|
|
103
|
-
// Get children
|
|
104
|
-
const children = parent.querySelectorAll(childSelector);
|
|
105
|
-
const childrenData = [];
|
|
106
|
-
children.forEach((child) => {
|
|
107
|
-
const childData = {
|
|
108
|
-
selector: childSelector,
|
|
109
|
-
text: child.textContent?.trim() || '',
|
|
110
|
-
attributes: {},
|
|
111
|
-
};
|
|
112
|
-
Array.from(child.attributes).forEach((attr) => {
|
|
113
|
-
childData.attributes[attr.name] = attr.value;
|
|
114
|
-
});
|
|
115
|
-
childrenData.push(childData);
|
|
116
|
-
});
|
|
117
|
-
if (childrenData.length > 0) {
|
|
118
|
-
results.push({
|
|
119
|
-
parent: parentData,
|
|
120
|
-
children: childrenData,
|
|
121
|
-
});
|
|
122
|
-
count++;
|
|
123
|
-
}
|
|
124
|
-
});
|
|
125
|
-
return results;
|
|
126
|
-
}, { parentSelector, childSelector, maxParents });
|
|
127
|
-
return {
|
|
128
|
-
content: [
|
|
129
|
-
{
|
|
130
|
-
type: 'text',
|
|
131
|
-
text: `✅ Extracted ${nestedData.length} parent-child relationships\n\n${JSON.stringify(nestedData, null, 2)}`,
|
|
132
|
-
},
|
|
133
|
-
],
|
|
134
|
-
};
|
|
135
|
-
}, 'Failed to extract nested data');
|
|
136
|
-
}
|
|
137
70
|
/**
|
|
138
71
|
* सभी elements के attributes (href, src, data-*) collect करता है
|
|
139
72
|
*/
|
|
@@ -143,3 +143,62 @@ async function withWorkflowValidation(toolName, args, operation) {
|
|
|
143
143
|
throw error;
|
|
144
144
|
}
|
|
145
145
|
}
|
|
146
|
+
/**
|
|
147
|
+
* Site structure follow करके pages scrape करता है
|
|
148
|
+
*/
|
|
149
|
+
export async function handleBreadcrumbNavigator(args) {
|
|
150
|
+
return await withWorkflowValidation('breadcrumb_navigator', args, async () => {
|
|
151
|
+
return await withErrorHandling(async () => {
|
|
152
|
+
const page = getPageInstance();
|
|
153
|
+
if (!page) {
|
|
154
|
+
throw new Error('Browser not initialized. Call browser_init first.');
|
|
155
|
+
}
|
|
156
|
+
const breadcrumbSelector = args.breadcrumbSelector || '.breadcrumb, nav[aria-label="breadcrumb"], .breadcrumbs';
|
|
157
|
+
const followLinks = args.followLinks || false;
|
|
158
|
+
const breadcrumbData = await page.evaluate((selector) => {
|
|
159
|
+
const breadcrumbs = document.querySelectorAll(selector);
|
|
160
|
+
const results = [];
|
|
161
|
+
breadcrumbs.forEach((breadcrumb) => {
|
|
162
|
+
const links = breadcrumb.querySelectorAll('a');
|
|
163
|
+
const items = [];
|
|
164
|
+
links.forEach((link, index) => {
|
|
165
|
+
items.push({
|
|
166
|
+
text: link.textContent?.trim() || '',
|
|
167
|
+
href: link.href,
|
|
168
|
+
level: index,
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
if (items.length > 0) {
|
|
172
|
+
results.push({
|
|
173
|
+
path: items.map((i) => i.text).join(' > '),
|
|
174
|
+
links: items,
|
|
175
|
+
});
|
|
176
|
+
}
|
|
177
|
+
});
|
|
178
|
+
return results;
|
|
179
|
+
}, breadcrumbSelector);
|
|
180
|
+
if (breadcrumbData.length === 0) {
|
|
181
|
+
return {
|
|
182
|
+
content: [
|
|
183
|
+
{
|
|
184
|
+
type: 'text',
|
|
185
|
+
text: '❌ No breadcrumbs found on page',
|
|
186
|
+
},
|
|
187
|
+
],
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
let additionalData = '';
|
|
191
|
+
if (followLinks && breadcrumbData[0]?.links) {
|
|
192
|
+
additionalData = `\n\n📌 To scrape breadcrumb pages, use multi_page_scraper with URLs: ${JSON.stringify(breadcrumbData[0].links.map((l) => l.href))}`;
|
|
193
|
+
}
|
|
194
|
+
return {
|
|
195
|
+
content: [
|
|
196
|
+
{
|
|
197
|
+
type: 'text',
|
|
198
|
+
text: `✅ Found ${breadcrumbData.length} breadcrumb trail(s)\n\n${JSON.stringify(breadcrumbData, null, 2)}${additionalData}`,
|
|
199
|
+
},
|
|
200
|
+
],
|
|
201
|
+
};
|
|
202
|
+
}, 'Failed to navigate breadcrumbs');
|
|
203
|
+
});
|
|
204
|
+
}
|
|
@@ -262,124 +262,3 @@ export async function handleAdvancedCSSSelectors(args) {
|
|
|
262
262
|
return { content: [{ type: 'text', text: `❌ CSS selector query failed: ${error.message}` }], isError: true };
|
|
263
263
|
}
|
|
264
264
|
}
|
|
265
|
-
/**
|
|
266
|
-
* Visual Element Finder - Find elements by visual properties
|
|
267
|
-
*/
|
|
268
|
-
export async function handleVisualElementFinder(args) {
|
|
269
|
-
const { url, criteria } = args;
|
|
270
|
-
try {
|
|
271
|
-
const page = getPageInstance();
|
|
272
|
-
if (!page) {
|
|
273
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
274
|
-
}
|
|
275
|
-
if (url && page.url() !== url) {
|
|
276
|
-
await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
|
|
277
|
-
}
|
|
278
|
-
const results = await page.evaluate((crit) => {
|
|
279
|
-
const allElements = Array.from(document.querySelectorAll('*'));
|
|
280
|
-
const matches = [];
|
|
281
|
-
allElements.forEach(element => {
|
|
282
|
-
const computed = window.getComputedStyle(element);
|
|
283
|
-
const rect = element.getBoundingClientRect();
|
|
284
|
-
let matchScore = 0;
|
|
285
|
-
const reasons = [];
|
|
286
|
-
// Check visibility
|
|
287
|
-
if (crit.visible !== undefined) {
|
|
288
|
-
const isVisible = computed.display !== 'none' &&
|
|
289
|
-
computed.visibility !== 'hidden' &&
|
|
290
|
-
rect.width > 0 &&
|
|
291
|
-
rect.height > 0;
|
|
292
|
-
if (isVisible === crit.visible) {
|
|
293
|
-
matchScore += 10;
|
|
294
|
-
reasons.push('visibility');
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
// Check color
|
|
298
|
-
if (crit.color) {
|
|
299
|
-
if (computed.color.includes(crit.color) || computed.backgroundColor.includes(crit.color)) {
|
|
300
|
-
matchScore += 5;
|
|
301
|
-
reasons.push('color');
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
// Check size
|
|
305
|
-
if (crit.minWidth && rect.width >= crit.minWidth) {
|
|
306
|
-
matchScore += 3;
|
|
307
|
-
reasons.push('minWidth');
|
|
308
|
-
}
|
|
309
|
-
if (crit.maxWidth && rect.width <= crit.maxWidth) {
|
|
310
|
-
matchScore += 3;
|
|
311
|
-
reasons.push('maxWidth');
|
|
312
|
-
}
|
|
313
|
-
if (crit.minHeight && rect.height >= crit.minHeight) {
|
|
314
|
-
matchScore += 3;
|
|
315
|
-
reasons.push('minHeight');
|
|
316
|
-
}
|
|
317
|
-
if (crit.maxHeight && rect.height <= crit.maxHeight) {
|
|
318
|
-
matchScore += 3;
|
|
319
|
-
reasons.push('maxHeight');
|
|
320
|
-
}
|
|
321
|
-
// Check position
|
|
322
|
-
if (crit.position) {
|
|
323
|
-
if (computed.position === crit.position) {
|
|
324
|
-
matchScore += 5;
|
|
325
|
-
reasons.push('position');
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
// Check text content
|
|
329
|
-
if (crit.hasText !== undefined) {
|
|
330
|
-
const hasText = (element.textContent?.trim().length || 0) > 0;
|
|
331
|
-
if (hasText === crit.hasText) {
|
|
332
|
-
matchScore += 5;
|
|
333
|
-
reasons.push('hasText');
|
|
334
|
-
}
|
|
335
|
-
}
|
|
336
|
-
// Check if element is in viewport
|
|
337
|
-
if (crit.inViewport !== undefined) {
|
|
338
|
-
const inViewport = rect.top >= 0 &&
|
|
339
|
-
rect.left >= 0 &&
|
|
340
|
-
rect.bottom <= window.innerHeight &&
|
|
341
|
-
rect.right <= window.innerWidth;
|
|
342
|
-
if (inViewport === crit.inViewport) {
|
|
343
|
-
matchScore += 5;
|
|
344
|
-
reasons.push('inViewport');
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
if (matchScore > 0) {
|
|
348
|
-
matches.push({
|
|
349
|
-
element: {
|
|
350
|
-
tagName: element.tagName.toLowerCase(),
|
|
351
|
-
id: element.id,
|
|
352
|
-
className: element.className,
|
|
353
|
-
text: element.textContent?.substring(0, 100)
|
|
354
|
-
},
|
|
355
|
-
score: matchScore,
|
|
356
|
-
matchedCriteria: reasons,
|
|
357
|
-
visualProperties: {
|
|
358
|
-
display: computed.display,
|
|
359
|
-
visibility: computed.visibility,
|
|
360
|
-
position: computed.position,
|
|
361
|
-
color: computed.color,
|
|
362
|
-
backgroundColor: computed.backgroundColor,
|
|
363
|
-
width: rect.width,
|
|
364
|
-
height: rect.height,
|
|
365
|
-
top: rect.top,
|
|
366
|
-
left: rect.left
|
|
367
|
-
}
|
|
368
|
-
});
|
|
369
|
-
}
|
|
370
|
-
});
|
|
371
|
-
matches.sort((a, b) => b.score - a.score);
|
|
372
|
-
return {
|
|
373
|
-
totalMatches: matches.length,
|
|
374
|
-
topMatches: matches.slice(0, 20)
|
|
375
|
-
};
|
|
376
|
-
}, criteria);
|
|
377
|
-
const resultText = `✅ Visual Element Finder Results\n\nCriteria: ${JSON.stringify(criteria, null, 2)}\nTotal Matches: ${results.totalMatches}\n\nTop Matches:\n${JSON.stringify(results.topMatches, null, 2)}`;
|
|
378
|
-
return {
|
|
379
|
-
content: [{ type: 'text', text: resultText }],
|
|
380
|
-
};
|
|
381
|
-
}
|
|
382
|
-
catch (error) {
|
|
383
|
-
return { content: [{ type: 'text', text: `❌ Visual element finder failed: ${error.message}` }], isError: true };
|
|
384
|
-
}
|
|
385
|
-
}
|
package/dist/index.js
CHANGED
|
@@ -28,15 +28,13 @@ import { handleSaveContentAsMarkdown } from "./handlers/file-handlers.js";
|
|
|
28
28
|
// Import new data extraction handlers
|
|
29
29
|
import { handleExtractJSON, handleScrapeMetaTags, handleExtractSchema, } from "./handlers/data-extraction-handlers.js";
|
|
30
30
|
// Import multi-element handlers
|
|
31
|
-
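import { handleBatchElementScraper, handleNestedDataExtraction, handleAttributeHarvester, handleLinkHarvester, handleMediaExtractor, } from "./handlers/multi-element-handlers.js";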
|
|
31
|
+
import { handleBatchElementScraper, handleAttributeHarvester, handleLinkHarvester, handleMediaExtractor, } from "./handlers/multi-element-handlers.js";
|
|
32
32
|
// Import pagination handlers
|
|
33
|
-
import { handleMultiPageScraper, handleBreadcrumbNavigator, } from "./handlers/pagination-handlers.js";
|
|
34
|
-
// Import data processing handlers
|
|
35
|
-
import { handleHTMLToText, } from "./handlers/data-processing-handlers.js";
|
|
33
|
+
import { handleBreadcrumbNavigator, } from "./handlers/navigation-handlers.js";
|
|
36
34
|
// Import AI-powered handlers
|
|
37
35
|
import { handleSmartSelectorGenerator, handleContentClassification, } from "./handlers/ai-powered-handlers.js";
|
|
38
36
|
// Import search & filter handlers
|
|
39
|
-
import { handleKeywordSearch, handleRegexPatternMatcher, handleXPathSupport, handleAdvancedCSSSelectors, handleVisualElementFinder, } from "./handlers/search-filter-handlers.js";
|
|
37
|
+
import { handleKeywordSearch, handleRegexPatternMatcher, handleXPathSupport, handleAdvancedCSSSelectors, } from "./handlers/search-filter-handlers.js";
|
|
40
38
|
// Import data quality handlers
|
|
41
39
|
import { handleDataTypeValidator, } from "./handlers/data-quality-handlers.js";
|
|
42
40
|
// Import captcha handlers
|
|
@@ -152,9 +150,6 @@ export async function executeToolByName(name, args) {
|
|
|
152
150
|
case TOOL_NAMES.BATCH_ELEMENT_SCRAPER:
|
|
153
151
|
result = await handleBatchElementScraper(args);
|
|
154
152
|
break;
|
|
155
|
-
case TOOL_NAMES.NESTED_DATA_EXTRACTION:
|
|
156
|
-
result = await handleNestedDataExtraction(args);
|
|
157
|
-
break;
|
|
158
153
|
case TOOL_NAMES.ATTRIBUTE_HARVESTER:
|
|
159
154
|
result = await handleAttributeHarvester(args);
|
|
160
155
|
break;
|
|
@@ -167,16 +162,12 @@ export async function executeToolByName(name, args) {
|
|
|
167
162
|
break;
|
|
168
163
|
// Pagination Tools
|
|
169
164
|
// Pagination Tools
|
|
170
|
-
case TOOL_NAMES.MULTI_PAGE_SCRAPER:
|
|
171
|
-
result = await handleMultiPageScraper(args);
|
|
172
|
-
break;
|
|
165
|
+
// Pagination Tools
|
|
173
166
|
case TOOL_NAMES.BREADCRUMB_NAVIGATOR:
|
|
174
167
|
result = await handleBreadcrumbNavigator(args || {});
|
|
175
168
|
break;
|
|
176
169
|
// Data Processing Tools
|
|
177
|
-
case TOOL_NAMES.HTML_TO_TEXT:
|
|
178
|
-
result = await handleHTMLToText(args);
|
|
179
|
-
break;
|
|
170
|
+
// Data Processing Tools
|
|
180
171
|
// AI-Powered Features
|
|
181
172
|
case TOOL_NAMES.SMART_SELECTOR_GENERATOR:
|
|
182
173
|
result = await handleSmartSelectorGenerator(args);
|
|
@@ -197,9 +188,6 @@ export async function executeToolByName(name, args) {
|
|
|
197
188
|
case TOOL_NAMES.ADVANCED_CSS_SELECTORS:
|
|
198
189
|
result = await handleAdvancedCSSSelectors(args);
|
|
199
190
|
break;
|
|
200
|
-
case TOOL_NAMES.VISUAL_ELEMENT_FINDER:
|
|
201
|
-
result = await handleVisualElementFinder(args);
|
|
202
|
-
break;
|
|
203
191
|
// Data Quality & Validation
|
|
204
192
|
case TOOL_NAMES.DATA_TYPE_VALIDATOR:
|
|
205
193
|
result = await handleDataTypeValidator(args);
|
package/dist/tool-definitions.js
CHANGED
|
@@ -381,19 +381,6 @@ export const TOOLS = [
|
|
|
381
381
|
required: ['selector'],
|
|
382
382
|
},
|
|
383
383
|
},
|
|
384
|
-
{
|
|
385
|
-
name: 'nested_data_extraction',
|
|
386
|
-
description: 'Extract data maintaining parent-child relationships',
|
|
387
|
-
inputSchema: {
|
|
388
|
-
type: 'object',
|
|
389
|
-
properties: {
|
|
390
|
-
parentSelector: { type: 'string' },
|
|
391
|
-
childSelector: { type: 'string' },
|
|
392
|
-
maxParents: { type: 'number', default: 50 },
|
|
393
|
-
},
|
|
394
|
-
required: ['parentSelector', 'childSelector'],
|
|
395
|
-
},
|
|
396
|
-
},
|
|
397
384
|
{
|
|
398
385
|
name: 'attribute_harvester',
|
|
399
386
|
description: 'Collect attributes (href, src, data-*) from elements',
|
|
@@ -432,19 +419,6 @@ export const TOOLS = [
|
|
|
432
419
|
},
|
|
433
420
|
},
|
|
434
421
|
// Pagination Tools
|
|
435
|
-
{
|
|
436
|
-
name: 'multi_page_scraper',
|
|
437
|
-
description: 'Collect and merge data from multiple pages',
|
|
438
|
-
inputSchema: {
|
|
439
|
-
type: 'object',
|
|
440
|
-
properties: {
|
|
441
|
-
urls: { type: 'array', items: { type: 'string' } },
|
|
442
|
-
dataSelector: { type: 'string' },
|
|
443
|
-
waitBetweenPages: { type: 'number', default: 1000 },
|
|
444
|
-
},
|
|
445
|
-
required: ['urls', 'dataSelector'],
|
|
446
|
-
},
|
|
447
|
-
},
|
|
448
422
|
{
|
|
449
423
|
name: 'breadcrumb_navigator',
|
|
450
424
|
description: 'Extract navigation path by following site structure',
|
|
@@ -457,19 +431,7 @@ export const TOOLS = [
|
|
|
457
431
|
},
|
|
458
432
|
},
|
|
459
433
|
// Data Processing Tools
|
|
460
|
-
{
|
|
461
|
-
name: 'html_to_text',
|
|
462
|
-
description: 'Convert HTML content to clean text',
|
|
463
|
-
inputSchema: {
|
|
464
|
-
type: 'object',
|
|
465
|
-
properties: {
|
|
466
|
-
html: { type: 'string' },
|
|
467
|
-
preserveLinks: { type: 'boolean', default: false },
|
|
468
|
-
preserveFormatting: { type: 'boolean', default: false },
|
|
469
|
-
},
|
|
470
|
-
required: ['html'],
|
|
471
|
-
},
|
|
472
|
-
},
|
|
434
|
+
// Data Validation Tools
|
|
473
435
|
// Data Validation Tools
|
|
474
436
|
// AI-Powered Features (5 tools)
|
|
475
437
|
{
|
|
@@ -553,21 +515,6 @@ export const TOOLS = [
|
|
|
553
515
|
required: ['selector'],
|
|
554
516
|
},
|
|
555
517
|
},
|
|
556
|
-
{
|
|
557
|
-
name: 'visual_element_finder',
|
|
558
|
-
description: 'Find elements by visual properties',
|
|
559
|
-
inputSchema: {
|
|
560
|
-
type: 'object',
|
|
561
|
-
properties: {
|
|
562
|
-
url: { type: 'string' },
|
|
563
|
-
criteria: {
|
|
564
|
-
type: 'object',
|
|
565
|
-
description: 'Visual criteria (color, size, position, etc.)'
|
|
566
|
-
},
|
|
567
|
-
},
|
|
568
|
-
required: ['criteria'],
|
|
569
|
-
},
|
|
570
|
-
},
|
|
571
518
|
// Data Quality & Validation (5 tools)
|
|
572
519
|
{
|
|
573
520
|
name: 'data_type_validator',
|
|
@@ -1001,7 +948,6 @@ export const TOOL_NAMES = {
|
|
|
1001
948
|
EXTRACT_SCHEMA: 'extract_schema',
|
|
1002
949
|
// Multi-Element Extractors
|
|
1003
950
|
BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
|
|
1004
|
-
NESTED_DATA_EXTRACTION: 'nested_data_extraction',
|
|
1005
951
|
ATTRIBUTE_HARVESTER: 'attribute_harvester',
|
|
1006
952
|
// Content Type Specific
|
|
1007
953
|
LINK_HARVESTER: 'link_harvester',
|
|
@@ -1020,10 +966,8 @@ export const TOOL_NAMES = {
|
|
|
1020
966
|
NETWORK_RECORDER: 'network_recorder',
|
|
1021
967
|
API_FINDER: 'api_finder',
|
|
1022
968
|
// Pagination Tools
|
|
1023
|
-
MULTI_PAGE_SCRAPER: 'multi_page_scraper',
|
|
1024
969
|
BREADCRUMB_NAVIGATOR: 'breadcrumb_navigator',
|
|
1025
970
|
// Data Processing
|
|
1026
|
-
HTML_TO_TEXT: 'html_to_text',
|
|
1027
971
|
// AI-Powered Features
|
|
1028
972
|
SMART_SELECTOR_GENERATOR: 'smart_selector_generator',
|
|
1029
973
|
CONTENT_CLASSIFICATION: 'content_classification',
|
|
@@ -1036,7 +980,6 @@ export const TOOL_NAMES = {
|
|
|
1036
980
|
REGEX_PATTERN_MATCHER: 'regex_pattern_matcher',
|
|
1037
981
|
XPATH_SUPPORT: 'xpath_support',
|
|
1038
982
|
ADVANCED_CSS_SELECTORS: 'advanced_css_selectors',
|
|
1039
|
-
VISUAL_ELEMENT_FINDER: 'visual_element_finder',
|
|
1040
983
|
// Data Quality & Validation
|
|
1041
984
|
DATA_TYPE_VALIDATOR: 'data_type_validator',
|
|
1042
985
|
// Advanced Captcha Handling
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.15.5",
|
|
3
|
+
"version": "2.15.6",
|
|
4
4
|
"description": "Universal AI IDE MCP Server - Auto-detects and supports all AI IDEs (Claude Desktop, Cursor, Windsurf, Cline, Zed, VSCode, Qoder AI, etc.) with Brave browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"test:brave:cleanup": "taskkill /F /IM brave.exe || pkill -f brave || true"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@modelcontextprotocol/sdk": "^1.25.
|
|
37
|
+
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
38
38
|
"@types/turndown": "^5.0.6",
|
|
39
39
|
"ajv": "^8.12.0",
|
|
40
40
|
"axios": "^1.6.5",
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
|
|
2
|
+
import { handleBrowserInit, handleBrowserClose } from '../src/handlers/browser-handlers.js';
|
|
3
|
+
import { handleNavigate, handleWait } from '../src/handlers/navigation-handlers.js';
|
|
4
|
+
import { handleGetContent, handleFindSelector } from '../src/handlers/content-handlers.js';
|
|
5
|
+
import { handleBreadcrumbNavigator } from '../src/handlers/navigation-handlers.js';
|
|
6
|
+
import {
|
|
7
|
+
handleBatchElementScraper,
|
|
8
|
+
handleAttributeHarvester,
|
|
9
|
+
handleLinkHarvester,
|
|
10
|
+
handleMediaExtractor
|
|
11
|
+
} from '../src/handlers/multi-element-handlers.js';
|
|
12
|
+
import {
|
|
13
|
+
handleKeywordSearch,
|
|
14
|
+
handleRegexPatternMatcher,
|
|
15
|
+
handleXPathSupport,
|
|
16
|
+
handleAdvancedCSSSelectors
|
|
17
|
+
} from '../src/handlers/search-filter-handlers.js';
|
|
18
|
+
import { handleRandomScroll } from '../src/handlers/interaction-handlers.js';
|
|
19
|
+
import { handleScrapeMetaTags, handleExtractSchema } from '../src/handlers/data-extraction-handlers.js';
|
|
20
|
+
|
|
21
|
+
async function runFullVerification() {
|
|
22
|
+
console.log('🚀 Starting Comprehensive Tool Verification...');
|
|
23
|
+
|
|
24
|
+
try {
|
|
25
|
+
await handleBrowserInit({ headless: true });
|
|
26
|
+
|
|
27
|
+
// Testing on one site primarily to save time, then brief check on second
|
|
28
|
+
const url = 'https://moviesdrive.forum/';
|
|
29
|
+
console.log(`\n--------------------------------------------------`);
|
|
30
|
+
console.log(`🔍 Targeting: ${url}`);
|
|
31
|
+
console.log(`--------------------------------------------------`);
|
|
32
|
+
|
|
33
|
+
// --- Navigation & Basic ---
|
|
34
|
+
console.log(`\n[1/12] 🟢 Testing Navigation & Wait...`);
|
|
35
|
+
await handleNavigate({ url });
|
|
36
|
+
await handleWait({ type: 'timeout', value: '2000' });
|
|
37
|
+
console.log(' ✅ Navigation complete.');
|
|
38
|
+
|
|
39
|
+
// --- Interaction ---
|
|
40
|
+
console.log(`\n[2/12] 🟢 Testing Random Scroll...`);
|
|
41
|
+
await handleRandomScroll({});
|
|
42
|
+
console.log(' ✅ Scroll complete.');
|
|
43
|
+
|
|
44
|
+
// --- Content Handlers ---
|
|
45
|
+
console.log(`\n[3/12] 🟢 Testing Find Selector (Text search)...`);
|
|
46
|
+
const findRes = await handleFindSelector({ text: 'Movie' }); // Assuming "Movie" exists
|
|
47
|
+
console.log(` Result: Found ${findRes.content[0].text.length > 50 ? 'matches' : 'no matches'} (Length: ${findRes.content[0].text.length})`);
|
|
48
|
+
|
|
49
|
+
// --- Multi-Element Handlers (The file we kept) ---
|
|
50
|
+
console.log(`\n[4/12] 🟢 Testing Batch Element Scraper...`);
|
|
51
|
+
const batchRes = await handleBatchElementScraper({ selector: 'a', maxElements: 3 });
|
|
52
|
+
console.log(` Result: ${batchRes.content[0].text.substring(0, 100)}...`);
|
|
53
|
+
|
|
54
|
+
console.log(`\n[5/12] 🟢 Testing Attribute Harvester...`);
|
|
55
|
+
const attrRes = await handleAttributeHarvester({ selector: 'img', attributes: ['src'], maxElements: 3 });
|
|
56
|
+
console.log(` Result: ${attrRes.content[0].text.substring(0, 100)}...`);
|
|
57
|
+
|
|
58
|
+
console.log(`\n[6/12] 🟢 Testing Media Extractor...`); // Might be empty on home page but runs logic
|
|
59
|
+
const mediaRes = await handleMediaExtractor({ types: ['video', 'iframe'] });
|
|
60
|
+
console.log(` Result: ${mediaRes.content[0].text.substring(0, 100)}...`);
|
|
61
|
+
|
|
62
|
+
// --- Search & Filter Handlers (The file we kept) ---
|
|
63
|
+
console.log(`\n[7/12] 🟢 Testing Keyword Search...`);
|
|
64
|
+
const keyRes = await handleKeywordSearch({ keywords: ['Bollywood', 'Hollywood'] });
|
|
65
|
+
console.log(` Result: ${keyRes.content[0].text.substring(0, 100)}...`);
|
|
66
|
+
|
|
67
|
+
console.log(`\n[8/12] 🟢 Testing Regex Pattern Matcher...`);
|
|
68
|
+
const regexRes = await handleRegexPatternMatcher({ pattern: 'https?://[^\\s"\']+' });
|
|
69
|
+
console.log(` Result: ${regexRes.content[0].text.substring(0, 100)}...`);
|
|
70
|
+
|
|
71
|
+
console.log(`\n[9/12] 🟢 Testing XPath Support...`);
|
|
72
|
+
const xpathRes = await handleXPathSupport({ xpath: '//body//div' });
|
|
73
|
+
console.log(` Result: ${xpathRes.content[0].text.substring(0, 100)}...`);
|
|
74
|
+
|
|
75
|
+
console.log(`\n[10/12] 🟢 Testing Advanced CSS Selectors...`);
|
|
76
|
+
const cssRes = await handleAdvancedCSSSelectors({ selector: 'div > a', operation: 'query' });
|
|
77
|
+
console.log(` Result: ${cssRes.content[0].text.substring(0, 100)}...`);
|
|
78
|
+
|
|
79
|
+
// --- Data Extraction ---
|
|
80
|
+
console.log(`\n[11/12] 🟢 Testing Schema Extraction...`);
|
|
81
|
+
const schemaRes = await handleExtractSchema({});
|
|
82
|
+
console.log(` Result: ${schemaRes.content[0].text.substring(0, 100)}...`);
|
|
83
|
+
|
|
84
|
+
// --- Pagination (Refactored) ---
|
|
85
|
+
console.log(`\n[12/12] 🟢 Testing Breadcrumb Navigator...`);
|
|
86
|
+
const breadRes = await handleBreadcrumbNavigator({});
|
|
87
|
+
console.log(` Result: ${breadRes.content[0].text.substring(0, 100)}...`);
|
|
88
|
+
|
|
89
|
+
console.log('\n✅ All primary handler categories verified successfully.');
|
|
90
|
+
|
|
91
|
+
} catch (error) {
|
|
92
|
+
console.error('\n❌ Verification Failed:', error);
|
|
93
|
+
} finally {
|
|
94
|
+
await handleBrowserClose({});
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
runFullVerification();
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
|
|
2
|
+
import { handleBrowserInit, handleBrowserClose } from '../src/handlers/browser-handlers.js';
|
|
3
|
+
import { handleNavigate } from '../src/handlers/navigation-handlers.js';
|
|
4
|
+
import { handleGetContent } from '../src/handlers/content-handlers.js';
|
|
5
|
+
import { handleBreadcrumbNavigator } from '../src/handlers/navigation-handlers.js';
|
|
6
|
+
import { handleLinkHarvester } from '../src/handlers/multi-element-handlers.js';
|
|
7
|
+
import { handleScrapeMetaTags } from '../src/handlers/data-extraction-handlers.js';
|
|
8
|
+
|
|
9
|
+
async function runVerification() {
|
|
10
|
+
console.log('🚀 Starting Verification on Live Sites...');
|
|
11
|
+
|
|
12
|
+
try {
|
|
13
|
+
// 1. Initialize Browser
|
|
14
|
+
console.log('\n🔵 Initializing Browser...');
|
|
15
|
+
await handleBrowserInit({ headless: true });
|
|
16
|
+
|
|
17
|
+
const sites = [
|
|
18
|
+
'https://moviesdrive.forum/',
|
|
19
|
+
'https://multimovies.golf/'
|
|
20
|
+
];
|
|
21
|
+
|
|
22
|
+
for (const url of sites) {
|
|
23
|
+
console.log(`\n--------------------------------------------------`);
|
|
24
|
+
console.log(`🔍 Testing Site: ${url}`);
|
|
25
|
+
console.log(`--------------------------------------------------`);
|
|
26
|
+
|
|
27
|
+
// 2. Navigate
|
|
28
|
+
console.log(`\n➡️ Navigating to ${url}...`);
|
|
29
|
+
await handleNavigate({ url });
|
|
30
|
+
|
|
31
|
+
// 3. Get Content (HTML preview)
|
|
32
|
+
console.log(`\n📄 Fetching Content (Preview)...`);
|
|
33
|
+
const contentRes = await handleGetContent({ type: 'text' });
|
|
34
|
+
console.log(` Result: ${contentRes.content[0].text.substring(0, 100)}...`);
|
|
35
|
+
|
|
36
|
+
// 4. Test Breadcrumb Navigator (Newly moved)
|
|
37
|
+
console.log(`\nnav Testing Breadcrumb Navigator...`);
|
|
38
|
+
const breadcrumbRes = await handleBreadcrumbNavigator({});
|
|
39
|
+
console.log(` Result: ${breadcrumbRes.content[0].text.substring(0, 200)}...`);
|
|
40
|
+
|
|
41
|
+
// 5. Test Link Harvester (Existing tool)
|
|
42
|
+
console.log(`\n🔗 Testing Link Harvester (First 5 links)...`);
|
|
43
|
+
const linksRes = await handleLinkHarvester({ maxElements: 5 });
|
|
44
|
+
console.log(` Result: ${linksRes.content[0].text.substring(0, 200)}...`);
|
|
45
|
+
|
|
46
|
+
// 6. Test Meta Tags (Data extraction)
|
|
47
|
+
console.log(`\n🏷️ Testing Meta Tag Scraper...`);
|
|
48
|
+
const metaRes = await handleScrapeMetaTags({});
|
|
49
|
+
console.log(` Result: ${metaRes.content[0].text.substring(0, 200)}...`);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
} catch (error) {
|
|
53
|
+
console.error('\n❌ Verification Failed:', error);
|
|
54
|
+
} finally {
|
|
55
|
+
// 7. Cleanup
|
|
56
|
+
console.log('\n🔴 Closing Browser...');
|
|
57
|
+
await handleBrowserClose({});
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
runVerification();
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
// Data Processing & Transformation Handlers
|
|
2
|
-
// Text cleaning, validation, formatting utilities
|
|
3
|
-
// @ts-nocheck
|
|
4
|
-
import { withErrorHandling } from '../system-utils.js';
|
|
5
|
-
/**
|
|
6
|
-
* HTML tags intelligently remove करता है
|
|
7
|
-
*/
|
|
8
|
-
export async function handleHTMLToText(args) {
|
|
9
|
-
return await withErrorHandling(async () => {
|
|
10
|
-
const html = args.html;
|
|
11
|
-
const preserveLinks = args.preserveLinks || false;
|
|
12
|
-
const preserveFormatting = args.preserveFormatting || false;
|
|
13
|
-
// Simple HTML to text conversion (can be enhanced with turndown)
|
|
14
|
-
let text = html;
|
|
15
|
-
// Preserve links if requested
|
|
16
|
-
if (preserveLinks) {
|
|
17
|
-
text = text.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '$2 ($1)');
|
|
18
|
-
}
|
|
19
|
-
// Preserve basic formatting
|
|
20
|
-
if (preserveFormatting) {
|
|
21
|
-
text = text.replace(/<br\s*\/?>/gi, '\n');
|
|
22
|
-
text = text.replace(/<\/p>/gi, '\n\n');
|
|
23
|
-
text = text.replace(/<li>/gi, '• ');
|
|
24
|
-
text = text.replace(/<\/li>/gi, '\n');
|
|
25
|
-
}
|
|
26
|
-
// Remove all other HTML tags
|
|
27
|
-
text = text.replace(/<[^>]*>/g, '');
|
|
28
|
-
// Decode HTML entities
|
|
29
|
-
text = text
|
|
30
|
-
.replace(/&nbsp;/g, ' ')
|
|
31
|
-
.replace(/&amp;/g, '&')
|
|
32
|
-
.replace(/&lt;/g, '<')
|
|
33
|
-
.replace(/&gt;/g, '>')
|
|
34
|
-
.replace(/&quot;/g, '"')
|
|
35
|
-
.replace(/&#39;/g, "'");
|
|
36
|
-
// Clean up whitespace
|
|
37
|
-
text = text.replace(/\n\s*\n/g, '\n\n');
|
|
38
|
-
text = text.trim();
|
|
39
|
-
return {
|
|
40
|
-
content: [
|
|
41
|
-
{
|
|
42
|
-
type: 'text',
|
|
43
|
-
text: `✅ HTML converted to text\n\n${text}`,
|
|
44
|
-
},
|
|
45
|
-
],
|
|
46
|
-
};
|
|
47
|
-
}, 'Failed to convert HTML to text');
|
|
48
|
-
}
|
|
49
|
-
// Duplicate Remover Arguments
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
// Pagination & Navigation Tools
|
|
2
|
-
// Auto pagination, infinite scroll, multi-page scraping, sitemap parser
|
|
3
|
-
// @ts-nocheck
|
|
4
|
-
import { getCurrentPage } from '../browser-manager.js';
|
|
5
|
-
import { validateWorkflow } from '../workflow-validation.js';
|
|
6
|
-
import { withErrorHandling, sleep } from '../system-utils.js';
|
|
7
|
-
/**
|
|
8
|
-
* Multiple pages से data collect और merge करता है
|
|
9
|
-
*/
|
|
10
|
-
export async function handleMultiPageScraper(args) {
|
|
11
|
-
return await withErrorHandling(async () => {
|
|
12
|
-
validateWorkflow('multi_page_scraper', {
|
|
13
|
-
requireBrowser: true,
|
|
14
|
-
requirePage: true,
|
|
15
|
-
});
|
|
16
|
-
const page = getCurrentPage();
|
|
17
|
-
const urls = args.urls;
|
|
18
|
-
const dataSelector = args.dataSelector;
|
|
19
|
-
const waitBetweenPages = args.waitBetweenPages || 1000;
|
|
20
|
-
const allData = [];
|
|
21
|
-
for (let i = 0; i < urls.length; i++) {
|
|
22
|
-
const url = urls[i];
|
|
23
|
-
try {
|
|
24
|
-
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
25
|
-
await sleep(waitBetweenPages);
|
|
26
|
-
const pageData = await page.evaluate((selector) => {
|
|
27
|
-
const elements = document.querySelectorAll(selector);
|
|
28
|
-
return Array.from(elements).map((el) => ({
|
|
29
|
-
text: el.textContent?.trim() || '',
|
|
30
|
-
html: el.innerHTML,
|
|
31
|
-
}));
|
|
32
|
-
}, dataSelector);
|
|
33
|
-
allData.push({
|
|
34
|
-
url,
|
|
35
|
-
pageIndex: i,
|
|
36
|
-
itemCount: pageData.length,
|
|
37
|
-
data: pageData,
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
catch (error) {
|
|
41
|
-
allData.push({
|
|
42
|
-
url,
|
|
43
|
-
pageIndex: i,
|
|
44
|
-
error: error instanceof Error ? error.message : String(error),
|
|
45
|
-
});
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
return {
|
|
49
|
-
content: [
|
|
50
|
-
{
|
|
51
|
-
type: 'text',
|
|
52
|
-
text: `✅ Scraped ${urls.length} pages\n\n${JSON.stringify(allData, null, 2)}`,
|
|
53
|
-
},
|
|
54
|
-
],
|
|
55
|
-
};
|
|
56
|
-
}, 'Failed to scrape multiple pages');
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
* Site structure follow करके pages scrape करता है
|
|
60
|
-
*/
|
|
61
|
-
export async function handleBreadcrumbNavigator(args) {
|
|
62
|
-
return await withErrorHandling(async () => {
|
|
63
|
-
validateWorkflow('breadcrumb_navigator', {
|
|
64
|
-
requireBrowser: true,
|
|
65
|
-
requirePage: true,
|
|
66
|
-
});
|
|
67
|
-
const page = getCurrentPage();
|
|
68
|
-
const breadcrumbSelector = args.breadcrumbSelector || '.breadcrumb, nav[aria-label="breadcrumb"], .breadcrumbs';
|
|
69
|
-
const followLinks = args.followLinks || false;
|
|
70
|
-
const breadcrumbData = await page.evaluate((selector) => {
|
|
71
|
-
const breadcrumbs = document.querySelectorAll(selector);
|
|
72
|
-
const results = [];
|
|
73
|
-
breadcrumbs.forEach((breadcrumb) => {
|
|
74
|
-
const links = breadcrumb.querySelectorAll('a');
|
|
75
|
-
const items = [];
|
|
76
|
-
links.forEach((link, index) => {
|
|
77
|
-
items.push({
|
|
78
|
-
text: link.textContent?.trim() || '',
|
|
79
|
-
href: link.href,
|
|
80
|
-
level: index,
|
|
81
|
-
});
|
|
82
|
-
});
|
|
83
|
-
if (items.length > 0) {
|
|
84
|
-
results.push({
|
|
85
|
-
path: items.map((i) => i.text).join(' > '),
|
|
86
|
-
links: items,
|
|
87
|
-
});
|
|
88
|
-
}
|
|
89
|
-
});
|
|
90
|
-
return results;
|
|
91
|
-
}, breadcrumbSelector);
|
|
92
|
-
if (breadcrumbData.length === 0) {
|
|
93
|
-
return {
|
|
94
|
-
content: [
|
|
95
|
-
{
|
|
96
|
-
type: 'text',
|
|
97
|
-
text: '❌ No breadcrumbs found on page',
|
|
98
|
-
},
|
|
99
|
-
],
|
|
100
|
-
};
|
|
101
|
-
}
|
|
102
|
-
let additionalData = '';
|
|
103
|
-
if (followLinks && breadcrumbData[0]?.links) {
|
|
104
|
-
additionalData = `\n\n📌 To scrape breadcrumb pages, use multi_page_scraper with URLs: ${JSON.stringify(breadcrumbData[0].links.map((l) => l.href))}`;
|
|
105
|
-
}
|
|
106
|
-
return {
|
|
107
|
-
content: [
|
|
108
|
-
{
|
|
109
|
-
type: 'text',
|
|
110
|
-
text: `✅ Found ${breadcrumbData.length} breadcrumb trail(s)\n\n${JSON.stringify(breadcrumbData, null, 2)}${additionalData}`,
|
|
111
|
-
},
|
|
112
|
-
],
|
|
113
|
-
};
|
|
114
|
-
}, 'Failed to navigate breadcrumbs');
|
|
115
|
-
}
|