brave-real-browser-mcp-server 2.15.4 → 2.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -40
- package/dist/handlers/data-extraction-handlers.js +0 -76
- package/dist/handlers/data-quality-handlers.js +0 -141
- package/dist/handlers/dynamic-session-handlers.js +0 -75
- package/dist/handlers/monitoring-reporting-handlers.js +0 -83
- package/dist/handlers/multi-element-handlers.js +0 -67
- package/dist/handlers/navigation-handlers.js +59 -0
- package/dist/handlers/search-filter-handlers.js +0 -121
- package/dist/handlers/visual-tools-handlers.js +0 -52
- package/dist/index.js +10 -40
- package/dist/tool-definitions.js +1 -150
- package/package.json +2 -2
- package/scripts/full-verification.ts +98 -0
- package/scripts/live-verification.ts +61 -0
- package/dist/handlers/data-processing-handlers.js +0 -91
- package/dist/handlers/pagination-handlers.js +0 -115
package/scripts/full-verification.ts
@@ -0,0 +1,98 @@
+
+import { handleBrowserInit, handleBrowserClose } from '../src/handlers/browser-handlers.js';
+import { handleNavigate, handleWait } from '../src/handlers/navigation-handlers.js';
+import { handleGetContent, handleFindSelector } from '../src/handlers/content-handlers.js';
+import { handleBreadcrumbNavigator } from '../src/handlers/navigation-handlers.js';
+import {
+  handleBatchElementScraper,
+  handleAttributeHarvester,
+  handleLinkHarvester,
+  handleMediaExtractor
+} from '../src/handlers/multi-element-handlers.js';
+import {
+  handleKeywordSearch,
+  handleRegexPatternMatcher,
+  handleXPathSupport,
+  handleAdvancedCSSSelectors
+} from '../src/handlers/search-filter-handlers.js';
+import { handleRandomScroll } from '../src/handlers/interaction-handlers.js';
+import { handleScrapeMetaTags, handleExtractSchema } from '../src/handlers/data-extraction-handlers.js';
+
+async function runFullVerification() {
+  console.log('🚀 Starting Comprehensive Tool Verification...');
+
+  try {
+    await handleBrowserInit({ headless: true });
+
+    // Testing on one site primarily to save time, then brief check on second
+    const url = 'https://moviesdrive.forum/';
+    console.log(`\n--------------------------------------------------`);
+    console.log(`🌐 Targeting: ${url}`);
+    console.log(`--------------------------------------------------`);
+
+    // --- Navigation & Basic ---
+    console.log(`\n[1/12] 🟢 Testing Navigation & Wait...`);
+    await handleNavigate({ url });
+    await handleWait({ type: 'timeout', value: '2000' });
+    console.log('   ✅ Navigation complete.');
+
+    // --- Interaction ---
+    console.log(`\n[2/12] 🟢 Testing Random Scroll...`);
+    await handleRandomScroll({});
+    console.log('   ✅ Scroll complete.');
+
+    // --- Content Handlers ---
+    console.log(`\n[3/12] 🟢 Testing Find Selector (Text search)...`);
+    const findRes = await handleFindSelector({ text: 'Movie' }); // Assuming "Movie" exists
+    console.log(`   Result: Found ${findRes.content[0].text.length > 50 ? 'matches' : 'no matches'} (Length: ${findRes.content[0].text.length})`);
+
+    // --- Multi-Element Handlers (The file we kept) ---
+    console.log(`\n[4/12] 🟢 Testing Batch Element Scraper...`);
+    const batchRes = await handleBatchElementScraper({ selector: 'a', maxElements: 3 });
+    console.log(`   Result: ${batchRes.content[0].text.substring(0, 100)}...`);
+
+    console.log(`\n[5/12] 🟢 Testing Attribute Harvester...`);
+    const attrRes = await handleAttributeHarvester({ selector: 'img', attributes: ['src'], maxElements: 3 });
+    console.log(`   Result: ${attrRes.content[0].text.substring(0, 100)}...`);
+
+    console.log(`\n[6/12] 🟢 Testing Media Extractor...`); // Might be empty on home page but runs logic
+    const mediaRes = await handleMediaExtractor({ types: ['video', 'iframe'] });
+    console.log(`   Result: ${mediaRes.content[0].text.substring(0, 100)}...`);
+
+    // --- Search & Filter Handlers (The file we kept) ---
+    console.log(`\n[7/12] 🟢 Testing Keyword Search...`);
+    const keyRes = await handleKeywordSearch({ keywords: ['Bollywood', 'Hollywood'] });
+    console.log(`   Result: ${keyRes.content[0].text.substring(0, 100)}...`);
+
+    console.log(`\n[8/12] 🟢 Testing Regex Pattern Matcher...`);
+    const regexRes = await handleRegexPatternMatcher({ pattern: 'https?://[^\\s"\']+' });
+    console.log(`   Result: ${regexRes.content[0].text.substring(0, 100)}...`);
+
+    console.log(`\n[9/12] 🟢 Testing XPath Support...`);
+    const xpathRes = await handleXPathSupport({ xpath: '//body//div' });
+    console.log(`   Result: ${xpathRes.content[0].text.substring(0, 100)}...`);
+
+    console.log(`\n[10/12] 🟢 Testing Advanced CSS Selectors...`);
+    const cssRes = await handleAdvancedCSSSelectors({ selector: 'div > a', operation: 'query' });
+    console.log(`   Result: ${cssRes.content[0].text.substring(0, 100)}...`);
+
+    // --- Data Extraction ---
+    console.log(`\n[11/12] 🟢 Testing Schema Extraction...`);
+    const schemaRes = await handleExtractSchema({});
+    console.log(`   Result: ${schemaRes.content[0].text.substring(0, 100)}...`);
+
+    // --- Pagination (Refactored) ---
+    console.log(`\n[12/12] 🟢 Testing Breadcrumb Navigator...`);
+    const breadRes = await handleBreadcrumbNavigator({});
+    console.log(`   Result: ${breadRes.content[0].text.substring(0, 100)}...`);
+
+    console.log('\n✅ All primary handler categories verified successfully.');
+
+  } catch (error) {
+    console.error('\n❌ Verification Failed:', error);
+  } finally {
+    await handleBrowserClose({});
+  }
+}
+
+runFullVerification();
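All of the handlers exercised above return the same MCP-style payload, which is why every step can safely read res.content[0].text; the shape is visible in the deleted handler sources further down in this diff. A minimal sketch of that shape in TypeScript, with a type name of our own choosing (the package ships plain JS and declares no such interface):

// Hypothetical type: the shape comes from the handler sources, the name is ours.
interface HandlerResult {
  content: Array<{ type: 'text'; text: string }>;
}

// Each verification step reads the first text block, e.g.:
// const res: HandlerResult = await handleBatchElementScraper({ selector: 'a', maxElements: 3 });
// console.log(res.content[0].text.substring(0, 100));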
package/scripts/live-verification.ts
@@ -0,0 +1,61 @@
+
+import { handleBrowserInit, handleBrowserClose } from '../src/handlers/browser-handlers.js';
+import { handleNavigate } from '../src/handlers/navigation-handlers.js';
+import { handleGetContent } from '../src/handlers/content-handlers.js';
+import { handleBreadcrumbNavigator } from '../src/handlers/navigation-handlers.js';
+import { handleLinkHarvester } from '../src/handlers/multi-element-handlers.js';
+import { handleScrapeMetaTags } from '../src/handlers/data-extraction-handlers.js';
+
+async function runVerification() {
+  console.log('🚀 Starting Verification on Live Sites...');
+
+  try {
+    // 1. Initialize Browser
+    console.log('\n🔵 Initializing Browser...');
+    await handleBrowserInit({ headless: true });
+
+    const sites = [
+      'https://moviesdrive.forum/',
+      'https://multimovies.golf/'
+    ];
+
+    for (const url of sites) {
+      console.log(`\n--------------------------------------------------`);
+      console.log(`🌐 Testing Site: ${url}`);
+      console.log(`--------------------------------------------------`);
+
+      // 2. Navigate
+      console.log(`\n➡️ Navigating to ${url}...`);
+      await handleNavigate({ url });
+
+      // 3. Get Content (HTML preview)
+      console.log(`\n📄 Fetching Content (Preview)...`);
+      const contentRes = await handleGetContent({ type: 'text' });
+      console.log(`   Result: ${contentRes.content[0].text.substring(0, 100)}...`);
+
+      // 4. Test Breadcrumb Navigator (Newly moved)
+      console.log(`\nnav Testing Breadcrumb Navigator...`);
+      const breadcrumbRes = await handleBreadcrumbNavigator({});
+      console.log(`   Result: ${breadcrumbRes.content[0].text.substring(0, 200)}...`);
+
+      // 5. Test Link Harvester (Existing tool)
+      console.log(`\n🔗 Testing Link Harvester (First 5 links)...`);
+      const linksRes = await handleLinkHarvester({ maxElements: 5 });
+      console.log(`   Result: ${linksRes.content[0].text.substring(0, 200)}...`);
+
+      // 6. Test Meta Tags (Data extraction)
+      console.log(`\n🏷️ Testing Meta Tag Scraper...`);
+      const metaRes = await handleScrapeMetaTags({});
+      console.log(`   Result: ${metaRes.content[0].text.substring(0, 200)}...`);
+    }
+
+  } catch (error) {
+    console.error('\n❌ Verification Failed:', error);
+  } finally {
+    // 7. Cleanup
+    console.log('\n🔴 Closing Browser...');
+    await handleBrowserClose({});
+  }
+}
+
+runVerification();
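One caveat worth noting: the single try/catch wraps the whole loop, so a failure on the first site aborts the check of the second. If per-site isolation were wanted, each iteration could be guarded individually. A sketch of that variant, not what the script currently does:

import { handleNavigate } from '../src/handlers/navigation-handlers.js';

const sites = ['https://moviesdrive.forum/', 'https://multimovies.golf/'];

for (const url of sites) {
  try {
    await handleNavigate({ url });
    // ...per-site checks as in the script above...
  } catch (err) {
    // Isolate the failure: report it and continue with the next site.
    console.error(`Site failed: ${url}`, err);
  }
}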
package/dist/handlers/data-processing-handlers.js
@@ -1,91 +0,0 @@
-// Data Processing & Transformation Handlers
-// Text cleaning, validation, formatting utilities
-// @ts-nocheck
-import { withErrorHandling } from '../system-utils.js';
-/**
- * Intelligently removes HTML tags
- */
-export async function handleHTMLToText(args) {
-    return await withErrorHandling(async () => {
-        const html = args.html;
-        const preserveLinks = args.preserveLinks || false;
-        const preserveFormatting = args.preserveFormatting || false;
-        // Simple HTML to text conversion (can be enhanced with turndown)
-        let text = html;
-        // Preserve links if requested
-        if (preserveLinks) {
-            text = text.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '$2 ($1)');
-        }
-        // Preserve basic formatting
-        if (preserveFormatting) {
-            text = text.replace(/<br\s*\/?>/gi, '\n');
-            text = text.replace(/<\/p>/gi, '\n\n');
-            text = text.replace(/<li>/gi, '• ');
-            text = text.replace(/<\/li>/gi, '\n');
-        }
-        // Remove all other HTML tags
-        text = text.replace(/<[^>]*>/g, '');
-        // Decode HTML entities
-        text = text
-            .replace(/&nbsp;/g, ' ')
-            .replace(/&amp;/g, '&')
-            .replace(/&lt;/g, '<')
-            .replace(/&gt;/g, '>')
-            .replace(/&quot;/g, '"')
-            .replace(/&#39;/g, "'");
-        // Clean up whitespace
-        text = text.replace(/\n\s*\n/g, '\n\n');
-        text = text.trim();
-        return {
-            content: [
-                {
-                    type: 'text',
-                    text: `✅ HTML converted to text\n\n${text}`,
-                },
-            ],
-        };
-    }, 'Failed to convert HTML to text');
-}
-/**
- * Filters out repeated data
- */
-export async function handleDuplicateRemover(args) {
-    return await withErrorHandling(async () => {
-        const data = args.data;
-        const uniqueKey = args.uniqueKey;
-        let unique;
-        if (uniqueKey) {
-            // Remove duplicates based on specific key
-            const seen = new Set();
-            unique = data.filter((item) => {
-                const value = item[uniqueKey];
-                if (seen.has(value)) {
-                    return false;
-                }
-                seen.add(value);
-                return true;
-            });
-        }
-        else {
-            // Remove duplicates based on entire object
-            const seen = new Set();
-            unique = data.filter((item) => {
-                const serialized = JSON.stringify(item);
-                if (seen.has(serialized)) {
-                    return false;
-                }
-                seen.add(serialized);
-                return true;
-            });
-        }
-        const removed = data.length - unique.length;
-        return {
-            content: [
-                {
-                    type: 'text',
-                    text: `✅ Duplicates removed\n\nOriginal: ${data.length} items\nUnique: ${unique.length} items\nRemoved: ${removed} duplicates\n\n${JSON.stringify(unique, null, 2)}`,
-                },
-            ],
-        };
-    }, 'Failed to remove duplicates');
-}
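For anyone who relied on this removed module: handleDuplicateRemover had no browser dependency, so its key-based path is trivial to inline. A standalone TypeScript equivalent of the same seen-set filter (the function name is ours, not the package's):

// Standalone re-implementation of the removed key-based duplicate filter.
function dedupeByKey<T extends Record<string, unknown>>(data: T[], uniqueKey: string): T[] {
  const seen = new Set<unknown>();
  return data.filter((item) => {
    const value = item[uniqueKey];
    if (seen.has(value)) return false;
    seen.add(value);
    return true;
  });
}

// dedupeByKey([{ id: 1 }, { id: 1 }, { id: 2 }], 'id') yields [{ id: 1 }, { id: 2 }]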
package/dist/handlers/pagination-handlers.js
@@ -1,115 +0,0 @@
-// Pagination & Navigation Tools
-// Auto pagination, infinite scroll, multi-page scraping, sitemap parser
-// @ts-nocheck
-import { getCurrentPage } from '../browser-manager.js';
-import { validateWorkflow } from '../workflow-validation.js';
-import { withErrorHandling, sleep } from '../system-utils.js';
-/**
- * Collects and merges data from multiple pages
- */
-export async function handleMultiPageScraper(args) {
-    return await withErrorHandling(async () => {
-        validateWorkflow('multi_page_scraper', {
-            requireBrowser: true,
-            requirePage: true,
-        });
-        const page = getCurrentPage();
-        const urls = args.urls;
-        const dataSelector = args.dataSelector;
-        const waitBetweenPages = args.waitBetweenPages || 1000;
-        const allData = [];
-        for (let i = 0; i < urls.length; i++) {
-            const url = urls[i];
-            try {
-                await page.goto(url, { waitUntil: 'domcontentloaded' });
-                await sleep(waitBetweenPages);
-                const pageData = await page.evaluate((selector) => {
-                    const elements = document.querySelectorAll(selector);
-                    return Array.from(elements).map((el) => ({
-                        text: el.textContent?.trim() || '',
-                        html: el.innerHTML,
-                    }));
-                }, dataSelector);
-                allData.push({
-                    url,
-                    pageIndex: i,
-                    itemCount: pageData.length,
-                    data: pageData,
-                });
-            }
-            catch (error) {
-                allData.push({
-                    url,
-                    pageIndex: i,
-                    error: error instanceof Error ? error.message : String(error),
-                });
-            }
-        }
-        return {
-            content: [
-                {
-                    type: 'text',
-                    text: `✅ Scraped ${urls.length} pages\n\n${JSON.stringify(allData, null, 2)}`,
-                },
-            ],
-        };
-    }, 'Failed to scrape multiple pages');
-}
-/**
- * Scrapes pages by following the site structure
- */
-export async function handleBreadcrumbNavigator(args) {
-    return await withErrorHandling(async () => {
-        validateWorkflow('breadcrumb_navigator', {
-            requireBrowser: true,
-            requirePage: true,
-        });
-        const page = getCurrentPage();
-        const breadcrumbSelector = args.breadcrumbSelector || '.breadcrumb, nav[aria-label="breadcrumb"], .breadcrumbs';
-        const followLinks = args.followLinks || false;
-        const breadcrumbData = await page.evaluate((selector) => {
-            const breadcrumbs = document.querySelectorAll(selector);
-            const results = [];
-            breadcrumbs.forEach((breadcrumb) => {
-                const links = breadcrumb.querySelectorAll('a');
-                const items = [];
-                links.forEach((link, index) => {
-                    items.push({
-                        text: link.textContent?.trim() || '',
-                        href: link.href,
-                        level: index,
-                    });
-                });
-                if (items.length > 0) {
-                    results.push({
-                        path: items.map((i) => i.text).join(' > '),
-                        links: items,
-                    });
-                }
-            });
-            return results;
-        }, breadcrumbSelector);
-        if (breadcrumbData.length === 0) {
-            return {
-                content: [
-                    {
-                        type: 'text',
-                        text: '⚠️ No breadcrumbs found on page',
-                    },
-                ],
-            };
-        }
-        let additionalData = '';
-        if (followLinks && breadcrumbData[0]?.links) {
-            additionalData = `\n\n💡 To scrape breadcrumb pages, use multi_page_scraper with URLs: ${JSON.stringify(breadcrumbData[0].links.map((l) => l.href))}`;
-        }
-        return {
-            content: [
-                {
-                    type: 'text',
-                    text: `✅ Found ${breadcrumbData.length} breadcrumb trail(s)\n\n${JSON.stringify(breadcrumbData, null, 2)}${additionalData}`,
-                },
-            ],
-        };
-    }, 'Failed to navigate breadcrumbs');
-}
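Note that handleBreadcrumbNavigator itself survives this deletion: both new scripts import it from navigation-handlers.js, which matches the +59 lines added to dist/handlers/navigation-handlers.js in the file list. Downstream code that imported it from the removed module would need the new path. An assumed before/after (the exact dist export surface and deep-import specifiers are not shown in this diff):

// Before, against 2.15.4 (module removed in 2.15.6):
// import { handleBreadcrumbNavigator } from 'brave-real-browser-mcp-server/dist/handlers/pagination-handlers.js';

// After, per the new scripts' imports:
import { handleBreadcrumbNavigator } from 'brave-real-browser-mcp-server/dist/handlers/navigation-handlers.js';

const res = await handleBreadcrumbNavigator({});
console.log(res.content[0].text);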