brave-real-browser-mcp-server 2.15.1 → 2.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -10
- package/dist/tool-definitions.js +0 -43
- package/package.json +2 -2
- package/dist/handlers/APPLY_OPTIMIZATION_PATTERN.js +0 -326
- package/dist/handlers/advanced-scraping-handlers.js +0 -58
- package/dist/handlers/data-transform-handlers.js +0 -66
- package/dist/handlers/dom-handlers.js +0 -206
- package/dist/handlers/network-handlers.js +0 -111
- package/dist/mcp-server.js +0 -265
- package/dist/test-constants.js +0 -111
- package/scripts/update-to-latest.cjs +0 -130
|
@@ -1,206 +0,0 @@
|
|
|
1
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
-
/**
 * Collects a structured snapshot of the elements matching a CSS selector on
 * the current page.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {string} [args.selector] - CSS selector to match; falls back to '*'.
 * @param {number} [args.maxElements] - Maximum number of elements reported; falls back to 100.
 * @param {boolean} [args.includeStyles] - When truthy, adds a few computed style values per element.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the pretty-printed JSON element list.
 * @throws {Error} When no page instance is available.
 */
export async function handleHtmlElementsExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    // Every option is optional, so a call without any arguments must not crash.
    const options = args ?? {};
    const selector = options.selector || '*';
    const max = options.maxElements || 100;
    const includeStyles = options.includeStyles || false;
    const elements = await page.evaluate((sel, maxCount, incStyles) => {
        const els = Array.from(document.querySelectorAll(sel)).slice(0, maxCount);
        return els.map(el => {
            const rect = el.getBoundingClientRect();
            const info = {
                tagName: el.tagName.toLowerCase(),
                id: el.id,
                // Read the attribute instead of `el.className`: on SVG elements
                // `className` is an SVGAnimatedString object, not a string.
                className: el.getAttribute('class') ?? '',
                text: el.textContent?.slice(0, 100).trim(),
                attributes: {},
                rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
            };
            if (incStyles) {
                const computed = window.getComputedStyle(el);
                info.styles = {
                    display: computed.display,
                    position: computed.position,
                    color: computed.color,
                    backgroundColor: computed.backgroundColor,
                    fontSize: computed.fontSize
                };
            }
            for (const attr of el.attributes) {
                info.attributes[attr.name] = attr.value;
            }
            return info;
        });
    }, selector, max, includeStyles);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(elements, null, 2)
            }]
    };
}
|
|
44
|
-
/**
 * For each requested tag (or CSS selector) reports a short text and HTML
 * preview of every matching element on the current page.
 *
 * @param {object} args - Tool arguments.
 * @param {string[]|string} args.tags - Selectors to look up; a single string is
 *   treated as a one-element list.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is a JSON object keyed by the requested selector.
 * @throws {Error} When no page instance is available or `tags` is missing or malformed.
 */
export async function handleTagsFinder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const requested = args?.tags;
    const tags = typeof requested === 'string' ? [requested] : requested;
    // Fail here with a clear message instead of an opaque "forEach" TypeError
    // raised from inside the page context.
    if (!Array.isArray(tags))
        throw new Error('tags must be an array of tag names or CSS selectors');
    const results = await page.evaluate((tagList) => {
        const found = {};
        tagList.forEach(tag => {
            const elements = document.querySelectorAll(tag);
            found[tag] = Array.from(elements).map(el => ({
                // Previews are truncated: 50 chars of text, 100 chars of markup.
                text: el.textContent?.slice(0, 50).trim(),
                html: el.outerHTML.slice(0, 100)
            }));
        });
        return found;
    }, tags);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(results, null, 2)
            }]
    };
}
|
|
66
|
-
/**
 * Lists the anchors (`a[href]`) of the current page, flagging each as internal
 * or external relative to the page's origin.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {boolean} [args.includeExternal=true] - When false, only same-origin links are returned.
 * @param {number} [args.maxLinks=200] - Maximum number of links returned.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is a JSON array of `{ text, href, isExternal }`.
 * @throws {Error} When no page instance is available.
 */
export async function handleLinksFinder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const includeExt = args?.includeExternal ?? true;
    const max = args?.maxLinks ?? 200;
    const links = await page.evaluate((incExt, maxCount) => {
        const { origin, protocol } = window.location;
        // Compare parsed origins rather than string prefixes: a prefix test
        // treats "https://site.com.evil.io" as part of "https://site.com".
        // The protocol check keeps opaque ("null") origins such as mailto:
        // from matching an opaque page origin.
        const isInternal = (a) => a.origin === origin && a.protocol === protocol;
        const allLinks = Array.from(document.querySelectorAll('a[href]'));
        const filtered = incExt ? allLinks : allLinks.filter(isInternal);
        return filtered.slice(0, maxCount).map(a => ({
            text: a.textContent?.trim(),
            href: a.href,
            isExternal: !isInternal(a)
        }));
    }, includeExt, max);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(links, null, 2)
            }]
    };
}
|
|
90
|
-
/**
 * Evaluates an XPath expression against the current document and reports the
 * matched nodes: anchors yield `{ text, href }`, any other node with text
 * content yields `{ text }`.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.xpath - XPath expression to evaluate.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the JSON array of matches.
 * @throws {Error} When no page instance is available or `xpath` is not a non-empty string.
 */
export async function handleXpathLinks(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const xpath = args?.xpath;
    // Reject a missing expression here; otherwise the failure surfaces as an
    // unrelated-looking error from inside the page context.
    if (typeof xpath !== 'string' || xpath.trim() === '')
        throw new Error('xpath must be a non-empty string');
    const links = await page.evaluate((xpathExpr) => {
        const result = document.evaluate(xpathExpr, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
        const items = [];
        for (let i = 0; i < result.snapshotLength; i++) {
            const node = result.snapshotItem(i);
            if (node instanceof HTMLAnchorElement) {
                items.push({ text: node.textContent?.trim(), href: node.href });
            }
            else if (node && node.textContent) {
                items.push({ text: node.textContent.trim() });
            }
        }
        return items;
    }, xpath);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(links, null, 2)
            }]
    };
}
|
|
115
|
-
/**
 * Walks the DOM below `document.body`, collects every shadow root reachable
 * through `element.shadowRoot` (including nested ones), and returns the outer
 * HTML of the elements matching a selector inside each root.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {string} [args.selector] - CSS selector applied inside each shadow root; falls back to '*'.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is `{ message, data }`, where `data` holds one entry per shadow root
 *   that had at least one match.
 * @throws {Error} When no page instance is available.
 */
export async function handleShadowDomExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const sel = args?.selector || '*';
    const results = await page.evaluate((selector) => {
        // One shared accumulator, filled in pre-order, instead of building and
        // spreading a fresh array at every recursion level.
        const allShadows = [];
        const collect = (node) => {
            if (node instanceof Element && node.shadowRoot) {
                allShadows.push(node.shadowRoot);
                collect(node.shadowRoot);
            }
            if (node.childNodes) {
                node.childNodes.forEach(child => collect(child));
            }
        };
        // A document without a body has nothing to walk.
        if (document.body) {
            collect(document.body);
        }
        const data = [];
        allShadows.forEach((shadow, index) => {
            const elements = shadow.querySelectorAll(selector);
            if (elements.length > 0) {
                data.push({
                    // Position of the root in discovery order, not a stable identifier.
                    shadowRootIndex: index,
                    elements: Array.from(elements).map(el => el.outerHTML)
                });
            }
        });
        return data;
    }, sel);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify({ message: `Found content in ${results.length} shadow roots`, data: results }, null, 2)
            }]
    };
}
|
|
154
|
-
/**
 * Describes every frame of the current page (as returned by `page.frames()`):
 * url, title, a 500-character text preview, and whether it is the main frame.
 * A frame that cannot be inspected yields `{ id, error }` instead of aborting
 * the whole call.
 *
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the JSON array of frame descriptions.
 * @throws {Error} When no page instance is available.
 */
export async function handleIframeExtractor() {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const frames = page.frames();
    const frameData = await Promise.all(frames.map(async (frame, index) => {
        try {
            const title = await frame.title();
            const url = frame.url();
            // A frame document may have no <body>; report an empty preview
            // rather than throwing and losing the frame's url and title.
            const bodyText = await frame.evaluate(() => document.body?.innerText?.slice(0, 500) ?? '');
            return {
                id: index,
                url,
                title,
                preview: bodyText,
                isMainFrame: frame === page.mainFrame()
            };
        }
        catch (e) {
            return { id: index, error: String(e) };
        }
    }));
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(frameData, null, 2)
            }]
    };
}
|
|
183
|
-
/**
 * Reports the `<embed>` and `<object>` elements of the current page.
 * `<embed>` entries carry `src`, `<object>` entries carry `data`; both carry
 * the element's `type` as `typeAttr`. Embeds are listed before objects.
 *
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the JSON array of entries.
 * @throws {Error} When no page instance is available.
 */
export async function handleEmbedPageExtractor() {
    const page = getPageInstance();
    if (!page) {
        throw new Error('Browser not initialized');
    }
    const found = await page.evaluate(() => {
        const collected = [];
        for (const node of document.querySelectorAll('embed')) {
            collected.push({ type: 'embed', src: node.src, typeAttr: node.type });
        }
        for (const node of document.querySelectorAll('object')) {
            collected.push({ type: 'object', data: node.data, typeAttr: node.type });
        }
        return collected;
    });
    const text = JSON.stringify(found, null, 2);
    return { content: [{ type: 'text', text }] };
}
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
-
/**
 * Records the responses seen on `page` during a fixed time window.
 *
 * @param {object} page - Object exposing `on('response', fn)` and `off('response', fn)`.
 * @param {number} duration - Length of the capture window in milliseconds.
 * @param {(request: object, response: object) => boolean} filter - Decides whether a
 *   response is recorded.
 * @param {number} [settleTimeout=1000] - Extra milliseconds granted, after the window
 *   closes, to responses whose body is still being read. Without this wait such
 *   responses would be missing from the result.
 * @returns {Promise<Array<{url: string, method: string, type: string, status: number,
 *   headers: object, body: string}>>} One record per accepted response; `body` is
 *   cut to 5000 characters, or a placeholder when the content type is not
 *   text/json/xml or the body could not be read.
 */
async function captureNetwork(page, duration, filter, settleTimeout = 1000) {
    const captured = [];
    const inFlight = new Set();
    const record = async (response) => {
        try {
            const request = response.request();
            if (!filter(request, response))
                return;
            let body = '[Binary or Too Large]';
            try {
                const type = response.headers()['content-type'] || '';
                if (type.includes('text') || type.includes('json') || type.includes('xml')) {
                    body = await response.text();
                }
            }
            catch (e) {
                // Body unavailable: keep the placeholder and still record the response.
            }
            captured.push({
                url: response.url(),
                method: request.method(),
                type: request.resourceType(),
                status: response.status(),
                headers: response.headers(),
                body: body.slice(0, 5000)
            });
        }
        catch (e) {
            // Best effort: one broken response must not abort the capture.
        }
    };
    const responseHandler = (response) => {
        // `record` never rejects, so tracking the promise is enough.
        const task = record(response).finally(() => inFlight.delete(task));
        inFlight.add(task);
    };
    page.on('response', responseHandler);
    try {
        await new Promise(resolve => setTimeout(resolve, duration));
    }
    finally {
        page.off('response', responseHandler);
    }
    if (inFlight.size > 0) {
        // Bounded wait: a body that never finishes must not hang the caller.
        let timer;
        const deadline = new Promise(resolve => { timer = setTimeout(resolve, settleTimeout); });
        await Promise.race([Promise.allSettled([...inFlight]), deadline]);
        clearTimeout(timer);
    }
    // Return a snapshot so stragglers finishing later cannot mutate the result.
    return [...captured];
}
|
|
37
|
-
/**
 * Records network responses on the current page for a fixed duration,
 * optionally restricted to a set of resource types.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {number} [args.duration] - Capture window in milliseconds; falls back to 10000.
 * @param {string[]|string} [args.filterTypes] - Resource types to keep, matched
 *   case-insensitively; empty or missing keeps everything.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the JSON array of captured responses.
 * @throws {Error} When no page instance is available.
 */
export async function handleNetworkRecorder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const duration = args?.duration || 10000;
    const requested = args?.filterTypes;
    const typeList = Array.isArray(requested) ? requested : (requested ? [requested] : []);
    // The resource type is lower-cased before comparison, so the requested
    // types must be lower-cased too or "XHR" would never match.
    const wantedTypes = new Set(typeList.map(t => String(t).toLowerCase()));
    const results = await captureNetwork(page, duration, (req, res) => {
        if (wantedTypes.size === 0)
            return true;
        return wantedTypes.has(req.resourceType().toLowerCase());
    });
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(results, null, 2)
            }]
    };
}
|
|
55
|
-
/**
 * Captures XHR and fetch responses on the current page for a fixed duration,
 * optionally keeping only requests whose URL contains a given substring.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {number} [args.duration] - Capture window in milliseconds; falls back to 15000.
 * @param {string} [args.url] - Substring the request URL must contain.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is the JSON array of captured responses.
 * @throws {Error} When no page instance is available.
 */
export async function handleAjaxExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    // Both options are optional, so tolerate a call without arguments.
    const { duration, url } = args ?? {};
    const results = await captureNetwork(page, duration || 15000, (req, res) => {
        const type = req.resourceType();
        const isXhr = type === 'xhr' || type === 'fetch';
        if (!isXhr)
            return false;
        if (url && !req.url().includes(url))
            return false;
        return true;
    });
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(results, null, 2)
            }]
    };
}
|
|
76
|
-
/**
 * Thin alias of `handleAjaxExtractor` that forwards only `duration`, so no
 * URL filter is ever applied.
 *
 * @param {object} [args] - Tool arguments; may be omitted entirely.
 * @param {number} [args.duration] - Capture window in milliseconds.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Same response as `handleAjaxExtractor`.
 */
export async function handleFetchXhr(args) {
    // `args?.` keeps a call without arguments from throwing before delegation.
    return handleAjaxExtractor({ duration: args?.duration });
}
|
|
79
|
-
/**
 * Looks for API-like endpoints on the current page in two passes:
 *  1. static  - script `src` and anchor `href` URLs matching a few URL patterns;
 *  2. dynamic - URLs of XHR/fetch responses with a JSON content type, observed
 *               during a 5000 ms capture.
 *
 * @param {object} [args] - Accepted for interface compatibility; not read.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} Text response whose
 *   payload is `{ staticAnalysis, dynamicCapture }`.
 * @throws {Error} When no page instance is available.
 */
export async function handleApiFinder(args) {
    const page = getPageInstance();
    if (!page) {
        throw new Error('Browser not initialized');
    }
    // Pass 1: URLs already present in the markup.
    const staticAnalysis = await page.evaluate(() => {
        const apiPatterns = [/\/api\//, /\/v\d+\//, /graphql/, /\.json$/];
        const urls = new Set();
        for (const script of document.querySelectorAll('script')) {
            if (script.src) {
                urls.add(script.src);
            }
        }
        for (const anchor of document.querySelectorAll('a')) {
            urls.add(anchor.href);
        }
        const looksLikeApi = (url) => apiPatterns.some(re => re.test(url));
        return [...urls].filter(url => looksLikeApi(url));
    });
    // Pass 2: JSON answers to XHR/fetch requests seen while listening.
    const isJsonXhr = (req, res) => {
        const kind = req.resourceType();
        const mime = res.headers()['content-type'] || '';
        return (kind === 'xhr' || kind === 'fetch') && mime.includes('json');
    };
    const observed = await captureNetwork(page, 5000, isJsonXhr);
    const dynamicCapture = observed.map(entry => entry.url);
    const text = JSON.stringify({ staticAnalysis, dynamicCapture }, null, 2);
    return { content: [{ type: 'text', text }] };
}
|
package/dist/mcp-server.js
DELETED
|
@@ -1,265 +0,0 @@
|
|
|
1
|
-
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
2
|
-
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ListPromptsRequestSchema, InitializeRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
3
|
-
import { TOOLS, SERVER_INFO, CAPABILITIES, TOOL_NAMES } from './tool-definitions.js';
|
|
4
|
-
import { validateMCPResponse } from './mcp-response-validator.js';
|
|
5
|
-
// Import handlers
|
|
6
|
-
import { handleBrowserInit, handleBrowserClose } from './handlers/browser-handlers.js';
|
|
7
|
-
import { handleNavigate, handleWait } from './handlers/navigation-handlers.js';
|
|
8
|
-
import { handleClick, handleType, handlePressKey, handleSolveCaptcha, handleRandomScroll } from './handlers/interaction-handlers.js';
|
|
9
|
-
import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
|
|
10
|
-
import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
|
|
11
|
-
import { handleExtractList, handleExtractJSON, handleScrapeMetaTags, handleExtractSchema } from './handlers/data-extraction-handlers.js';
|
|
12
|
-
import { handleBatchElementScraper, handleNestedDataExtraction, handleAttributeHarvester, handleLinkHarvester, handleMediaExtractor, } from './handlers/multi-element-handlers.js';
|
|
13
|
-
import { handleMultiPageScraper, handleBreadcrumbNavigator } from './handlers/advanced-scraping-handlers.js';
|
|
14
|
-
import { handleHtmlElementsExtractor, handleTagsFinder, handleLinksFinder, handleXpathLinks, handleShadowDomExtractor, handleIframeExtractor, handleEmbedPageExtractor } from './handlers/dom-handlers.js';
|
|
15
|
-
import { handleNetworkRecorder, handleAjaxExtractor, handleFetchXhr, handleApiFinder } from './handlers/network-handlers.js';
|
|
16
|
-
import { handleHtmlToText, handleDuplicateRemover } from './handlers/data-transform-handlers.js';
|
|
17
|
-
import { handleConsistencyChecker } from './handlers/data-quality-handlers.js';
|
|
18
|
-
import { handleSmartSelectorGenerator, handleContentClassification, handleSentimentAnalysis, handleSummaryGenerator, handleTranslationSupport } from './handlers/ai-powered-handlers.js';
|
|
19
|
-
import { handleKeywordSearch, handleRegexPatternMatcher, handleXPathSupport, handleAdvancedCSSSelectors, handleVisualElementFinder } from './handlers/search-filter-handlers.js';
|
|
20
|
-
import { handleVideoSourceExtractor, handleVideoPlayerFinder, handleStreamDetector, handleRedirectTracer } from './handlers/advanced-video-media-handlers.js';
|
|
21
|
-
import { handleOCREngine, handleAudioCaptchaSolver, handlePuzzleCaptchaHandler } from './handlers/captcha-handlers.js';
|
|
22
|
-
import { handleAdvancedVideoExtraction, handleDeobfuscateJS, handleMultiLayerRedirectTrace, handleAdProtectionDetector } from './handlers/advanced-extraction-handlers.js';
|
|
23
|
-
/**
 * Builds the MCP server: registers the initialize, list-tools, list-resources
 * and list-prompts handlers, and a call-tool handler that dispatches to the
 * per-tool handler functions.
 *
 * Tool arguments are normalised to an object before dispatch, so a client that
 * omits `arguments` reaches the handler's own validation instead of failing
 * with a TypeError on `undefined`.
 *
 * @returns {Promise<Server>} The configured server; no transport is connected here.
 */
export async function createMcpServer() {
    const server = new Server(SERVER_INFO, { capabilities: CAPABILITIES });
    // Register initialize handler: echo the client's protocol version back.
    server.setRequestHandler(InitializeRequestSchema, async (request) => {
        const clientProtocolVersion = request.params.protocolVersion;
        return {
            protocolVersion: clientProtocolVersion,
            capabilities: CAPABILITIES,
            serverInfo: SERVER_INFO,
        };
    });
    // Register tool handlers
    server.setRequestHandler(ListToolsRequestSchema, async () => {
        return { tools: TOOLS };
    });
    // Register resource handlers (placeholder)
    server.setRequestHandler(ListResourcesRequestSchema, async () => {
        return { resources: [] };
    });
    // Register prompt handlers (placeholder)
    server.setRequestHandler(ListPromptsRequestSchema, async () => {
        return { prompts: [] };
    });
    // Tool name -> handler, in lookup order. Handlers that take no arguments
    // are wrapped so they keep being called without any.
    const routes = [
        [TOOL_NAMES.BROWSER_INIT, handleBrowserInit],
        [TOOL_NAMES.NAVIGATE, handleNavigate],
        [TOOL_NAMES.GET_CONTENT, handleGetContent],
        [TOOL_NAMES.CLICK, handleClick],
        [TOOL_NAMES.TYPE, handleType],
        [TOOL_NAMES.PRESS_KEY, handlePressKey],
        [TOOL_NAMES.WAIT, handleWait],
        [TOOL_NAMES.BROWSER_CLOSE, () => handleBrowserClose()],
        [TOOL_NAMES.SOLVE_CAPTCHA, handleSolveCaptcha],
        [TOOL_NAMES.RANDOM_SCROLL, () => handleRandomScroll()],
        [TOOL_NAMES.FIND_SELECTOR, handleFindSelector],
        [TOOL_NAMES.SAVE_CONTENT_AS_MARKDOWN, handleSaveContentAsMarkdown],
        // Smart Data Extractors
        [TOOL_NAMES.EXTRACT_LIST, handleExtractList],
        [TOOL_NAMES.EXTRACT_JSON, handleExtractJSON],
        [TOOL_NAMES.SCRAPE_META_TAGS, handleScrapeMetaTags],
        [TOOL_NAMES.EXTRACT_SCHEMA, handleExtractSchema],
        // Multi-Element Extractors
        [TOOL_NAMES.BATCH_ELEMENT_SCRAPER, handleBatchElementScraper],
        [TOOL_NAMES.NESTED_DATA_EXTRACTION, handleNestedDataExtraction],
        [TOOL_NAMES.ATTRIBUTE_HARVESTER, handleAttributeHarvester],
        // Content Type Specific
        [TOOL_NAMES.LINK_HARVESTER, handleLinkHarvester],
        [TOOL_NAMES.MEDIA_EXTRACTOR, handleMediaExtractor],
        // Phase 1: Pagination & Navigation
        [TOOL_NAMES.MULTI_PAGE_SCRAPER, handleMultiPageScraper],
        [TOOL_NAMES.BREADCRUMB_NAVIGATOR, handleBreadcrumbNavigator],
        // Phase 1: DOM & HTML
        [TOOL_NAMES.HTML_ELEMENTS_EXTRACTOR, handleHtmlElementsExtractor],
        [TOOL_NAMES.TAGS_FINDER, handleTagsFinder],
        [TOOL_NAMES.LINKS_FINDER, handleLinksFinder],
        [TOOL_NAMES.XPATH_LINKS, handleXpathLinks],
        [TOOL_NAMES.SHADOW_DOM_EXTRACTOR, handleShadowDomExtractor],
        [TOOL_NAMES.IFRAME_EXTRACTOR, () => handleIframeExtractor()],
        [TOOL_NAMES.EMBED_PAGE_EXTRACTOR, () => handleEmbedPageExtractor()],
        // Phase 1: Network Tools
        [TOOL_NAMES.NETWORK_RECORDER, handleNetworkRecorder],
        [TOOL_NAMES.AJAX_EXTRACTOR, handleAjaxExtractor],
        [TOOL_NAMES.FETCH_XHR, handleFetchXhr],
        [TOOL_NAMES.API_FINDER, handleApiFinder],
        // Phase 1: Data Transform
        [TOOL_NAMES.HTML_TO_TEXT, handleHtmlToText],
        [TOOL_NAMES.DUPLICATE_REMOVER, handleDuplicateRemover],
        // Phase 2: AI & Smart Features
        [TOOL_NAMES.SMART_SELECTOR_GENERATOR, handleSmartSelectorGenerator],
        [TOOL_NAMES.CONTENT_CLASSIFICATION, handleContentClassification],
        [TOOL_NAMES.SENTIMENT_ANALYSIS, handleSentimentAnalysis],
        [TOOL_NAMES.SUMMARY_GENERATOR, handleSummaryGenerator],
        [TOOL_NAMES.TRANSLATION_SUPPORT, handleTranslationSupport],
        // Phase 2: Search & Filter
        [TOOL_NAMES.KEYWORD_SEARCH, handleKeywordSearch],
        [TOOL_NAMES.REGEX_PATTERN_MATCHER, handleRegexPatternMatcher],
        [TOOL_NAMES.XPATH_SUPPORT, handleXPathSupport],
        [TOOL_NAMES.ADVANCED_CSS_SELECTORS, handleAdvancedCSSSelectors],
        [TOOL_NAMES.VISUAL_ELEMENT_FINDER, handleVisualElementFinder],
        // Phase 3: Media & Video
        [TOOL_NAMES.VIDEO_SOURCE_EXTRACTOR, handleVideoSourceExtractor],
        [TOOL_NAMES.VIDEO_PLAYER_FINDER, handleVideoPlayerFinder],
        [TOOL_NAMES.STREAM_DETECTOR, handleStreamDetector],
        [TOOL_NAMES.REDIRECT_TRACER, handleRedirectTracer],
        // Phase 4: Captcha & Security
        [TOOL_NAMES.OCR_ENGINE, handleOCREngine],
        [TOOL_NAMES.AUDIO_CAPTCHA_SOLVER, handleAudioCaptchaSolver],
        [TOOL_NAMES.PUZZLE_CAPTCHA_HANDLER, handlePuzzleCaptchaHandler],
        // Advanced Extraction (Security/Bypass)
        ["advanced_video_extraction", handleAdvancedVideoExtraction],
        ["deobfuscate_js", handleDeobfuscateJS],
        ["multi_layer_redirect_trace", handleMultiLayerRedirectTrace],
        ["ad_protection_detector", handleAdProtectionDetector],
        ["consistency_checker", handleConsistencyChecker],
    ];
    // A Map avoids accidental hits on Object.prototype members (e.g. a tool
    // named "constructor"). The first registration of a name wins, so a
    // duplicated name resolves to its earliest entry in the list above.
    const toolHandlers = new Map();
    for (const [toolName, handler] of routes) {
        if (!toolHandlers.has(toolName)) {
            toolHandlers.set(toolName, handler);
        }
    }
    // Main tool call handler
    server.setRequestHandler(CallToolRequestSchema, async (request) => {
        const { name, arguments: args } = request.params;
        try {
            const handler = toolHandlers.get(name);
            if (!handler) {
                throw new Error(`Unknown tool: ${name}`);
            }
            const result = await handler(args || {});
            // Validate MCP response format universally
            return validateMCPResponse(result, name);
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            // For workflow validation errors, throw them so MCP SDK handles them properly
            if (errorMessage.includes('cannot be executed in current state') ||
                errorMessage.includes('Cannot search for selectors') ||
                errorMessage.includes('Next Steps:')) {
                throw error;
            }
            // For other errors, return formatted response
            return {
                content: [
                    {
                        type: 'text',
                        text: `❌ Tool execution failed: ${errorMessage}`,
                    },
                ],
                isError: true,
            };
        }
    });
    return server;
}
|
package/dist/test-constants.js
DELETED
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Central Test Configuration
|
|
3
|
-
* Use these constants across all test files for consistency
|
|
4
|
-
*/
|
|
5
|
-
// Common prefix of the nopecha.com demo pages used for CAPTCHA tests.
const NOPECHA_DEMO_BASE = 'https://nopecha.com/demo';
/**
 * URLs used by the test suites, grouped by purpose.
 */
export const TEST_URLS = {
    // CAPTCHA Testing URLs
    CAPTCHA: {
        CLOUDFLARE: `${NOPECHA_DEMO_BASE}/cloudflare`,
        // NOTE(review): this URL embeds an `app_token` query value - confirm it is
        // not a live credential and that it does not expire.
        ECOURTS_INDIA: 'https://services.ecourts.gov.in/ecourtindia_v6/?p=casestatus/index&app_token=22e7493ca224682349cf0986dd144491a950819d30918b8e319ab0d39618f847',
        RECAPTCHA: `${NOPECHA_DEMO_BASE}/recaptcha`,
        HCAPTCHA: `${NOPECHA_DEMO_BASE}/hcaptcha`,
        TURNSTILE: `${NOPECHA_DEMO_BASE}/turnstile`,
    },
    // General Content Testing
    GENERAL: {
        WIKIPEDIA: 'https://en.wikipedia.org/wiki/Web_scraping',
        IMDB: 'https://www.imdb.com/',
        GITHUB: 'https://github.com/',
        EXAMPLE: 'https://example.com',
    },
    // API Discovery Testing
    API: {
        JSONPLACEHOLDER: 'https://jsonplaceholder.typicode.com',
        REQRES: 'https://reqres.in/',
    },
    // E-commerce Testing
    ECOMMERCE: {
        AMAZON: 'https://www.amazon.com',
    },
    // Local Testing
    LOCAL: {
        LOCALHOST: 'http://localhost:3000',
        FILE: 'file:///test.html',
    },
};
|
|
36
|
-
/**
 * Selector strings used by the tests, grouped by the site or scenario they target.
 */
export const TEST_SELECTORS = {
    // Common selectors for testing
    WIKIPEDIA: {
        HEADING: '#firstHeading',
        CONTENT: '#mw-content-text',
        TOC: '#toc',
        LINKS: 'a[href]',
        IMAGES: 'img',
        TABLES: 'table.wikitable',
    },
    ECOURTS: {
        CAPTCHA_IMAGE: 'img[src*="captcha"]',
        CAPTCHA_INPUT: 'input[name*="captcha" i]',
        STATE_SELECT: 'select[name="state"]',
        SEARCH_BUTTON: 'button[type="submit"]',
    },
    CLOUDFLARE: {
        CHALLENGE: 'div[id^="cf-chl-widget"]',
        IFRAME: 'iframe[src*="challenges.cloudflare.com"]',
        // NOTE(review): `:contains()` looks like the jQuery extension rather than
        // standard CSS - confirm whatever consumes this selector supports it.
        VERIFY_TEXT: 'p:contains("Verifying")',
    },
    // Generic element selectors, not tied to a particular site.
    COMMON: {
        HEADING: 'h1',
        PARAGRAPH: 'p',
        LINK: 'a',
        IMAGE: 'img',
        BUTTON: 'button',
        INPUT: 'input',
    },
};
|
|
66
|
-
// Milliseconds per second; the timeouts below are written as whole seconds.
const TEST_TIMEOUT_SECOND_MS = 1000;
/**
 * Named timeout budgets for tests, as plain millisecond counts.
 */
export const TEST_TIMEOUTS = {
    SHORT: 5 * TEST_TIMEOUT_SECOND_MS,
    MEDIUM: 10 * TEST_TIMEOUT_SECOND_MS,
    LONG: 30 * TEST_TIMEOUT_SECOND_MS,
    CAPTCHA: 60 * TEST_TIMEOUT_SECOND_MS,
};
|
|
72
|
-
/**
 * Expected values that tests compare their results against, grouped by target.
 */
export const TEST_EXPECTATIONS = {
    WIKIPEDIA: {
        // Lower bounds rather than exact counts.
        MIN_IMAGES: 5,
        MIN_LINKS: 100,
        HAS_TOC: true,
    },
    ECOURTS: {
        HAS_CAPTCHA: true,
        HAS_STATE_SELECT: true,
    },
    CLOUDFLARE: {
        HAS_CHALLENGE: true,
        VERIFICATION_TEXT: 'Verifying you are human',
    },
};
|
|
87
|
-
/**
 * Sample input values for the tests.
 */
export const TEST_DATA = {
    // Sample data for testing
    // One English sample per sentiment label.
    SENTIMENT: {
        POSITIVE: 'This is an amazing product! I absolutely love it. Highly recommended!',
        NEGATIVE: 'Terrible experience. Very disappointed. Would not recommend.',
        NEUTRAL: 'The product arrived on time. It works as described.',
    },
    // One sample sentence per source language.
    TRANSLATION: {
        FRENCH: 'Bonjour, comment allez-vous? Je suis très heureux de vous rencontrer.',
        SPANISH: 'Hola, ¿cómo estás? Estoy muy feliz de conocerte.',
        GERMAN: 'Hallo, wie geht es dir? Ich bin sehr glücklich, dich kennenzulernen.',
    },
    // NOTE(review): presumably inputs for the eCourts case-status search form -
    // confirm against the tests that read them.
    SEARCH: {
        PARTY_NAME: 'Ramesh Kumar',
        CASE_NUMBER: '123/2024',
        YEAR: '2024',
    },
};
|
|
105
|
-
// Default export: the five named constant groups bundled into one object, so
// they can be imported either individually or as a whole.
export default {
    TEST_URLS,
    TEST_SELECTORS,
    TEST_TIMEOUTS,
    TEST_EXPECTATIONS,
    TEST_DATA,
};
|