brave-real-browser-mcp-server 2.14.9 → 2.14.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/dist/handlers/advanced-scraping-handlers.js +58 -0
- package/dist/handlers/advanced-video-media-handlers.js +134 -1246
- package/dist/handlers/ai-powered-handlers.js +113 -17
- package/dist/handlers/data-quality-handlers.js +74 -0
- package/dist/handlers/data-transform-handlers.js +66 -0
- package/dist/handlers/dom-handlers.js +206 -0
- package/dist/handlers/network-handlers.js +111 -0
- package/dist/handlers/search-filter-handlers.js +15 -71
- package/dist/mcp-server.js +133 -0
- package/dist/tool-definitions.js +129 -14
- package/package.json +1 -1
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
1
|
import { getPageInstance } from '../browser-manager.js';
|
|
3
2
|
/**
|
|
4
|
-
* Smart Selector Generator - AI-powered CSS selector generation
|
|
3
|
+
* Smart Selector Generator - AI-powered CSS selector generation (Heuristic)
|
|
5
4
|
*/
|
|
6
5
|
export async function handleSmartSelectorGenerator(args) {
|
|
7
6
|
const { url, description, context } = args;
|
|
@@ -27,7 +26,7 @@ export async function handleSmartSelectorGenerator(args) {
|
|
|
27
26
|
keywords.forEach(keyword => {
|
|
28
27
|
if (text.includes(keyword))
|
|
29
28
|
score += 10;
|
|
30
|
-
if (className.includes(keyword))
|
|
29
|
+
if (typeof className === 'string' && className.includes(keyword))
|
|
31
30
|
score += 5;
|
|
32
31
|
if (id.includes(keyword))
|
|
33
32
|
score += 5;
|
|
@@ -48,9 +47,9 @@ export async function handleSmartSelectorGenerator(args) {
|
|
|
48
47
|
// Generate selector
|
|
49
48
|
let selector = tag;
|
|
50
49
|
if (id)
|
|
51
|
-
selector = `#${id}`;
|
|
52
|
-
else if (className)
|
|
53
|
-
selector = `${tag}.${className.split(
|
|
50
|
+
selector = `#${CSS.escape(id)}`;
|
|
51
|
+
else if (typeof className === 'string' && className.trim())
|
|
52
|
+
selector = `${tag}.${CSS.escape(className.trim().split(/\s+/)[0])}`;
|
|
54
53
|
scores.push({ selector, score, text: text.substring(0, 100), element: tag });
|
|
55
54
|
}
|
|
56
55
|
});
|
|
@@ -62,11 +61,11 @@ export async function handleSmartSelectorGenerator(args) {
|
|
|
62
61
|
totalCandidates: scores.length
|
|
63
62
|
};
|
|
64
63
|
}, description, context || '');
|
|
65
|
-
const resultText =
|
|
64
|
+
const resultText = `🤖 Smart Selector Generated\n\nBest Match: ${JSON.stringify(result.bestMatch, null, 2)}\nAlternatives: ${JSON.stringify(result.alternatives, null, 2)}\nTotal Candidates: ${result.totalCandidates}`;
|
|
66
65
|
return { content: [{ type: 'text', text: resultText }] };
|
|
67
66
|
}
|
|
68
67
|
catch (error) {
|
|
69
|
-
return { content: [{ type: 'text', text:
|
|
68
|
+
return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
|
|
70
69
|
}
|
|
71
70
|
}
|
|
72
71
|
/**
|
|
@@ -115,25 +114,122 @@ export async function handleContentClassification(args) {
|
|
|
115
114
|
return { category: cat.name, score };
|
|
116
115
|
});
|
|
117
116
|
scores.sort((a, b) => b.score - a.score);
|
|
118
|
-
const
|
|
119
|
-
const
|
|
117
|
+
const match = scores[0];
|
|
118
|
+
const totalScore = scores.reduce((sum, s) => sum + s.score, 0) || 1;
|
|
119
|
+
const confidence = match.score / totalScore;
|
|
120
|
+
const resultText = `✅ Content Classification\n\nPrimary Category: ${match.category} (Score: ${match.score})\nConfidence: ${(confidence * 100).toFixed(2)}%\n\nAll Categories:\n${JSON.stringify(scores.slice(0, 5), null, 2)}`;
|
|
121
|
+
return {
|
|
122
|
+
content: [{ type: 'text', text: resultText }]
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
catch (error) {
|
|
126
|
+
return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
 * Sentiment Analysis - classify text as Positive, Negative or Neutral with a
 * fixed bag-of-words heuristic (no negation handling, no stemming).
 *
 * Text source, in order of precedence:
 *   1. `args.selector` - textContent of the first matching element on the page
 *   2. `args.text`     - caller-supplied text
 *   3. page body       - `document.body.innerText` (only reached when `args.url` is given)
 * When `args.url` is given and differs from the current page URL, the page is
 * navigated there first.
 *
 * @param {{text?: string, url?: string, selector?: string}} args
 * @returns {Promise<object>} MCP result whose text is the JSON
 *   `{ sentiment, score, matchCount, analyzedLength }`. On failure the text is
 *   an error message and `isError` is true; this function does not reject.
 */
export async function handleSentimentAnalysis(args) {
    try {
        let textToAnalyze = args.text || '';
        if (args.url || args.selector) {
            const page = getPageInstance();
            if (!page)
                throw new Error('Browser not initialized');
            if (args.url && page.url() !== args.url) {
                await page.goto(args.url, { waitUntil: 'domcontentloaded' });
            }
            if (args.selector) {
                textToAnalyze = await page.evaluate((sel) => document.querySelector(sel)?.textContent || '', args.selector);
            }
            else if (!textToAnalyze) {
                textToAnalyze = await page.evaluate(() => document.body.innerText);
            }
        }
        if (!textToAnalyze)
            throw new Error('No text to analyze');
        // Fail with a clear message instead of "toLowerCase is not a function".
        if (typeof textToAnalyze !== 'string')
            throw new Error('Text to analyze must be a string');
        const positiveWords = new Set(['good', 'great', 'awesome', 'excellent', 'happy', 'love', 'best', 'wonderful', 'amazing']);
        const negativeWords = new Set(['bad', 'terrible', 'awful', 'worst', 'hate', 'sad', 'poor', 'disappointing', 'fail']);
        // One pass over maximal \w+ runs is equivalent to matching each word
        // with /\bword\b/g, because every listed word consists solely of \w
        // characters - but it scans the text once instead of eighteen times.
        const tokens = textToAnalyze.toLowerCase().match(/\w+/g) || [];
        let score = 0;
        let matchCount = 0;
        for (const token of tokens) {
            if (positiveWords.has(token)) {
                score += 1;
                matchCount += 1;
            }
            else if (negativeWords.has(token)) {
                score -= 1;
                matchCount += 1;
            }
        }
        let sentiment = 'Neutral';
        if (score > 0)
            sentiment = 'Positive';
        if (score < 0)
            sentiment = 'Negative';
        return {
            content: [{
                    type: 'text',
                    text: JSON.stringify({ sentiment, score, matchCount, analyzedLength: textToAnalyze.length }, null, 2)
                }]
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
    }
}
|
|
131
185
|
/**
 * Summary Generator - extractive summary made of the first five "long"
 * sentences (more than 20 characters after trimming) of the text.
 *
 * Text source, in order of precedence:
 *   1. `args.selector` - textContent of the first matching element on the page
 *   2. `args.text`     - caller-supplied text
 *   3. all `<p>` elements of the page joined by blank lines (only reached when
 *      `args.url` is given)
 * When `args.url` is given and differs from the current page URL, the page is
 * navigated there first.
 *
 * @param {{text?: string, url?: string, selector?: string, maxLength?: number}} args
 *   `maxLength`, when truthy, truncates the summary to that many characters.
 * @returns {Promise<object>} MCP result whose text is the JSON
 *   `{ summary, originalLength }`. On failure the text is an error message and
 *   `isError` is true; this function does not reject.
 */
export async function handleSummaryGenerator(args) {
    try {
        let textToSummary = args.text || '';
        if (args.url || args.selector) {
            const page = getPageInstance();
            if (!page)
                throw new Error('Browser not initialized');
            if (args.url && page.url() !== args.url) {
                await page.goto(args.url, { waitUntil: 'domcontentloaded' });
            }
            if (args.selector) {
                textToSummary = await page.evaluate((sel) => document.querySelector(sel)?.textContent || '', args.selector);
            }
            else if (!textToSummary) {
                // Heuristic: Get paragraphs
                textToSummary = await page.evaluate(() => {
                    return Array.from(document.querySelectorAll('p')).map(p => p.textContent).join('\n\n');
                });
            }
        }
        // Whitespace-only input carries nothing to summarize either.
        if (typeof textToSummary !== 'string' || !textToSummary.trim())
            throw new Error('No text to summarize');
        // Trim each fragment so that joining with '. ' does not yield double spaces.
        const sentences = textToSummary
            .split(/[.!?]+/)
            .map(s => s.trim())
            .filter(s => s.length > 20);
        // When no fragment is long enough, fall back to the text itself rather
        // than returning a bare "." as the summary.
        const summary = sentences.length > 0
            ? `${sentences.slice(0, 5).join('. ')}.`
            : textToSummary.trim();
        const finalSummary = args.maxLength ? summary.slice(0, args.maxLength) : summary;
        return {
            content: [{
                    type: 'text',
                    text: JSON.stringify({ summary: finalSummary, originalLength: textToSummary.length }, null, 2)
                }]
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
    }
}
|
|
137
225
|
/**
 * Translation Support - placeholder. No translation is performed; the handler
 * only reports that an external translation API would be required and echoes
 * whether text was supplied and which target language was requested.
 *
 * @param {{text?: string, targetLanguage?: string}} [args]
 * @returns {Promise<object>} MCP result with a single informational text item.
 */
export async function handleTranslationSupport(args) {
    const hasText = Boolean(args?.text);
    // Avoid printing the literal "undefined" when no target language was given.
    const targetLanguage = args?.targetLanguage ?? 'Not specified';
    return {
        content: [{
                type: 'text',
                text: `⚠️ Translation Support requires an external API (e.g., Google Translate, DeepL). This feature is defined but currently running in 'offline' mode. To implement, valid API keys would be required.\n\nInput extracted: ${hasText ? 'Yes' : 'No'}\nTarget Language: ${targetLanguage}`
            }]
    };
}
|
|
@@ -255,3 +255,77 @@ export async function handleOutlierDetection(args) {
|
|
|
255
255
|
/**
 * Consistency Checker - check data consistency across fields.
 *
 * Supported rule shapes (rules of any other `type` are ignored):
 *   - `{ type: 'dependency', field, dependentField }`
 *       when `item[field]` is truthy, `item[dependentField]` must be present
 *       (not undefined, null or ''). A dependent value of `0` or `false` counts
 *       as present.
 *   - `{ type: 'value_match', field, value, targetField, targetValue }`
 *       when `item[field] === value`, `item[targetField]` must `=== targetValue`.
 * A rule that throws while being evaluated (e.g. a null item) fails that item
 * with a "Rule execution error" entry.
 *
 * @param {{data: any[], rules?: object[]}} args
 * @returns {Promise<object>} MCP result with a textual report (totals plus the
 *   JSON list of failures). Returns "No rules provided. Pass." when `rules` is
 *   falsy. On invalid input the text is an error message and `isError` is true;
 *   this function does not reject.
 */
export async function handleConsistencyChecker(args) {
    try {
        // Destructure inside the try so missing args yields an isError result.
        const { data, rules } = args ?? {};
        if (!Array.isArray(data)) {
            throw new Error('Data must be an array');
        }
        if (!rules) {
            return { content: [{ type: "text", text: "No rules provided. Pass." }] };
        }
        if (!Array.isArray(rules)) {
            throw new Error('Rules must be an array');
        }
        // Presence, not truthiness: 0 and false are legitimate field values.
        const isPresent = (value) => value !== undefined && value !== null && value !== '';
        const report = {
            totalItems: data.length,
            passedItems: 0,
            failedItems: 0,
            failures: []
        };
        data.forEach((item, index) => {
            const itemFailures = [];
            for (const rule of rules) {
                try {
                    if (rule.type === 'dependency') {
                        if (item[rule.field] && !isPresent(item[rule.dependentField])) {
                            itemFailures.push(`Field '${rule.field}' requires '${rule.dependentField}'`);
                        }
                    }
                    else if (rule.type === 'value_match') {
                        if (item[rule.field] === rule.value && item[rule.targetField] !== rule.targetValue) {
                            itemFailures.push(`When '${rule.field}' is '${rule.value}', '${rule.targetField}' must be '${rule.targetValue}'`);
                        }
                    }
                }
                catch (e) {
                    itemFailures.push(`Rule execution error: ${e.message}`);
                }
            }
            if (itemFailures.length === 0) {
                report.passedItems++;
            }
            else {
                report.failedItems++;
                report.failures.push({
                    index,
                    item,
                    errors: itemFailures
                });
            }
        });
        return {
            content: [
                {
                    type: "text",
                    text: `Consistency Check Results:\nTotal: ${report.totalItems}\nPassed: ${report.passedItems}\nFailed: ${report.failedItems}\n\nFailures:\n${JSON.stringify(report.failures, null, 2)}`
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Consistency Checker Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
/**
 * Convert `args.html` with Turndown (ATX headings, fenced code blocks) and
 * return the converted text.
 *
 * Unless `args.preserveLinks` is truthy, a Turndown rule replaces every `<a>`
 * element with its inner content, so link targets are dropped.
 *
 * @param {{html: string, preserveLinks?: boolean}} args
 * @returns {Promise<object>} MCP result holding the converted text, or an
 *   `isError` result describing the failure; this function does not reject.
 */
export async function handleHtmlToText(args) {
    const asTextResult = (text, extra = {}) => ({ content: [{ type: 'text', text }], ...extra });
    try {
        const converter = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
        const keepLinks = Boolean(args.preserveLinks);
        if (!keepLinks) {
            // Emit only the anchor's content, discarding the href.
            converter.addRule('no-links', {
                filter: 'a',
                replacement: (content) => content
            });
        }
        return asTextResult(converter.turndown(args.html));
    }
    catch (error) {
        return asTextResult(`Error converting HTML to text: ${error}`, { isError: true });
    }
}
|
|
33
|
+
/**
 * Remove duplicate entries from `args.data`, keeping the first occurrence of
 * each and preserving order.
 *
 * - With `args.uniqueKey`: two items are duplicates when the values stored
 *   under that key are deeply equal. Items that lack the key (or are not
 *   objects) all share the key value `undefined`, so only the first is kept.
 * - Without it: two items are duplicates when they are deeply equal.
 * Comparison is insensitive to object key order (`{a:1,b:2}` equals
 * `{b:2,a:1}`) and distinguishes types (`1` differs from `"1"`).
 *
 * @param {{data: any[], uniqueKey?: string}} args
 * @returns {Promise<object>} MCP result whose text is the JSON of the
 *   de-duplicated array.
 * @throws {Error} when `args.data` is not an array.
 */
export async function handleDuplicateRemover(args) {
    if (!Array.isArray(args.data)) {
        throw new Error("Input 'data' must be an array.");
    }
    // Rebuild the value with object keys sorted at every level so that the
    // serialized form does not depend on property insertion order.
    const canonicalize = (value) => {
        if (Array.isArray(value)) {
            return value.map(canonicalize);
        }
        if (value !== null && typeof value === 'object') {
            return Object.fromEntries(Object.keys(value).sort().map(key => [key, canonicalize(value[key])]));
        }
        return value;
    };
    const fingerprint = (value) => JSON.stringify(canonicalize(value));
    const seen = new Set();
    const uniqueData = args.data.filter(item => {
        // `?.` keeps null/undefined entries from crashing the uniqueKey path.
        const mark = args.uniqueKey ? fingerprint(item?.[args.uniqueKey]) : fingerprint(item);
        if (seen.has(mark))
            return false;
        seen.add(mark);
        return true;
    });
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(uniqueData, null, 2)
            }]
    };
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
+
/**
 * Describe elements of the current page that match a CSS selector.
 *
 * For each element (document order, at most `maxElements`) the result holds
 * the lower-cased tag name, id, class string, the first 100 characters of its
 * trimmed text, all attributes, its bounding rectangle and - when
 * `includeStyles` is truthy - a few computed style properties.
 *
 * @param {{selector?: string, maxElements?: number, includeStyles?: boolean}} args
 *   `selector` defaults to '*'; `maxElements` defaults to 100 and also falls
 *   back to 100 when it is not a number >= 1.
 * @returns {Promise<object>} MCP result whose text is the JSON array of
 *   element descriptions.
 * @throws {Error} when no page instance is available.
 */
export async function handleHtmlElementsExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const selector = args.selector || '*';
    // A negative limit would make slice() drop elements from the END instead.
    const requestedMax = Number(args.maxElements);
    const max = Number.isFinite(requestedMax) && requestedMax >= 1 ? Math.floor(requestedMax) : 100;
    const includeStyles = args.includeStyles || false;
    const elements = await page.evaluate((sel, maxCount, incStyles) => {
        const els = Array.from(document.querySelectorAll(sel)).slice(0, maxCount);
        return els.map(el => {
            const rect = el.getBoundingClientRect();
            const info = {
                tagName: el.tagName.toLowerCase(),
                id: el.id,
                // SVG elements expose className as an SVGAnimatedString object;
                // read the attribute so the value is always a plain string.
                className: typeof el.className === 'string' ? el.className : (el.getAttribute('class') || ''),
                // Trim first so leading whitespace does not consume the 100-char budget.
                text: el.textContent?.trim().slice(0, 100),
                attributes: {},
                rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
            };
            if (incStyles) {
                const computed = window.getComputedStyle(el);
                info.styles = {
                    display: computed.display,
                    position: computed.position,
                    color: computed.color,
                    backgroundColor: computed.backgroundColor,
                    fontSize: computed.fontSize
                };
            }
            Array.from(el.attributes).forEach((attr) => {
                info.attributes[attr.name] = attr.value;
            });
            return info;
        });
    }, selector, max, includeStyles);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(elements, null, 2)
            }]
    };
}
|
|
44
|
+
/**
 * Find every element matching each of the given tag names (or selectors) on
 * the current page.
 *
 * @param {{tags: string[]}} args - entries are passed to `querySelectorAll`.
 * @returns {Promise<object>} MCP result whose text is a JSON object keyed by
 *   tag; each value lists `{ text, html }` per element, where `text` is the
 *   first 50 characters of the trimmed text content and `html` the first 100
 *   characters of the element's outerHTML.
 * @throws {Error} when no page instance is available or `args.tags` is not an
 *   array.
 */
export async function handleTagsFinder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    // Validate here: otherwise the failure surfaces from inside the page
    // context as an opaque "Cannot read properties of undefined" error.
    if (!Array.isArray(args?.tags)) {
        throw new Error("Input 'tags' must be an array of tag names.");
    }
    const results = await page.evaluate((tags) => {
        const found = {};
        tags.forEach(tag => {
            const elements = document.querySelectorAll(tag);
            found[tag] = Array.from(elements).map(el => ({
                // Trim first so leading whitespace does not consume the budget.
                text: el.textContent?.trim().slice(0, 50),
                html: el.outerHTML.slice(0, 100)
            }));
        });
        return found;
    }, args.tags);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(results, null, 2)
            }]
    };
}
|
|
66
|
+
/**
 * List the `<a href>` links of the current page.
 *
 * A link is internal only when its URL origin equals the page origin exactly.
 * A plain prefix test would treat `https://example.com.evil.test/` as
 * belonging to `https://example.com`.
 *
 * @param {{includeExternal?: boolean, maxLinks?: number}} args
 *   `includeExternal` defaults to true; `maxLinks` defaults to 200.
 * @returns {Promise<object>} MCP result whose text is the JSON array of
 *   `{ text, href, isExternal }` in document order.
 * @throws {Error} when no page instance is available.
 */
export async function handleLinksFinder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const includeExt = args.includeExternal ?? true;
    const max = args.maxLinks ?? 200;
    const links = await page.evaluate((incExt, maxCount) => {
        const pageOrigin = window.location.origin;
        const isInternal = (href) => {
            try {
                return new URL(href).origin === pageOrigin;
            }
            catch {
                // Unparseable hrefs cannot be same-origin.
                return false;
            }
        };
        const described = Array.from(document.querySelectorAll('a[href]')).map(a => ({
            text: a.textContent?.trim(),
            href: a.href,
            isExternal: !isInternal(a.href)
        }));
        const filtered = incExt ? described : described.filter(link => !link.isExternal);
        return filtered.slice(0, maxCount);
    }, includeExt, max);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(links, null, 2)
            }]
    };
}
|
|
90
|
+
/**
 * Evaluate an XPath expression against the current document and describe the
 * matched nodes.
 *
 * Anchor elements yield `{ text, href }`; any other node with non-empty text
 * content yields `{ text }`; nodes without text content are skipped.
 *
 * @param {{xpath: string}} args
 * @returns {Promise<object>} MCP result whose text is the JSON array of
 *   matches in document order.
 * @throws {Error} when no page instance is available or `args.xpath` is not a
 *   non-empty string.
 */
export async function handleXpathLinks(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    // A missing expression would otherwise be coerced to the XPath "undefined"
    // and silently match nothing.
    if (typeof args?.xpath !== 'string' || !args.xpath.trim()) {
        throw new Error("Input 'xpath' must be a non-empty string.");
    }
    const links = await page.evaluate((xpathExpr) => {
        const result = document.evaluate(xpathExpr, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
        const items = [];
        for (let i = 0; i < result.snapshotLength; i++) {
            const node = result.snapshotItem(i);
            if (node instanceof HTMLAnchorElement) {
                items.push({ text: node.textContent?.trim(), href: node.href });
            }
            else if (node && node.textContent) {
                items.push({ text: node.textContent.trim() });
            }
        }
        return items;
    }, args.xpath);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(links, null, 2)
            }]
    };
}
|
|
115
|
+
/**
 * Collect the outerHTML of elements matching a selector inside every shadow
 * root reachable from `document.body` through `element.shadowRoot`.
 *
 * Shadow roots are numbered in depth-first order: a host's shadow root comes
 * first, then roots nested inside that shadow tree, then roots found among
 * the host's own child nodes. Only roots with at least one match are reported.
 *
 * @param {{selector?: string}} args - `selector` defaults to '*'.
 * @returns {Promise<object>} MCP result whose text is the JSON
 *   `{ message, data }`, `data` being `[{ shadowRootIndex, elements }]`.
 * @throws {Error} when no page instance is available.
 */
export async function handleShadowDomExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const sel = args.selector || '*';
    const results = await page.evaluate((selector) => {
        // Depth-first walk with an explicit stack. Entries are pushed in
        // reverse visiting order so they pop in document order.
        const shadowRoots = [];
        const pending = [document.body];
        while (pending.length > 0) {
            const node = pending.pop();
            const shadow = node instanceof Element ? node.shadowRoot : null;
            if (shadow) {
                shadowRoots.push(shadow);
            }
            if (node.childNodes) {
                const children = Array.from(node.childNodes);
                for (let i = children.length - 1; i >= 0; i--) {
                    pending.push(children[i]);
                }
            }
            if (shadow) {
                // Pushed last so the shadow tree is explored before the light children.
                pending.push(shadow);
            }
        }
        return shadowRoots.flatMap((shadow, index) => {
            const matches = shadow.querySelectorAll(selector);
            if (matches.length === 0) {
                return [];
            }
            return [{
                    shadowRootIndex: index,
                    elements: Array.from(matches, el => el.outerHTML)
                }];
        });
    }, sel);
    const payload = { message: `Found content in ${results.length} shadow roots`, data: results };
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(payload, null, 2)
            }]
    };
}
|
|
154
|
+
/**
 * Summarize every frame of the current page, as returned by `page.frames()`.
 *
 * Each entry holds the frame's index, URL, title, the first 500 characters of
 * its body text and whether it is the main frame. A frame that cannot be
 * inspected is reported as `{ id, error }` instead of failing the whole call.
 *
 * @returns {Promise<object>} MCP result whose text is the JSON array of frame
 *   summaries.
 * @throws {Error} when no page instance is available.
 */
export async function handleIframeExtractor() {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const describeFrame = async (frame, index) => {
        try {
            const title = await frame.title();
            const url = frame.url();
            const preview = await frame.evaluate(() => document.body.innerText.slice(0, 500));
            const isMainFrame = frame === page.mainFrame();
            return { id: index, url, title, preview, isMainFrame };
        }
        catch (e) {
            return { id: index, error: String(e) };
        }
    };
    const frameData = await Promise.all(page.frames().map(describeFrame));
    const text = JSON.stringify(frameData, null, 2);
    return { content: [{ type: 'text', text }] };
}
|
|
183
|
+
/**
 * List the `<embed>` and `<object>` elements of the current page.
 *
 * Embeds come first as `{ type: 'embed', src, typeAttr }`, followed by objects
 * as `{ type: 'object', data, typeAttr }`; `typeAttr` is the element's `type`
 * property.
 *
 * @returns {Promise<object>} MCP result whose text is the JSON array of
 *   entries.
 * @throws {Error} when no page instance is available.
 */
export async function handleEmbedPageExtractor() {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const embeds = await page.evaluate(() => {
        const collected = [];
        for (const el of document.querySelectorAll('embed')) {
            collected.push({ type: 'embed', src: el.src, typeAttr: el.type });
        }
        for (const el of document.querySelectorAll('object')) {
            collected.push({ type: 'object', data: el.data, typeAttr: el.type });
        }
        return collected;
    });
    const text = JSON.stringify(embeds, null, 2);
    return { content: [{ type: 'text', text }] };
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
+
/**
 * Record responses emitted by `page` for `duration` milliseconds.
 *
 * A response is recorded when `filter(request, response)` is truthy. Bodies
 * are read only for content types containing "text", "json" or "xml" and are
 * truncated to 5000 characters; otherwise the body is the placeholder
 * '[Binary or Too Large]'. Failures while inspecting a single response are
 * ignored on purpose so one bad response cannot abort the capture.
 *
 * Body reads still in flight when the window closes are awaited for up to two
 * seconds, so responses that arrived inside the window are not lost; anything
 * completing later is excluded from the returned snapshot.
 *
 * @param {object} page - object exposing `on`/`off` for 'response' events.
 * @param {number} duration - capture window in milliseconds.
 * @param {(request: object, response: object) => boolean} filter
 * @returns {Promise<Array<{url: string, method: string, type: string, status: number, headers: object, body: string}>>}
 *   entries in the order their processing completed.
 */
async function captureNetwork(page, duration, filter) {
    const BODY_GRACE_MS = 2000;
    const captured = [];
    const inFlight = new Set();
    const record = async (response) => {
        try {
            const request = response.request();
            if (!filter(request, response)) {
                return;
            }
            let body = '[Binary or Too Large]';
            try {
                const type = response.headers()['content-type'] || '';
                if (type.includes('text') || type.includes('json') || type.includes('xml')) {
                    body = await response.text();
                }
            }
            catch {
                // Body unavailable: keep the placeholder.
            }
            captured.push({
                url: response.url(),
                method: request.method(),
                type: request.resourceType(),
                status: response.status(),
                headers: response.headers(),
                body: body.slice(0, 5000)
            });
        }
        catch {
            // Best-effort capture: skip responses that cannot be inspected.
        }
    };
    const responseHandler = (response) => {
        // record() never rejects, so tracking the promise is safe.
        const task = record(response);
        inFlight.add(task);
        task.finally(() => inFlight.delete(task));
    };
    page.on('response', responseHandler);
    try {
        await new Promise(resolve => setTimeout(resolve, duration));
    }
    finally {
        page.off('response', responseHandler);
    }
    if (inFlight.size > 0) {
        // Bounded wait so a body read that never settles cannot hang the caller.
        let graceTimer;
        const grace = new Promise(resolve => {
            graceTimer = setTimeout(resolve, BODY_GRACE_MS);
        });
        await Promise.race([Promise.allSettled([...inFlight]), grace]);
        clearTimeout(graceTimer);
    }
    // Snapshot: late completions must not mutate what the caller received.
    return [...captured];
}
|
|
37
|
+
/**
 * Record network responses of the current page for a period of time.
 *
 * @param {{duration?: number, filterTypes?: string[]}} args
 *   `duration` is the capture window in milliseconds (default 10000).
 *   `filterTypes` restricts the capture to the listed resource types, compared
 *   case-insensitively; when omitted or empty every response is recorded.
 * @returns {Promise<object>} MCP result whose text is the JSON array produced
 *   by `captureNetwork`.
 * @throws {Error} when no page instance is available.
 */
export async function handleNetworkRecorder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const duration = args.duration || 10000;
    // Normalize once. The resource type is lower-cased below, so the requested
    // types must be too - otherwise ['XHR'] could never match 'xhr'.
    const wantedTypes = new Set([].concat(args.filterTypes ?? []).map(type => String(type).toLowerCase()));
    const results = await captureNetwork(page, duration, (req, res) => {
        if (wantedTypes.size === 0)
            return true;
        return wantedTypes.has(req.resourceType().toLowerCase());
    });
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(results, null, 2)
            }]
    };
}
|
|
55
|
+
/**
 * Capture XHR and fetch responses of the current page for a period of time.
 *
 * @param {{duration?: number, url?: string}} args
 *   `duration` is the capture window in milliseconds (default 15000). When
 *   `url` is given, only requests whose URL contains that substring are kept.
 * @returns {Promise<object>} MCP result whose text is the JSON array produced
 *   by `captureNetwork`.
 * @throws {Error} when no page instance is available.
 */
export async function handleAjaxExtractor(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    const captureWindowMs = args.duration || 15000;
    const isWantedAjax = (req, res) => {
        const kind = req.resourceType();
        if (kind !== 'xhr' && kind !== 'fetch') {
            return false;
        }
        // Reject only when a URL filter is set and the request URL lacks it.
        return !(args.url && !req.url().includes(args.url));
    };
    const results = await captureNetwork(page, captureWindowMs, isWantedAjax);
    const text = JSON.stringify(results, null, 2);
    return { content: [{ type: 'text', text }] };
}
|
|
76
|
+
/**
 * Capture XHR/fetch traffic by delegating to `handleAjaxExtractor`.
 * Only `duration` is forwarded; no URL filter is applied.
 *
 * @param {{duration?: number}} args
 * @returns {Promise<object>} the result of `handleAjaxExtractor`.
 */
export async function handleFetchXhr(args) {
    const { duration } = args;
    return handleAjaxExtractor({ duration });
}
|
|
79
|
+
/**
 * Look for API endpoints used by the current page.
 *
 * 1. Static analysis: script `src` and anchor `href` URLs matching `/api/`,
 *    `/v<digits>/`, `graphql` or ending in `.json`.
 * 2. Dynamic capture: XHR/fetch responses with a JSON content type observed
 *    during a five second window; each URL is listed once even when the
 *    endpoint is hit repeatedly (e.g. polling).
 *
 * @param {object} args - currently unused.
 * @returns {Promise<object>} MCP result whose text is the JSON
 *   `{ staticAnalysis: string[], dynamicCapture: string[] }`.
 * @throws {Error} when no page instance is available.
 */
export async function handleApiFinder(args) {
    const page = getPageInstance();
    if (!page)
        throw new Error('Browser not initialized');
    // 1. Static Analysis
    const staticApis = await page.evaluate(() => {
        const patterns = [/\/api\//, /\/v\d+\//, /graphql/, /\.json$/];
        const candidates = new Set();
        document.querySelectorAll('script[src]').forEach(s => {
            if (s.src)
                candidates.add(s.src);
        });
        // Anchors without an href have nothing to contribute.
        document.querySelectorAll('a[href]').forEach(a => {
            candidates.add(a.href);
        });
        return Array.from(candidates).filter(url => patterns.some(p => p.test(url)));
    });
    // 2. Dynamic Analysis
    const dynamicApis = await captureNetwork(page, 5000, (req, res) => {
        const type = req.resourceType();
        const contentType = res.headers()['content-type'] || '';
        return (type === 'xhr' || type === 'fetch') && contentType.includes('json');
    });
    // De-duplicate while keeping first-seen order.
    const dynamicUrls = [...new Set(dynamicApis.map(d => d.url))];
    return {
        content: [{
                type: 'text',
                text: JSON.stringify({
                    staticAnalysis: staticApis,
                    dynamicCapture: dynamicUrls
                }, null, 2)
            }]
    };
}
|