brave-real-browser-mcp-server 2.8.0 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +7 -0
- package/dist/handlers/ai-powered-handlers.js +367 -0
- package/dist/handlers/api-integration-handlers.js +314 -0
- package/dist/handlers/captcha-handlers.js +223 -0
- package/dist/handlers/data-extraction-handlers.js +421 -0
- package/dist/handlers/data-processing-handlers.js +366 -0
- package/dist/handlers/data-quality-handlers.js +429 -0
- package/dist/handlers/multi-element-handlers.js +532 -0
- package/dist/handlers/pagination-handlers.js +306 -0
- package/dist/handlers/search-filter-handlers.js +396 -0
- package/dist/handlers/visual-tools-handlers.js +305 -0
- package/dist/index.js +134 -0
- package/dist/tool-definitions.js +734 -0
- package/package.json +17 -23
- package/dist/extractors/content-type-extractors.js +0 -314
- package/dist/extractors/extractors.test.js +0 -17
- package/dist/extractors/multi-element-extractors.js +0 -325
- package/dist/extractors/smart-data-extractors.js +0 -281
- package/dist/utils/advanced-features.js +0 -247
- package/dist/utils/advanced-scraping.js +0 -253
- package/dist/utils/all-modules.test.js +0 -86
- package/dist/utils/auth-session.js +0 -296
- package/dist/utils/data-processing.js +0 -301
- package/dist/utils/data-processing.test.js +0 -52
- package/dist/utils/pagination.js +0 -249
- package/dist/utils/pagination.test.js +0 -22
package/dist/browser-manager.js
CHANGED
|
@@ -726,6 +726,13 @@ export function getBrowserInstance() {
|
|
|
726
726
|
/**
 * Returns the module-level `pageInstance` binding as-is.
 * No check is made here that a page has actually been set.
 */
export function getPageInstance() {
|
|
727
727
|
return pageInstance;
|
|
728
728
|
}
|
|
729
|
+
/**
 * Compatibility accessor for the current page.
 *
 * Returns the same module-level `pageInstance` that getPageInstance exposes,
 * but refuses to hand back a falsy value.
 *
 * @returns {*} The current `pageInstance`.
 * @throws {Error} When `pageInstance` is falsy.
 */
export function getCurrentPage() {
    if (pageInstance) {
        return pageInstance;
    }
    throw new Error('No page instance available. Please initialize browser first.');
}
|
|
729
736
|
/**
 * Returns the module-level `contentPriorityConfig` binding as-is.
 */
export function getContentPriorityConfig() {
|
|
730
737
|
return contentPriorityConfig;
|
|
731
738
|
}
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
import { getPageInstance } from '../browser-manager.js';
|
|
3
|
+
import natural from 'natural';
|
|
4
|
+
import Sentiment from 'sentiment';
|
|
5
|
+
import { franc } from 'franc';
|
|
6
|
+
const sentiment = new Sentiment();
|
|
7
|
+
const tokenizer = new natural.WordTokenizer();
|
|
8
|
+
/**
 * Smart Selector Generator - suggests CSS selectors for a described element.
 *
 * Every element of the page is scored against the whitespace-separated words
 * of `description` (+10 when a word occurs in the element's text, +5 in its
 * class attribute, +5 in its id), against the words of `context` (+3 per word
 * found in the text), and gets +2 when its tag is in a fixed list of semantic
 * tags. Matching is case-insensitive. Elements scoring above zero become
 * candidates, sorted best first.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {string} args.description - Words describing the wanted element.
 * @param {string} [args.context] - Extra words that add a smaller bonus.
 * @returns {Promise<object>} `{ success: true, data: { bestMatch, alternatives,
 *   totalCandidates }, description, context }`, or `{ success: false, error }`
 *   when anything fails (errors are reported, never thrown).
 */
export async function handleSmartSelectorGenerator(args) {
    const { url, description, context } = args;
    try {
        if (typeof description !== 'string' || description.trim() === '') {
            throw new Error('A non-empty description is required.');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const result = await page.evaluate((desc, ctx) => {
            // Empty tokens must be dropped: every string "includes" the empty
            // string, so a stray space would make every element a candidate.
            const toKeywords = (value) => value.toLowerCase().split(/\s+/).filter(Boolean);
            // Ids and class names may contain characters that are not valid in a
            // bare selector; escape them when the runtime offers CSS.escape.
            const escapeIdent = (value) => (typeof CSS !== 'undefined' && typeof CSS.escape === 'function'
                ? CSS.escape(value)
                : value);
            const semanticTags = ['button', 'input', 'a', 'nav', 'header', 'footer', 'article', 'section'];
            const keywords = toKeywords(desc);
            const contextKeywords = ctx ? toKeywords(ctx) : [];
            const scores = [];
            Array.from(document.querySelectorAll('*')).forEach((el) => {
                const text = el.textContent?.toLowerCase() || '';
                const tag = el.tagName.toLowerCase();
                // Read the attribute rather than el.className: on SVG elements
                // className is an SVGAnimatedString object, not a string.
                const className = el.getAttribute('class') || '';
                const id = el.id || '';
                const classLower = className.toLowerCase();
                const idLower = id.toLowerCase();
                let score = 0;
                // Score based on description keywords
                keywords.forEach((keyword) => {
                    if (text.includes(keyword)) {
                        score += 10;
                    }
                    if (classLower.includes(keyword)) {
                        score += 5;
                    }
                    if (idLower.includes(keyword)) {
                        score += 5;
                    }
                });
                // Score based on context keywords
                contextKeywords.forEach((keyword) => {
                    if (text.includes(keyword)) {
                        score += 3;
                    }
                });
                // Prefer semantic elements
                if (semanticTags.includes(tag)) {
                    score += 2;
                }
                if (score <= 0) {
                    return;
                }
                // Generate selector: id wins, then the first class, then the bare tag.
                let selector = tag;
                if (id) {
                    selector = `#${escapeIdent(id)}`;
                }
                else {
                    const firstClass = className.trim().split(/\s+/)[0];
                    if (firstClass) {
                        selector = `${tag}.${escapeIdent(firstClass)}`;
                    }
                }
                scores.push({ selector, score, text: text.substring(0, 100), element: tag });
            });
            // Sort by score, best first
            scores.sort((a, b) => b.score - a.score);
            return {
                bestMatch: scores[0] || null,
                alternatives: scores.slice(1, 6),
                totalCandidates: scores.length
            };
        }, description, context || '');
        return {
            success: true,
            data: result,
            description,
            context
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
84
|
+
/**
 * Content Classification - classify webpage content into categories.
 *
 * Collects the page title, body text, meta description, meta keywords and
 * h1-h3 headings, lowercases them, and counts how often each category keyword
 * occurs. Keywords are matched as literal, case-insensitive substrings, so
 * characters such as `+` or `.` are safe to use.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {Array<{name: string, keywords: string[]|string}>} [args.categories] -
 *   Custom categories; `keywords` may be an array or a comma-separated string.
 *   When omitted or empty, a built-in set of eight categories is used.
 * @returns {Promise<object>} `{ success: true, primaryCategory, allScores,
 *   confidence }` where `confidence` is the top score divided by the sum of
 *   all scores (0 when nothing matched), or `{ success: false, error }`.
 */
export async function handleContentClassification(args) {
    const { url, categories } = args;
    try {
        if (categories != null && !Array.isArray(categories)) {
            throw new Error('categories must be an array of { name, keywords } objects.');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const content = await page.evaluate(() => {
            return {
                title: document.title,
                text: document.body.innerText,
                metaDescription: document.querySelector('meta[name="description"]')?.getAttribute('content') || '',
                metaKeywords: document.querySelector('meta[name="keywords"]')?.getAttribute('content') || '',
                headings: Array.from(document.querySelectorAll('h1, h2, h3')).map(h => h.textContent).join(' ')
            };
        });
        const allText = [content.title, content.text, content.metaDescription, content.metaKeywords, content.headings].join(' ').toLowerCase();
        // Define default categories if not provided
        const defaultCategories = [
            { name: 'E-commerce', keywords: ['shop', 'buy', 'cart', 'price', 'product', 'order', 'checkout'] },
            { name: 'News', keywords: ['news', 'article', 'breaking', 'report', 'journalist', 'story'] },
            { name: 'Blog', keywords: ['blog', 'post', 'author', 'comment', 'share', 'subscribe'] },
            { name: 'Social Media', keywords: ['follow', 'like', 'share', 'post', 'friend', 'profile'] },
            { name: 'Educational', keywords: ['learn', 'course', 'tutorial', 'education', 'study', 'lesson'] },
            { name: 'Entertainment', keywords: ['video', 'movie', 'music', 'game', 'play', 'watch'] },
            { name: 'Business', keywords: ['business', 'company', 'service', 'enterprise', 'solution'] },
            { name: 'Technology', keywords: ['tech', 'software', 'app', 'code', 'developer', 'api'] }
        ];
        // An empty list would leave nothing to rank, so it also falls back to the defaults.
        const categoriesToUse = categories?.length ? categories : defaultCategories;
        const scores = categoriesToUse.map((cat) => {
            const rawKeywords = Array.isArray(cat.keywords) ? cat.keywords : String(cat.keywords ?? '').split(',');
            let score = 0;
            rawKeywords.forEach((rawKeyword) => {
                const keyword = String(rawKeyword).trim().toLowerCase();
                // Skip blanks (e.g. from a trailing comma): an empty needle would
                // "occur" between every pair of characters.
                if (keyword) {
                    // Splitting on the literal keyword counts non-overlapping
                    // occurrences without interpreting it as a regular expression.
                    score += allText.split(keyword).length - 1;
                }
            });
            return { category: cat.name, score };
        });
        scores.sort((a, b) => b.score - a.score);
        return {
            success: true,
            primaryCategory: scores[0],
            allScores: scores,
            confidence: scores[0].score / (scores.reduce((sum, s) => sum + s.score, 0) || 1)
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
144
|
+
/**
 * Sentiment Analysis - analyze sentiment of supplied text or page content.
 *
 * Content is taken from `text` when given. Otherwise it is read from the
 * browser page: the text of the first element matching `selector`, or the
 * whole body text when no selector is given. As in the other handlers, `url`
 * is optional and the current page is used when it is omitted.
 *
 * @param {object} args
 * @param {string} [args.text] - Text to analyze directly (no browser needed).
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {string} [args.selector] - CSS selector limiting the analyzed content.
 * @returns {Promise<object>} `{ success: true, overall, sentences, statistics }`
 *   with an overall score plus a per-sentence breakdown (each sentence preview
 *   is cut to 100 characters), or `{ success: false, error }`.
 */
export async function handleSentimentAnalysis(args) {
    const { url, selector, text } = args;
    try {
        let contentToAnalyze = text;
        if (!contentToAnalyze) {
            const page = getPageInstance();
            if (!page) {
                throw new Error('Browser not initialized. Call browser_init first.');
            }
            if (url && page.url() !== url) {
                await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
            }
            if (selector) {
                contentToAnalyze = await page.evaluate((sel) => {
                    const element = document.querySelector(sel);
                    return element ? element.textContent : '';
                }, selector);
            }
            else {
                contentToAnalyze = await page.evaluate(() => document.body.innerText);
            }
        }
        if (!contentToAnalyze) {
            throw new Error('No content to analyze');
        }
        const labelFor = (score) => (score > 0 ? 'positive' : score < 0 ? 'negative' : 'neutral');
        const result = sentiment.analyze(contentToAnalyze);
        // Classify sentiment per sentence
        const sentenceTokenizer = new natural.SentenceTokenizer();
        const sentences = sentenceTokenizer.tokenize(contentToAnalyze);
        const sentenceSentiments = sentences.map((sentence) => {
            const { score } = sentiment.analyze(sentence);
            return {
                sentence: sentence.substring(0, 100),
                score,
                sentiment: labelFor(score)
            };
        });
        const countWith = (label) => sentenceSentiments.filter((s) => s.sentiment === label).length;
        return {
            success: true,
            overall: {
                score: result.score,
                comparative: result.comparative,
                sentiment: labelFor(result.score),
                tokens: result.tokens.length,
                positive: result.positive,
                negative: result.negative
            },
            sentences: sentenceSentiments,
            statistics: {
                totalSentences: sentences.length,
                positiveSentences: countWith('positive'),
                negativeSentences: countWith('negative'),
                neutralSentences: countWith('neutral')
            }
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
212
|
+
/**
 * Summary Generator - extractive summary of page content.
 *
 * Splits the content into sentences, scores each sentence by the sum of its
 * TF-IDF term weights (each sentence is its own document), keeps the
 * highest-scoring ones and emits them in their original order.
 *
 * Content comes from the first element matching `selector`; without a
 * selector, from the first `main, article, .content, .post, #content`
 * element, falling back to the body text.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {number} [args.maxSentences=5] - Maximum sentences in the summary.
 *   Values that are not a number >= 1 fall back to 5; fractions are floored.
 * @param {string} [args.selector] - CSS selector limiting the summarized content.
 * @returns {Promise<object>} `{ success: true, summary, originalLength,
 *   summaryLength, compressionRatio, sentenceCount, topScoredSentences }`,
 *   or `{ success: false, error }`.
 */
export async function handleSummaryGenerator(args) {
    const { url, maxSentences = 5, selector } = args;
    try {
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        let content;
        if (selector) {
            content = await page.evaluate((sel) => {
                const element = document.querySelector(sel);
                return element ? element.textContent : '';
            }, selector);
        }
        else {
            content = await page.evaluate(() => {
                // Extract main content
                const main = document.querySelector('main, article, .content, .post, #content');
                if (main) {
                    return main.textContent || '';
                }
                return document.body.innerText;
            });
        }
        if (!content) {
            throw new Error('No content found');
        }
        // A negative limit would make slice() drop sentences from the end and 0
        // would produce an empty "successful" summary, so reject both.
        const requested = Math.floor(Number(maxSentences));
        const limit = requested >= 1 ? requested : 5;
        // Use TF-IDF for extractive summarization
        const tfidf = new natural.TfIdf();
        const sentenceTokenizer = new natural.SentenceTokenizer();
        const sentences = sentenceTokenizer.tokenize(content);
        if (sentences.length === 0) {
            throw new Error('No sentences found to summarize');
        }
        // Add each sentence as a document
        sentences.forEach((sentence) => {
            tfidf.addDocument(sentence);
        });
        // Score each sentence
        const sentenceScores = sentences.map((sentence, index) => {
            const score = tfidf.listTerms(index).reduce((sum, term) => sum + term.tfidf, 0);
            return { sentence, score, index };
        });
        // Take the top N by score, then restore reading order
        sentenceScores.sort((a, b) => b.score - a.score);
        const topSentences = sentenceScores.slice(0, limit);
        topSentences.sort((a, b) => a.index - b.index);
        const summary = topSentences.map((s) => s.sentence).join(' ');
        return {
            success: true,
            summary,
            originalLength: content.length,
            summaryLength: summary.length,
            compressionRatio: (summary.length / content.length * 100).toFixed(2) + '%',
            sentenceCount: {
                original: sentences.length,
                summary: topSentences.length
            },
            topScoredSentences: topSentences.map((s) => ({
                sentence: s.sentence,
                score: s.score.toFixed(2)
            }))
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
293
|
+
/**
 * Translation Support - detect language and provide translation info.
 *
 * Detects the language of the content with franc (which reports three-letter
 * ISO 639-3 codes such as `eng`), lists up to ten TF-IDF terms as key phrases,
 * and reports whether the content differs from the target language. No
 * translation is performed here.
 *
 * Content is taken from `text` when given. Otherwise it is read from the
 * browser page (the first element matching `selector`, or the whole body
 * text); `url` is optional and the current page is used when it is omitted.
 *
 * @param {object} args
 * @param {string} [args.text] - Text to inspect directly (no browser needed).
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {string} [args.selector] - CSS selector limiting the inspected content.
 * @param {string} [args.targetLanguage='en'] - Target language. Two-letter
 *   codes for the twelve named languages (optionally with a region, e.g.
 *   `en-US`) are mapped to franc's three-letter codes before comparison;
 *   anything else is compared as given (lowercased).
 * @returns {Promise<object>} `{ success: true, detectedLanguage, targetLanguage,
 *   needsTranslation, contentPreview, contentLength, keyPhrases,
 *   translationNote }`, or `{ success: false, error }`.
 */
export async function handleTranslationSupport(args) {
    const { url, selector, text, targetLanguage = 'en' } = args;
    try {
        let contentToTranslate = text;
        if (!contentToTranslate) {
            const page = getPageInstance();
            if (!page) {
                throw new Error('Browser not initialized. Call browser_init first.');
            }
            if (url && page.url() !== url) {
                await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
            }
            if (selector) {
                contentToTranslate = await page.evaluate((sel) => {
                    const element = document.querySelector(sel);
                    return element ? element.textContent : '';
                }, selector);
            }
            else {
                contentToTranslate = await page.evaluate(() => document.body.innerText);
            }
        }
        if (!contentToTranslate) {
            throw new Error('No content to translate');
        }
        // Detect language using franc
        const detectedLang = franc(contentToTranslate, { minLength: 10 });
        // Get language name
        const langNames = {
            'eng': 'English',
            'spa': 'Spanish',
            'fra': 'French',
            'deu': 'German',
            'ita': 'Italian',
            'por': 'Portuguese',
            'rus': 'Russian',
            'jpn': 'Japanese',
            'kor': 'Korean',
            'cmn': 'Chinese (Mandarin)',
            'ara': 'Arabic',
            'hin': 'Hindi',
            'und': 'Undetermined'
        };
        const languageName = langNames[detectedLang] || detectedLang;
        // The detected code is three letters while the default target ('en') is
        // two, so the target must be normalized before the two are compared.
        const twoToThreeLetter = {
            en: 'eng',
            es: 'spa',
            fr: 'fra',
            de: 'deu',
            it: 'ita',
            pt: 'por',
            ru: 'rus',
            ja: 'jpn',
            ko: 'kor',
            zh: 'cmn',
            ar: 'ara',
            hi: 'hin'
        };
        const targetBase = String(targetLanguage).trim().toLowerCase().split(/[-_]/)[0];
        const targetCode = twoToThreeLetter[targetBase] || targetBase;
        // Extract key phrases using TF-IDF
        const tfidf = new natural.TfIdf();
        tfidf.addDocument(contentToTranslate);
        const keyPhrases = tfidf.listTerms(0)
            .slice(0, 10)
            .map((term) => term.term);
        return {
            success: true,
            detectedLanguage: {
                code: detectedLang,
                name: languageName
            },
            targetLanguage,
            needsTranslation: detectedLang !== 'und' && detectedLang !== targetCode,
            contentPreview: contentToTranslate.substring(0, 200),
            contentLength: contentToTranslate.length,
            keyPhrases,
            translationNote: 'Use external translation API (Google Translate, DeepL) for actual translation'
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
import { getPageInstance } from '../browser-manager.js';
|
|
3
|
+
import axios from 'axios';
|
|
4
|
+
/**
 * REST API Endpoint Finder - discover REST API endpoints.
 *
 * Two sources are combined:
 *  1. Network traffic (optional): while navigating and for `scanDuration`
 *     milliseconds afterwards, every distinct `xhr`/`fetch` request URL is
 *     recorded with its method and headers. Header names containing `cookie`
 *     or `authorization` are left out of the report.
 *  2. Page content: inline script text is searched for API-looking URLs, and
 *     `data-api` / `data-endpoint` / `data-url` attributes are collected.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {boolean} [args.analyzeNetworkRequests=true] - Record network traffic.
 * @param {number} [args.scanDuration=5000] - Extra listening time in ms after
 *   navigation (only used when network analysis is on).
 * @returns {Promise<object>} `{ success: true, networkRequests,
 *   discoveredInContent, summary }` (content results are capped at 20 entries,
 *   counts are not), or `{ success: false, error }`.
 */
export async function handleRESTAPIEndpointFinder(args) {
    const { url, analyzeNetworkRequests = true, scanDuration = 5000 } = args;
    try {
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        const apiEndpoints = [];
        const seenUrls = new Set();
        const isSensitiveHeader = (name) => {
            const lower = name.toLowerCase();
            return lower.includes('cookie') || lower.includes('authorization');
        };
        const requestHandler = (request) => {
            const requestUrl = request.url();
            const resourceType = request.resourceType();
            // Filter for API-like requests, once per URL
            if ((resourceType !== 'xhr' && resourceType !== 'fetch') || seenUrls.has(requestUrl)) {
                return;
            }
            seenUrls.add(requestUrl);
            const safeHeaders = Object.fromEntries(Object.entries(request.headers()).filter(([name]) => !isSensitiveHeader(name)));
            apiEndpoints.push({
                url: requestUrl,
                method: request.method(),
                resourceType,
                headers: safeHeaders,
                hasBody: !!request.postData(),
                timestamp: new Date().toISOString()
            });
        };
        if (analyzeNetworkRequests) {
            page.on('request', requestHandler);
        }
        try {
            // Navigate regardless of whether traffic is being recorded, so the
            // content scan below always inspects the requested page.
            if (url && page.url() !== url) {
                await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
            }
            if (analyzeNetworkRequests) {
                // Wait for additional requests. A plain timer is used because
                // page.waitForTimeout is not available in newer Puppeteer releases.
                await new Promise((resolve) => setTimeout(resolve, scanDuration));
            }
        }
        finally {
            // Always detach, even when navigation fails, so handlers do not pile
            // up on the shared page across calls.
            if (analyzeNetworkRequests) {
                page.off('request', requestHandler);
            }
        }
        // Also scan page content for API endpoints
        const discoveredAPIs = await page.evaluate(() => {
            const apis = [];
            const scripts = Array.from(document.querySelectorAll('script'));
            // Common API URL patterns
            const apiPatterns = [
                /https?:\/\/[^"'\s]+\/api\/[^"'\s]*/gi,
                /https?:\/\/api\.[^"'\s]+/gi,
                /\/api\/v?\d*\/[^"'\s]*/gi,
                /graphql/gi,
                /rest\/v?\d*/gi
            ];
            scripts.forEach(script => {
                const content = script.textContent || '';
                apiPatterns.forEach(pattern => {
                    const matches = content.match(pattern);
                    if (matches) {
                        matches.forEach(match => apis.push({
                            url: match,
                            source: 'script_content',
                            type: 'discovered'
                        }));
                    }
                });
            });
            // Check for data attributes
            const elements = Array.from(document.querySelectorAll('[data-api], [data-endpoint], [data-url]'));
            elements.forEach(el => {
                const apiUrl = el.getAttribute('data-api') || el.getAttribute('data-endpoint') || el.getAttribute('data-url');
                if (apiUrl) {
                    apis.push({
                        url: apiUrl,
                        source: 'data_attribute',
                        element: el.tagName.toLowerCase()
                    });
                }
            });
            return apis;
        });
        return {
            success: true,
            networkRequests: {
                count: apiEndpoints.length,
                endpoints: apiEndpoints
            },
            discoveredInContent: {
                count: discoveredAPIs.length,
                endpoints: discoveredAPIs.slice(0, 20) // Limit to 20
            },
            summary: {
                totalFound: apiEndpoints.length + discoveredAPIs.length,
                uniqueNetworkAPIs: apiEndpoints.length,
                discoveredAPIs: discoveredAPIs.length
            }
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
115
|
+
/**
 * Webhook Support - test a webhook endpoint or acknowledge its configuration.
 *
 * In test mode (the default) one HTTP request is sent to `webhookUrl` with a
 * 10 second timeout and the outcome is reported. Outside test mode nothing is
 * sent; the call only echoes the configuration back.
 *
 * @param {object} args
 * @param {string} args.webhookUrl - Endpoint to call.
 * @param {string} [args.method='POST'] - HTTP method for the test request.
 * @param {*} [args.payload] - Request body; a `{ test, timestamp }` ping is
 *   sent when this is falsy.
 * @param {object} [args.headers] - Extra headers, merged over the JSON
 *   content-type and user-agent defaults.
 * @param {boolean} [args.testMode=true] - Send a request (true) or only
 *   acknowledge (false).
 * @returns {Promise<object>} A result object whose `success` flag tells whether
 *   the request (or acknowledgement) went through; failures carry `error` and,
 *   for request failures, the response `details`.
 */
export async function handleWebhookSupport(args) {
    const { webhookUrl, method = 'POST', payload, headers, testMode = true } = args;
    try {
        if (!webhookUrl) {
            throw new Error('Webhook URL is required');
        }
        if (!testMode) {
            // Production mode - nothing is sent, the configuration is echoed back
            return {
                success: true,
                webhookUrl,
                method,
                testMode: false,
                status: 'configured',
                note: 'Webhook configured. Send data using separate call or integrate with scraping workflow'
            };
        }
        const requestHeaders = {
            'Content-Type': 'application/json',
            'User-Agent': 'Brave-MCP-Server/1.0',
            ...headers
        };
        // Test webhook with ping
        try {
            const body = payload || { test: true, timestamp: new Date().toISOString() };
            const reply = await axios({
                method,
                url: webhookUrl,
                headers: requestHeaders,
                data: body,
                timeout: 10000
            });
            const { status, statusText, headers: replyHeaders, data } = reply;
            return {
                success: true,
                webhookUrl,
                method,
                testMode: true,
                response: { status, statusText, headers: replyHeaders, data }
            };
        }
        catch (requestError) {
            const failedReply = requestError.response;
            return {
                success: false,
                webhookUrl,
                testMode: true,
                error: requestError.message,
                details: {
                    status: failedReply?.status,
                    statusText: failedReply?.statusText,
                    data: failedReply?.data
                }
            };
        }
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|
|
186
|
+
/**
 * All Website API Finder - static scan of the page for API hints.
 *
 * Looks, in order, for: links whose href mentions api/docs/developer (first
 * ten kept), GraphQL markers in the page HTML, API base-URL assignments and
 * WebSocket URLs in inline scripts, `meta` tags whose name/property mentions
 * api, a fixed list of common REST path fragments in the page HTML, and
 * Swagger/OpenAPI links.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first; navigation is skipped when
 *   the current page already reports this URL.
 * @param {boolean} [args.deepScan] - Accepted for backward compatibility; no
 *   scan step currently depends on it.
 * @param {boolean} [args.includeExternal=false] - Also report configured API
 *   URLs that do not start with `/`.
 * @returns {Promise<object>} `{ success: true, summary, details, uniqueAPIs,
 *   recommendations }` (`uniqueAPIs` is capped at 20 entries and
 *   `recommendations` is always empty), or `{ success: false, error }`.
 */
export async function handleAllWebsiteAPIFinder(args) {
    const { url, includeExternal = false } = args;
    try {
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const apiDiscovery = await page.evaluate((external) => {
            const result = {
                apis: [],
                graphql: [],
                rest: [],
                websockets: [],
                metadata: {}
            };
            // Serialize the DOM once; the GraphQL and REST checks both search it.
            const html = document.body.innerHTML;
            // 1. Check for API documentation links
            const apiLinks = Array.from(document.querySelectorAll('a[href*="api"], a[href*="docs"], a[href*="developer"]'));
            result.documentationLinks = apiLinks.map(link => ({
                text: link.textContent?.trim(),
                href: link.href
            })).slice(0, 10);
            // 2. Scan for GraphQL
            const graphqlIndicators = html.match(/graphql|__schema|query\s*{|mutation\s*{/gi);
            if (graphqlIndicators) {
                result.graphql.push({
                    found: true,
                    indicators: graphqlIndicators.length,
                    possibleEndpoints: [
                        '/graphql',
                        '/api/graphql',
                        '/v1/graphql'
                    ]
                });
            }
            // 3. Scan scripts for API configurations
            // matchAll works on a copy of the regex, so the patterns can be shared
            // across scripts without carrying lastIndex state over.
            const baseUrlPatterns = [
                /apiUrl\s*[:=]\s*["']([^"']+)["']/gi,
                /baseURL\s*[:=]\s*["']([^"']+)["']/gi,
                /API_BASE\s*[:=]\s*["']([^"']+)["']/gi,
                /endpoint\s*[:=]\s*["']([^"']+)["']/gi
            ];
            Array.from(document.querySelectorAll('script')).forEach(script => {
                const content = script.textContent || '';
                // Look for API base URLs
                baseUrlPatterns.forEach(pattern => {
                    for (const match of content.matchAll(pattern)) {
                        if (match[1] && (external || match[1].startsWith('/'))) {
                            result.apis.push({
                                type: 'config',
                                url: match[1],
                                source: 'script'
                            });
                        }
                    }
                });
                // Look for WebSocket connections
                if (content.includes('WebSocket') || content.includes('ws://') || content.includes('wss://')) {
                    const wsMatches = content.match(/wss?:\/\/[^"'\s]+/gi) || [];
                    wsMatches.forEach(ws => {
                        result.websockets.push({
                            url: ws,
                            protocol: ws.startsWith('wss') ? 'secure' : 'unsecure'
                        });
                    });
                }
            });
            // 4. Check meta tags for API info
            const metaTags = Array.from(document.querySelectorAll('meta[name*="api"], meta[property*="api"]'));
            result.metadata.apiMeta = metaTags.map(meta => ({
                name: meta.getAttribute('name') || meta.getAttribute('property'),
                content: meta.getAttribute('content')
            }));
            // 5. Look for REST patterns
            const restPatterns = [
                '/api/v1', '/api/v2', '/api/v3',
                '/rest/v1', '/rest/v2',
                '/api/users', '/api/products', '/api/data'
            ];
            restPatterns.forEach(pattern => {
                if (html.includes(pattern)) {
                    result.rest.push({
                        pattern,
                        found: true
                    });
                }
            });
            // 6. Check for Swagger/OpenAPI
            const swaggerLinks = Array.from(document.querySelectorAll('a[href*="swagger"], a[href*="openapi"], link[href*="swagger"]'));
            if (swaggerLinks.length > 0) {
                result.swagger = swaggerLinks.map(link => ({
                    // The resolved href property is preferred; the raw attribute is
                    // the fallback when the property is not a usable string.
                    href: typeof link.href === 'string' && link.href ? link.href : link.getAttribute('href')
                }));
            }
            return result;
        }, includeExternal);
        // Deduplicate APIs
        const uniqueAPIs = [...new Set(apiDiscovery.apis.map((api) => api.url))];
        return {
            success: true,
            summary: {
                totalAPIsFound: uniqueAPIs.length,
                graphqlDetected: apiDiscovery.graphql.length > 0,
                restEndpointsFound: apiDiscovery.rest.filter((r) => r.found).length,
                websocketsFound: apiDiscovery.websockets.length,
                documentationLinks: apiDiscovery.documentationLinks?.length || 0
            },
            details: apiDiscovery,
            uniqueAPIs: uniqueAPIs.slice(0, 20),
            recommendations: []
        };
    }
    catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
|