brave-real-browser-mcp-server 2.5.1 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -5
- package/dist/advanced/advanced-scraping.js +301 -0
- package/dist/ai/ai-tools.js +390 -0
- package/dist/api/api-integration.js +371 -0
- package/dist/browser-manager.js +15 -61
- package/dist/captcha/captcha-handler.js +374 -0
- package/dist/extractors/content-type-extractors.js +233 -0
- package/dist/extractors/multi-element-extractors.js +174 -0
- package/dist/extractors/smart-data-extractors.js +204 -0
- package/dist/extractors/smart-data-extractors.test.js +91 -0
- package/dist/handlers/advanced-scraping-handlers.js +333 -0
- package/dist/handlers/advanced-scraping-handlers.test.js +218 -0
- package/dist/handlers/browser-handlers.js +6 -21
- package/dist/handlers/browser-handlers.test.js +7 -29
- package/dist/handlers/interaction-handlers.js +10 -55
- package/dist/handlers/interaction-handlers.test.js +1 -3
- package/dist/index.js +46 -79
- package/dist/monitoring/monitoring-tools.js +372 -0
- package/dist/navigation/pagination-tools.js +215 -0
- package/dist/processors/data-processors.js +250 -0
- package/dist/processors/data-processors.test.js +163 -0
- package/dist/search/search-filter-tools.js +339 -0
- package/dist/tool-definitions.js +277 -96
- package/dist/visual/visual-tools.js +516 -0
- package/package.json +1 -3
- package/dist/handlers/auto-captcha-detector.js +0 -150
- package/dist/handlers/captcha-solver-handlers.js +0 -283
package/README.md
CHANGED
|
@@ -93,7 +93,7 @@ Once set up, you can ask Claude to:
|
|
|
93
93
|
- **Fill forms**: "Fill out this contact form with my details"
|
|
94
94
|
- **Extract data**: "Get all the product prices from this page"
|
|
95
95
|
- **Automate tasks**: "Log into my account and download my invoice"
|
|
96
|
-
- **Solve captchas
|
|
96
|
+
- **Solve captchas**: "Handle any captchas that appear"
|
|
97
97
|
|
|
98
98
|
### Safety Notes
|
|
99
99
|
- Claude will show you what it's doing - you can see the browser window
|
|
@@ -124,7 +124,6 @@ assistants to control a real browser, extract content, and more.
|
|
|
124
124
|
- **Comprehensive toolset**: 11 tools covering all browser automation needs
|
|
125
125
|
- **Proxy support**: Built-in proxy configuration for enhanced privacy
|
|
126
126
|
- **Captcha handling**: Support for solving reCAPTCHA, hCaptcha, and Turnstile
|
|
127
|
-
- **🆕 Auto CAPTCHA Solver**: 🤖 Automatically detects and solves text CAPTCHAs with 100% accuracy - no selectors needed! [Learn more](docs/AUTO_CAPTCHA_SOLVER.md)
|
|
128
127
|
- **Robust error handling**: Advanced error recovery with circuit breaker pattern
|
|
129
128
|
- **Stack overflow protection**: Comprehensive protection against infinite recursion
|
|
130
129
|
- **Timeout controls**: Automatic timeout mechanisms prevent hanging operations
|
|
@@ -539,10 +538,8 @@ AI: I'll set up the browser with your proxy configuration.
|
|
|
539
538
|
### Anti-Detection Tools
|
|
540
539
|
|
|
541
540
|
| Tool Name | Description | Required Parameters | Optional Parameters |
|
|
542
|
-
|
|
541
|
+
|-----------|-------------|---------------------|-------------------|
|
|
543
542
|
| `solve_captcha` | Attempt to solve captchas | `type` | None |
|
|
544
|
-
| `solve_text_captcha` | Solve text-based image CAPTCHAs using OCR | `imageSelector` | `inputSelector`, `config` |
|
|
545
|
-
| `auto_solve_captcha` | 🆕 **Automatically detect and solve CAPTCHAs** (no selectors needed!) | None | `config` |
|
|
546
543
|
|
|
547
544
|
## Advanced Features
|
|
548
545
|
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
// Advanced Scraping Features Module
|
|
2
|
+
// Dynamic Content Handling, Authentication, Rate Limiting, Session Management
|
|
3
|
+
/**
 * Wait for dynamically loaded (AJAX-style) content to become available.
 *
 * If `options.selector` is provided, waits for that selector via
 * `page.waitForSelector`; otherwise waits for the network to go idle via
 * `page.waitForNetworkIdle`. This function never throws: every failure is
 * reported through the returned object.
 *
 * @param {object} page - Page-like object exposing `waitForSelector` and
 *   `waitForNetworkIdle` (presumably a Puppeteer Page; confirm against callers).
 * @param {object} [options]
 * @param {string} [options.selector] - Selector to wait for.
 * @param {number} [options.timeout] - Timeout in ms; any falsy value
 *   (including 0) falls back to 30000.
 * @param {number} [options.networkIdleTime] - Idle time in ms; any falsy
 *   value (including 0) falls back to 500.
 * @returns {Promise<{success: boolean, waitTime: number, method: string, error?: string}>}
 *   `method` is 'selector' or 'networkIdle' on success and 'timeout' on any
 *   failure; `error` carries the underlying failure message when present.
 */
export async function waitForDynamicContent(page, options) {
    const startTime = Date.now();
    const timeout = options?.timeout || 30000;
    const networkIdleTime = options?.networkIdleTime || 500;
    const elapsed = () => Date.now() - startTime;
    try {
        if (options?.selector) {
            // Wait for the specific selector.
            await page.waitForSelector(options.selector, { timeout });
            return { success: true, waitTime: elapsed(), method: 'selector' };
        }
        // No selector given: wait for the network to go idle.
        await page.waitForNetworkIdle({ timeout, idleTime: networkIdleTime });
        return { success: true, waitTime: elapsed(), method: 'networkIdle' };
    }
    catch (error) {
        // `method` stays 'timeout' for backward compatibility, but the real
        // cause is surfaced so non-timeout failures (bad selector, closed
        // page, ...) are no longer indistinguishable from a timeout.
        return {
            success: false,
            waitTime: elapsed(),
            method: 'timeout',
            error: error instanceof Error ? error.message : String(error)
        };
    }
}
|
|
37
|
+
/**
 * Read the trimmed text of an element that lives inside a shadow root.
 *
 * The lookup runs through `page.evaluate`: the host is located with
 * `document.querySelector`, then the inner element is searched within the
 * host's `shadowRoot`.
 *
 * @param {object} page - Page-like object exposing `evaluate`.
 * @param {string} hostSelector - Selector of the shadow host element.
 * @param {string} innerSelector - Selector resolved inside the shadow root.
 * @returns {Promise<string|null|undefined>} Trimmed text content, or null when
 *   the host, its `shadowRoot`, or the inner element is missing.
 */
export async function extractFromShadowDOM(page, hostSelector, innerSelector) {
    const readShadowText = (hostSel, innerSel) => {
        const shadow = document.querySelector(hostSel)?.shadowRoot;
        if (!shadow) {
            return null;
        }
        const target = shadow.querySelector(innerSel);
        if (!target) {
            return null;
        }
        return target.textContent?.trim();
    };
    const text = await page.evaluate(readShadowText, hostSelector, innerSelector);
    return text;
}
|
|
51
|
+
/**
 * Extract content from inside an iframe.
 *
 * Each frame of the page is searched for an element matching
 * `iframeSelector`. The matched element's own content frame is then resolved
 * and the content is read from THAT frame. (Previously the content was read
 * from the frame that merely contained the `<iframe>` element, i.e. the
 * parent document, so the iframe's content was never actually returned.)
 *
 * @param {object} page - Page-like object exposing `frames()`; frames expose
 *   `$`, and element handles expose `contentFrame()` (Puppeteer-style API).
 * @param {string} iframeSelector - Selector of the `<iframe>` element.
 * @param {string} [contentSelector] - Selector inside the iframe; when
 *   omitted the iframe's full HTML (`frame.content()`) is returned.
 * @returns {Promise<string|null|undefined>} Trimmed text of the selected
 *   element, the iframe HTML, or null when nothing could be extracted.
 */
export async function extractFromIframe(page, iframeSelector, contentSelector) {
    for (const frame of page.frames()) {
        try {
            const iframeHandle = await frame.$(iframeSelector);
            if (!iframeHandle) {
                continue;
            }
            // Step into the iframe itself rather than staying in its parent.
            const innerFrame = await iframeHandle.contentFrame();
            if (!innerFrame) {
                // The selector matched something that is not a frame element.
                continue;
            }
            if (contentSelector) {
                return await innerFrame.$eval(contentSelector, (el) => el.textContent?.trim());
            }
            return await innerFrame.content();
        }
        catch (error) {
            // Best effort: a frame that fails (e.g. lookup or $eval throws)
            // is skipped so the remaining frames can still be tried.
            continue;
        }
    }
    return null;
}
|
|
75
|
+
/**
 * Cookie Manager
 *
 * Keeps an in-memory snapshot of a page's cookies so they can be re-applied
 * later, and converts that snapshot to and from JSON.
 */
export class CookieManager {
    /** Most recently saved or imported cookie objects. */
    cookies = [];
    /**
     * Replace the snapshot with the cookies reported by `page.cookies()`.
     * @param {object} page - Page-like object exposing `cookies()`.
     */
    async saveCookies(page) {
        this.cookies = await page.cookies();
    }
    /**
     * Apply the snapshot to `page` via `page.setCookie`; does nothing when
     * the snapshot is empty.
     * @param {object} page - Page-like object exposing `setCookie()`.
     */
    async loadCookies(page) {
        if (this.cookies.length > 0) {
            await page.setCookie(...this.cookies);
        }
    }
    /** @returns {Array<object>} The current snapshot (the internal array itself, not a copy). */
    getCookies() {
        return this.cookies;
    }
    /** Drop the snapshot. */
    clearCookies() {
        this.cookies = [];
    }
    /** @returns {Promise<string>} The snapshot as pretty-printed JSON (2-space indent). */
    async exportCookies() {
        return JSON.stringify(this.cookies, null, 2);
    }
    /**
     * Replace the snapshot with cookies parsed from JSON.
     *
     * The payload must be a JSON array. Anything else (object, string, null)
     * used to be stored as-is and only blew up later inside `loadCookies`;
     * it is now rejected up front and the existing snapshot is left untouched.
     *
     * @param {string} cookiesJson - JSON text, e.g. the output of `exportCookies`.
     * @throws {SyntaxError} When `cookiesJson` is not valid JSON.
     * @throws {TypeError} When the parsed value is not an array.
     */
    async importCookies(cookiesJson) {
        const parsed = JSON.parse(cookiesJson);
        if (!Array.isArray(parsed)) {
            throw new TypeError('Cookie JSON must be an array of cookie objects');
        }
        this.cookies = parsed;
    }
}
|
|
102
|
+
/**
 * Session Manager
 *
 * Stores named snapshots of a page's cookies, localStorage and sessionStorage
 * in memory so a session can be re-applied to a page across requests.
 */
export class SessionManager {
    /** Map of sessionId -> { cookies, localStorage, sessionStorage, timestamp }. */
    sessionData = new Map();
    /** Helper used to read the page's cookies when a session is saved. */
    cookieManager = new CookieManager();
    /**
     * Snapshot the page's cookies and web storage under `sessionId`,
     * overwriting any snapshot already stored under that id.
     *
     * @param {object} page - Page-like object exposing `cookies()` and `evaluate()`.
     * @param {string} sessionId - Key under which the snapshot is stored.
     */
    async saveSession(page, sessionId) {
        await this.cookieManager.saveCookies(page);
        // The callbacks are handed to page.evaluate, so the storage globals
        // they reference are presumably the page's own (Puppeteer semantics).
        // Locals deliberately do not reuse the names `localStorage` /
        // `sessionStorage`, which would shadow the identifiers used inside
        // the callbacks.
        const localStorageJson = await page.evaluate(() => {
            return JSON.stringify(localStorage);
        });
        const sessionStorageJson = await page.evaluate(() => {
            return JSON.stringify(sessionStorage);
        });
        this.sessionData.set(sessionId, {
            cookies: this.cookieManager.getCookies(),
            localStorage: localStorageJson,
            sessionStorage: sessionStorageJson,
            timestamp: Date.now()
        });
    }
    /**
     * Re-apply the snapshot stored under `sessionId` to `page`.
     *
     * Cookies come from the stored snapshot itself. Previously they were
     * loaded from the shared `cookieManager`, which only remembers the most
     * recently saved session, so restoring an older session applied another
     * session's cookies.
     *
     * @param {object} page - Page-like object exposing `setCookie()` and `evaluate()`.
     * @param {string} sessionId - Key of the snapshot to restore.
     * @returns {Promise<boolean>} false when no such session exists, true otherwise.
     */
    async restoreSession(page, sessionId) {
        const session = this.sessionData.get(sessionId);
        if (!session) {
            return false;
        }
        // Restore this session's own cookies.
        if (session.cookies?.length > 0) {
            await page.setCookie(...session.cookies);
        }
        // Restore localStorage
        if (session.localStorage) {
            await page.evaluate((data) => {
                const parsed = JSON.parse(data);
                for (const key in parsed) {
                    localStorage.setItem(key, parsed[key]);
                }
            }, session.localStorage);
        }
        // Restore sessionStorage
        if (session.sessionStorage) {
            await page.evaluate((data) => {
                const parsed = JSON.parse(data);
                for (const key in parsed) {
                    sessionStorage.setItem(key, parsed[key]);
                }
            }, session.sessionStorage);
        }
        return true;
    }
    /**
     * Remove the snapshot stored under `sessionId`, if any.
     * @param {string} sessionId
     */
    deleteSession(sessionId) {
        this.sessionData.delete(sessionId);
    }
    /** @returns {string[]} Ids of all stored sessions, in insertion order. */
    listSessions() {
        return Array.from(this.sessionData.keys());
    }
}
|
|
158
|
+
/**
 * Rate Limiter
 *
 * Throttles callers with two rules: at most `maxRequestsPerWindow` requests
 * per `windowDuration` ms, and at least `minDelay` ms between consecutive
 * requests. Callers await `waitForNextRequest()` before each request.
 */
export class RateLimiter {
    /** Minimum gap between two requests, in ms. */
    minDelay;
    /** Maximum number of requests allowed per window. */
    maxRequestsPerWindow;
    /** Window length, in ms. */
    windowDuration;
    /** Timestamp (ms) at which the last request was released; 0 before the first. */
    lastRequestTime = 0;
    /** Requests released in the current window. */
    requestCount = 0;
    /** Timestamp (ms) at which the current window started. */
    windowStart = Date.now();
    /**
     * @param {number} [minDelay=1000]
     * @param {number} [maxRequestsPerWindow=10]
     * @param {number} [windowDuration=60000]
     */
    constructor(minDelay = 1000, maxRequestsPerWindow = 10, windowDuration = 60000) {
        this.minDelay = minDelay;
        this.maxRequestsPerWindow = maxRequestsPerWindow;
        this.windowDuration = windowDuration;
    }
    /**
     * Resolve once the next request may be sent, then record it.
     * @returns {Promise<void>}
     */
    async waitForNextRequest() {
        let now = Date.now();
        // Start a fresh window once the current one has elapsed.
        if (now - this.windowStart > this.windowDuration) {
            this.requestCount = 0;
            this.windowStart = now;
        }
        // Window quota exhausted: sleep out the rest of the window.
        if (this.requestCount >= this.maxRequestsPerWindow) {
            const waitTime = this.windowDuration - (now - this.windowStart);
            await this.sleep(waitTime);
            // Re-read the clock. The stale pre-sleep value used to make the
            // min-delay check below believe no time had passed, adding a
            // redundant second sleep.
            now = Date.now();
            this.requestCount = 0;
            this.windowStart = now;
        }
        // Enforce the minimum gap between consecutive requests.
        const timeSinceLastRequest = now - this.lastRequestTime;
        if (timeSinceLastRequest < this.minDelay) {
            await this.sleep(this.minDelay - timeSinceLastRequest);
        }
        this.lastRequestTime = Date.now();
        this.requestCount++;
    }
    /**
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Resolves after `ms` via `setTimeout`.
     */
    sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
    /**
     * @returns {{requestCount: number, windowRemaining: number}} Requests
     *   released in the current window and how many are left in it.
     */
    getStats() {
        return {
            requestCount: this.requestCount,
            windowRemaining: this.maxRequestsPerWindow - this.requestCount
        };
    }
}
|
|
206
|
+
/**
 * Random Delay Generator
 *
 * Picks a pseudo-random integer between `min` and `max` (both inclusive),
 * intended for human-like pauses.
 *
 * @param {number} [min=500] - Lower bound.
 * @param {number} [max=2000] - Upper bound.
 * @returns {number} Value computed as floor(random * (max - min + 1)) + min.
 */
export function getRandomDelay(min = 500, max = 2000) {
    const span = max - min + 1;
    const offset = Math.floor(Math.random() * span);
    return offset + min;
}
|
|
213
|
+
/**
 * User Agent Rotator
 *
 * Holds a list of user-agent strings and hands them out either in
 * round-robin order or at random.
 */
export class UserAgentRotator {
    /** Pool of user-agent strings; extended by `addUserAgent`. */
    userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    ];
    /** Index of the entry the next `getNext()` call returns. */
    currentIndex = 0;
    /**
     * Return the entry at the cursor and advance the cursor, wrapping to the
     * start after the last entry.
     * @returns {string}
     */
    getNext() {
        const position = this.currentIndex;
        this.currentIndex = (position + 1) % this.userAgents.length;
        return this.userAgents[position];
    }
    /**
     * Return a pseudo-randomly chosen entry; the cursor is not touched.
     * @returns {string}
     */
    getRandom() {
        const pick = Math.floor(Math.random() * this.userAgents.length);
        return this.userAgents[pick];
    }
    /**
     * Append a user-agent string to the pool.
     * @param {string} ua
     */
    addUserAgent(ua) {
        this.userAgents.push(ua);
    }
}
|
|
238
|
+
/**
 * Auto Login Handler
 *
 * Fills a login form and submits it: waits for the username field, types the
 * username and password, pauses for a random delay, then clicks submit and
 * waits for the resulting navigation. Never throws; failures are reported in
 * the returned object.
 *
 * @param {object} page - Page-like object exposing `waitForSelector`, `type`,
 *   `click` and `waitForNavigation` (Puppeteer-style API).
 * @param {object} credentials
 * @param {string} credentials.usernameSelector
 * @param {string} credentials.username
 * @param {string} credentials.passwordSelector
 * @param {string} credentials.password
 * @param {string} credentials.submitSelector
 * @returns {Promise<{success: boolean, message: string}>}
 */
export async function autoLogin(page, credentials) {
    try {
        // Wait for form
        await page.waitForSelector(credentials.usernameSelector, { timeout: 10000 });
        // Fill username
        await page.type(credentials.usernameSelector, credentials.username, { delay: 100 });
        // Fill password
        await page.type(credentials.passwordSelector, credentials.password, { delay: 100 });
        // Random delay before submit. A plain timer is used instead of
        // page.waitForTimeout, which newer Puppeteer releases no longer provide.
        await new Promise((resolve) => setTimeout(resolve, getRandomDelay(500, 1500)));
        // Register the navigation wait BEFORE clicking. Waiting only after
        // the click resolved could miss a navigation that had already
        // finished and then fail with a timeout.
        await Promise.all([
            page.waitForNavigation({ waitUntil: 'networkidle0', timeout: 15000 }),
            page.click(credentials.submitSelector)
        ]);
        return {
            success: true,
            message: 'Login successful'
        };
    }
    catch (error) {
        return {
            success: false,
            message: `Login failed: ${error?.message ?? String(error)}`
        };
    }
}
|
|
268
|
+
/**
 * Robots.txt Checker
 *
 * Navigates `page` to `<origin>/robots.txt` and collects the `Disallow`
 * paths from every group addressed to `userAgent` or to `*`.
 * NOTE: this navigates the given page away from whatever it was showing.
 *
 * Fixes over the previous version:
 *  - parses the raw response body instead of `page.content()`, whose HTML
 *    wrapper hid the first directive;
 *  - field names and user-agent tokens are matched case-insensitively
 *    (`User-Agent:` was ignored before);
 *  - values keep everything after the first ':' (paths containing ':' were
 *    truncated);
 *  - `#` comments are stripped and consecutive `User-agent` lines share the
 *    rules that follow them;
 *  - a non-2xx response is treated as "no robots.txt" rather than parsed;
 *  - an empty `Disallow:` (which disallows nothing) is no longer reported as
 *    a rule.
 *
 * @param {object} page - Page-like object exposing `goto` and `content`
 *   (Puppeteer-style API; `goto` resolves to a response with `ok()`/`text()`).
 * @param {string} baseUrl - Any URL on the target site.
 * @param {string} [userAgent='*'] - User-agent token to look up.
 * @returns {Promise<{allowed: boolean, rules: string[]}>} `allowed` is false
 *   only when a matching group contains `Disallow: /`; `rules` lists the
 *   matching disallowed paths. Any error yields `{ allowed: true, rules: [] }`.
 */
export async function checkRobotsTxt(page, baseUrl, userAgent = '*') {
    try {
        const robotsUrl = new URL('/robots.txt', baseUrl).href;
        const response = await page.goto(robotsUrl, { waitUntil: 'networkidle0' });
        if (response && !response.ok()) {
            // Missing or unavailable robots.txt: nothing is disallowed.
            return { allowed: true, rules: [] };
        }
        // Prefer the raw body. Without a response object, fall back to the
        // rendered document with its markup stripped.
        const content = response
            ? await response.text()
            : (await page.content()).replace(/<[^>]*>/g, '\n');
        const wantedAgent = String(userAgent).toLowerCase();
        const rules = [];
        let allowed = true;
        let groupAgents = [];
        let readingAgents = false;
        for (const rawLine of content.split(/\r?\n/)) {
            const line = rawLine.split('#')[0].trim();
            const separator = line.indexOf(':');
            if (separator === -1) {
                continue;
            }
            const field = line.slice(0, separator).trim().toLowerCase();
            const value = line.slice(separator + 1).trim();
            if (field === 'user-agent') {
                // A User-agent line that follows rules opens a new group;
                // consecutive User-agent lines extend the current one.
                if (!readingAgents) {
                    groupAgents = [];
                    readingAgents = true;
                }
                groupAgents.push(value.toLowerCase());
                continue;
            }
            if (field === 'allow' || field === 'disallow') {
                readingAgents = false;
            }
            if (field !== 'disallow' || value === '') {
                continue;
            }
            if (groupAgents.includes(wantedAgent) || groupAgents.includes('*')) {
                rules.push(value);
                if (value === '/') {
                    allowed = false;
                }
            }
        }
        return { allowed, rules };
    }
    catch (error) {
        // If robots.txt can't be fetched or read, assume allowed.
        return { allowed: true, rules: [] };
    }
}
|