brave-real-browser-mcp-server 2.27.25 → 2.27.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/advanced-tools.js +542 -62
- package/dist/index.js +11 -15
- package/dist/lsp/browser-automation-lsp.js +19 -35
- package/dist/tool-definitions.js +77 -52
- package/dist/unified-server.js +7 -13
- package/package.json +2 -2
|
@@ -195,70 +195,526 @@ export async function handleMultiLayerRedirectTrace(page, args) {
|
|
|
195
195
|
};
|
|
196
196
|
}
|
|
197
197
|
/**
 * Regex search over page content or a caller-supplied test string
 * (regex101-style: flags, named/numbered groups, match context,
 * optional highlighting and replace mode).
 *
 * @param {object} page - Browser page; only used when `args.testString` is
 *   not provided. The code calls `waitForSelector`, `$eval` and `evaluate` on it.
 * @param {object} args
 * @param {string} args.pattern - Regular expression source.
 * @param {object} [args.flags] - `global` (on unless explicitly `false`),
 *   `ignoreCase`, `multiline`, `dotAll`, `unicode`, `sticky`.
 * @param {string} [args.testString] - When non-empty, searched instead of the page.
 * @param {string} [args.sourceType='text'] - What to read from the page:
 *   `text`, `html`, `scripts`, `styles`, `attributes` or `all`
 *   (with `args.selector` only `html`/`text` are distinguished; anything else yields `outerHTML`).
 * @param {string} [args.selector] - Restrict page content to the first matching element.
 * @param {number} [args.maxMatches=100] - Stop after this many matches.
 * @param {number} [args.contextChars=50] - Characters of context kept on each side of a match (0 is honored).
 * @param {number} [args.timeout=5000] - Time budget in ms, checked between matches only.
 * @param {boolean} [args.highlightMatches] - Wrap the match inside `context` in `<<MATCH>>…<</MATCH>>`.
 * @param {boolean} [args.extractGroups] - Set to `false` to omit capture groups.
 * @param {string} [args.replaceWith] - When defined, `replaced` holds `content.replace(regex, replaceWith)`.
 * @returns {Promise<{found: boolean, matches: object[], count: number, replaced?: string, patternInfo: object}>}
 *   An invalid pattern does not throw; it is reported through `patternInfo.isValid === false`.
 */
export async function handleSearchRegex(page, args) {
    const progressNotifier = getProgressNotifier();
    const tracker = progressNotifier.createTracker(`search-regex-${Date.now()}`);
    tracker.start(100, `🔎 Regex Search: "${args.pattern}"`);
    // Build regex flags
    const flags = args.flags || {};
    let flagString = '';
    if (flags.global !== false)
        flagString += 'g';
    if (flags.ignoreCase)
        flagString += 'i';
    if (flags.multiline)
        flagString += 'm';
    if (flags.dotAll)
        flagString += 's';
    if (flags.unicode)
        flagString += 'u';
    if (flags.sticky)
        flagString += 'y';
    // Validate regex
    let regex;
    try {
        regex = new RegExp(args.pattern, flagString);
    }
    catch (e) {
        tracker.fail(`Invalid regex: ${e.message}`);
        return {
            found: false,
            matches: [],
            count: 0,
            patternInfo: { flags: flagString, isValid: false, errorMessage: e.message },
        };
    }
    const patternInfo = { flags: flagString, isValid: true, errorMessage: undefined };
    tracker.setProgress(20, '📄 Extracting content...');
    // Get content based on sourceType
    let content = '';
    if (args.testString) {
        content = args.testString;
    }
    else {
        const sourceType = args.sourceType || 'text';
        try {
            await page.waitForSelector('body', { timeout: 5000 });
        }
        catch { /* best effort: read whatever the page currently has */ }
        if (args.selector) {
            try {
                content = await page.$eval(args.selector, (el, type) => {
                    if (type === 'html')
                        return el.innerHTML;
                    if (type === 'text')
                        return el.textContent || '';
                    return el.outerHTML;
                }, sourceType);
            }
            catch {
                // Selector not found or evaluation failed: reported below as "no content".
                content = '';
            }
        }
        else {
            content = await page.evaluate((type) => {
                if (type === 'html')
                    return document.documentElement.innerHTML;
                if (type === 'scripts') {
                    return Array.from(document.scripts).map(s => s.textContent).join('\n');
                }
                if (type === 'styles') {
                    return Array.from(document.styleSheets).map(s => {
                        try {
                            return Array.from(s.cssRules).map(r => r.cssText).join('\n');
                        }
                        catch {
                            // Reading cssRules can throw; that sheet contributes nothing.
                            return '';
                        }
                    }).join('\n');
                }
                if (type === 'attributes') {
                    return Array.from(document.querySelectorAll('*')).map(el => Array.from(el.attributes).map(a => `${a.name}="${a.value}"`).join(' ')).join('\n');
                }
                if (type === 'all') {
                    return document.documentElement.outerHTML;
                }
                return document.body?.textContent || '';
            }, sourceType);
        }
    }
    if (!content) {
        tracker.fail('No content to search');
        return { found: false, matches: [], count: 0, patternInfo };
    }
    tracker.setProgress(50, '🔍 Running regex match...');
    const matches = [];
    const maxMatches = args.maxMatches || 100;
    // `??` rather than `||` so an explicit 0 (no surrounding context) is respected.
    const contextChars = args.contextChars ?? 50;
    const timeout = args.timeout || 5000;
    const startTime = Date.now();
    // exec() only moves lastIndex forward for global or sticky regexes; without
    // either flag it would return the same first match on every iteration.
    const advancesBetweenCalls = regex.global || regex.sticky;
    // Step past an empty match; with the `u` flag skip a whole surrogate pair
    // so lastIndex never lands in the middle of a code point.
    const indexAfterEmptyMatch = (idx) => {
        if (regex.unicode && idx + 1 < content.length) {
            const lead = content.charCodeAt(idx);
            const trail = content.charCodeAt(idx + 1);
            if (lead >= 0xD800 && lead <= 0xDBFF && trail >= 0xDC00 && trail <= 0xDFFF)
                return idx + 2;
        }
        return idx + 1;
    };
    let match;
    while ((match = regex.exec(content)) !== null) {
        // Timeout protection. NOTE: only checked between matches, so it cannot
        // interrupt a single exec() call that is itself slow.
        if (Date.now() - startTime > timeout) {
            tracker.setProgress(80, '⚠️ Timeout reached, returning partial results');
            break;
        }
        const start = Math.max(0, match.index - contextChars);
        const end = Math.min(content.length, match.index + match[0].length + contextChars);
        let context = content.substring(start, end);
        // Highlight if requested
        if (args.highlightMatches) {
            const matchStart = match.index - start;
            const matchEnd = matchStart + match[0].length;
            context = context.substring(0, matchStart) + '<<MATCH>>' + context.substring(matchStart, matchEnd) + '<</MATCH>>' + context.substring(matchEnd);
        }
        const matchResult = {
            text: match[0],
            context,
            index: match.index,
            length: match[0].length,
        };
        // Extract groups if requested
        if (args.extractGroups !== false) {
            if (match.groups) {
                matchResult.groups = match.groups;
            }
            if (match.length > 1) {
                matchResult.numberedGroups = match.slice(1);
            }
        }
        matches.push(matchResult);
        if (matches.length >= maxMatches)
            break;
        // A non-global, non-sticky regex has exactly one match to report.
        if (!advancesBetweenCalls)
            break;
        // Prevent infinite loop for zero-length matches
        if (match[0].length === 0) {
            regex.lastIndex = indexAfterEmptyMatch(regex.lastIndex);
        }
    }
    // Replace mode
    let replaced;
    if (args.replaceWith !== undefined) {
        tracker.setProgress(90, '🔄 Applying replacement...');
        // The loop above may have exited early with lastIndex > 0; a sticky
        // regex would then start replacing from that offset.
        regex.lastIndex = 0;
        replaced = content.replace(regex, args.replaceWith);
    }
    tracker.complete(`🎉 Found ${matches.length} matches`);
    return {
        found: matches.length > 0,
        matches,
        count: matches.length,
        replaced,
        patternInfo,
    };
}
|
|
258
353
|
/**
 * Sentence-level content search over the current page with phrase matching,
 * optional fuzzy / whole-word matching, exclusion terms, a proximity bonus,
 * length/pattern filters and highlighted snippets.
 *
 * Query syntax: `"quoted phrases"` are matched literally; the rest is split
 * on the keywords AND / OR / NOT (case-insensitive). AND and OR are treated
 * the same (any term may match); a term following NOT excludes every
 * sentence that contains it.
 *
 * Scoring per sentence: +10 per matching phrase, +3 per exact term hit,
 * +5 per fuzzy word hit, +15 when the first two proximity words that were
 * found are within `operators.proximity.distance` (default 5) words.
 *
 * @param {object} page - Browser page; `waitForSelector` and `evaluate` are called on it.
 * @param {object} args
 * @param {string} args.query - Search query (missing/non-string values are treated as text via String()).
 * @param {string} [args.searchIn='text'] - `text`, `html`, `links`, `images`, `videos`,
 *   `scripts`, `json`, `schema`, `meta` or `all`.
 * @param {string} [args.selector] - Root element for the search (defaults to `document.body`).
 * @param {object} [args.matchMode] - `caseSensitive`, `wholeWord`, `fuzzy`, `fuzzyThreshold` (default 0.8).
 * @param {object} [args.operators] - `must`, `should`, `mustNot` (string arrays), `phrase` (string),
 *   `proximity: { words, distance }`.
 * @param {object} [args.filters] - `minLength`, `maxLength`, `containsPattern`, `excludePattern`.
 * @param {number} [args.maxResults=50]
 * @param {number} [args.snippetLength=150]
 * @param {string} [args.highlightTag='**'] - Inserted verbatim before and after each highlighted hit.
 * @param {string} [args.sortBy='relevance'] - `relevance` ranks all matching sentences by score
 *   before truncating to `maxResults`; any other value keeps document order.
 * @returns {Promise<{found: boolean, results: object[], count: number, totalOccurrences: number, searchInfo: object}>}
 * @throws {SyntaxError} When `filters.containsPattern` or `filters.excludePattern` is not a valid regex.
 */
export async function handleWebSearch(page, args) {
    const progressNotifier = getProgressNotifier();
    const tracker = progressNotifier.createTracker(`web-search-${Date.now()}`);
    tracker.start(100, `🚀 Intelligent Search: "${args.query}"`);
    try {
        await page.waitForSelector('body', { timeout: 5000 });
    }
    catch { /* best effort: search whatever is already in the DOM */ }
    tracker.setProgress(20, '📄 Extracting content...');
    // Get content based on searchIn type
    const searchIn = args.searchIn || 'text';
    const content = await page.evaluate((opts) => {
        const { searchIn, selector } = opts;
        const root = selector ? document.querySelector(selector) : document.body;
        if (!root)
            return '';
        switch (searchIn) {
            case 'html':
                return root.innerHTML;
            case 'links':
                return Array.from(root.querySelectorAll('a')).map((a) => `${a.textContent?.trim()} [${a.getAttribute('href')}]`).join('\n');
            case 'images':
                return Array.from(root.querySelectorAll('img')).map((img) => `${img.alt} [${img.src}]`).join('\n');
            case 'videos':
                return Array.from(root.querySelectorAll('video, iframe[src*="youtube"], iframe[src*="vimeo"]')).map((v) => v.getAttribute('src') || v.querySelector?.('source')?.getAttribute('src') || '').join('\n');
            case 'scripts':
                return Array.from(document.scripts).map((s) => s.textContent).join('\n');
            case 'json':
                return Array.from(document.querySelectorAll('script[type="application/json"], script[type="application/ld+json"]'))
                    .map((s) => s.textContent).join('\n');
            case 'schema':
                return Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
                    .map((s) => s.textContent).join('\n');
            case 'meta':
                return Array.from(document.querySelectorAll('meta')).map((m) => `${m.name || m.getAttribute('property')}: ${m.content}`).join('\n');
            case 'all':
                return root.outerHTML;
            default:
                return root.textContent || '';
        }
    }, { searchIn, selector: args.selector });
    if (!content) {
        tracker.fail('No content to search');
        return {
            found: false,
            results: [],
            count: 0,
            totalOccurrences: 0,
            searchInfo: { query: args.query, searchIn, matchMode: 'none' },
        };
    }
    tracker.setProgress(40, '🔍 Processing search query...');
    const matchMode = args.matchMode || {};
    const operators = args.operators || {};
    const filters = args.filters || {};
    const maxResults = args.maxResults || 50;
    const snippetLength = args.snippetLength || 150;
    const highlightTag = args.highlightTag || '**';
    const sortBy = args.sortBy || 'relevance';
    // A missing query must not crash the parser below.
    const query = typeof args.query === 'string' ? args.query : String(args.query ?? '');
    const OPERATOR_KEYWORDS = ['AND', 'OR', 'NOT'];
    let searchTerms = [];
    let phrases = [];
    let excludedTerms = [];
    // Extract phrases (quoted strings)
    const phraseMatches = query.match(/"([^"]+)"/g);
    if (phraseMatches) {
        phrases = phraseMatches.map(p => p.replace(/"/g, ''));
    }
    // Parse the remaining text. Splitting with a capture group yields
    // [term, OP, term, OP, ...]; the lookahead leaves the whitespace after an
    // operator in place so consecutive operators ("AND NOT x") are both seen.
    const remainingQuery = query.replace(/"[^"]+"/g, '').trim();
    if (remainingQuery) {
        const tokens = remainingQuery.split(/(?:^|\s+)(AND|OR|NOT)(?=\s)/i);
        let negateNext = false;
        tokens.forEach((token, idx) => {
            if (idx % 2 === 1) {
                negateNext = token.toUpperCase() === 'NOT';
                return;
            }
            const term = token.trim();
            if (!term || OPERATOR_KEYWORDS.includes(term.toUpperCase()))
                return;
            (negateNext ? excludedTerms : searchTerms).push(term);
            negateNext = false;
        });
    }
    // Add explicit operator terms
    if (operators.must)
        searchTerms.push(...operators.must);
    if (operators.should)
        searchTerms.push(...operators.should);
    if (operators.phrase)
        phrases.push(operators.phrase);
    if (operators.mustNot)
        excludedTerms.push(...operators.mustNot);
    // Empty terms would compile to a regex that matches at every position.
    const isUsableTerm = (t) => typeof t === 'string' && t.length > 0;
    searchTerms = searchTerms.filter(isUsableTerm);
    phrases = phrases.filter(isUsableTerm);
    excludedTerms = excludedTerms.filter(isUsableTerm).map(t => t.toLowerCase());
    // Compile every regex once instead of once per sentence.
    const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regexFlags = matchMode.caseSensitive ? 'g' : 'gi';
    const phraseRegexes = phrases.map(p => new RegExp(escapeRegExp(p), regexFlags));
    const termRegexes = searchTerms.map(t => new RegExp(matchMode.wholeWord ? `\\b${escapeRegExp(t)}\\b` : escapeRegExp(t), regexFlags));
    const highlightRegexes = [...searchTerms, ...phrases].map(t => new RegExp(escapeRegExp(t), 'gi'));
    let containsRegex = null;
    let excludeRegex = null;
    try {
        if (filters.containsPattern)
            containsRegex = new RegExp(filters.containsPattern, 'i');
        if (filters.excludePattern)
            excludeRegex = new RegExp(filters.excludePattern, 'i');
    }
    catch (e) {
        tracker.fail(`Invalid filter pattern: ${e.message}`);
        throw e;
    }
    tracker.setProgress(60, '🎯 Matching content...');
    const results = [];
    // Levenshtein distance for fuzzy matching
    const levenshtein = (a, b) => {
        const matrix = Array(b.length + 1).fill(null).map(() => Array(a.length + 1).fill(null));
        for (let i = 0; i <= a.length; i++)
            matrix[0][i] = i;
        for (let j = 0; j <= b.length; j++)
            matrix[j][0] = j;
        for (let j = 1; j <= b.length; j++) {
            for (let i = 1; i <= a.length; i++) {
                const indicator = a[i - 1] === b[j - 1] ? 0 : 1;
                matrix[j][i] = Math.min(matrix[j][i - 1] + 1, matrix[j - 1][i] + 1, matrix[j - 1][i - 1] + indicator);
            }
        }
        return matrix[b.length][a.length];
    };
    // Similarity = 1 - distance / longer length, compared against the threshold.
    const fuzzyMatch = (text, term, threshold) => {
        const maxLen = Math.max(text.length, term.length);
        if (maxLen === 0)
            return false;
        const distance = levenshtein(text.toLowerCase(), term.toLowerCase());
        return (1 - distance / maxLen) >= threshold;
    };
    const fuzzyThreshold = matchMode.fuzzyThreshold || 0.8;
    // Split content into searchable chunks
    const sentences = content.split(/[.!?\n]+/).filter(s => s.trim().length > 10);
    let totalOccurrences = 0;
    // Relevance ranking needs every candidate before truncation; for any other
    // ordering the first maxResults hits in document order are the answer.
    const canStopEarly = sortBy !== 'relevance';
    for (let i = 0; i < sentences.length; i++) {
        if (canStopEarly && results.length >= maxResults)
            break;
        const sentence = sentences[i].trim();
        let score = 0;
        let matched = false;
        // Check phrases first. String#match with a global regex restarts at 0,
        // so the shared regex objects carry no state between sentences.
        for (const phraseRegex of phraseRegexes) {
            const phraseHits = sentence.match(phraseRegex);
            if (phraseHits) {
                score += 10;
                matched = true;
                totalOccurrences += phraseHits.length;
            }
        }
        // Check terms
        if (matchMode.fuzzy) {
            const words = sentence.split(/\s+/);
            for (const term of searchTerms) {
                for (const word of words) {
                    if (fuzzyMatch(word, term, fuzzyThreshold)) {
                        score += 5;
                        matched = true;
                        totalOccurrences++;
                    }
                }
            }
        }
        else {
            for (const termRegex of termRegexes) {
                const termMatches = sentence.match(termRegex);
                if (termMatches) {
                    score += termMatches.length * 3;
                    matched = true;
                    totalOccurrences += termMatches.length;
                }
            }
        }
        // Exclusions (operators.mustNot and NOT terms from the query)
        if (matched && excludedTerms.length > 0) {
            const lowered = sentence.toLowerCase();
            if (excludedTerms.some(t => lowered.includes(t))) {
                matched = false;
                score = 0;
            }
        }
        // Proximity search
        if (matched && operators.proximity?.words && operators.proximity.words.length >= 2) {
            const words = sentence.toLowerCase().split(/\s+/);
            const positions = [];
            for (const pw of operators.proximity.words) {
                const idx = words.findIndex(w => w.includes(pw.toLowerCase()));
                if (idx !== -1)
                    positions.push(idx);
            }
            if (positions.length >= 2) {
                const distance = Math.abs(positions[0] - positions[1]);
                if (distance <= (operators.proximity.distance || 5)) {
                    score += 15;
                }
            }
        }
        // Apply filters
        if (matched && filters.minLength && sentence.length < filters.minLength)
            matched = false;
        if (matched && filters.maxLength && sentence.length > filters.maxLength)
            matched = false;
        if (matched && containsRegex && !containsRegex.test(sentence))
            matched = false;
        if (matched && excludeRegex && excludeRegex.test(sentence))
            matched = false;
        if (matched && score > 0) {
            // Create highlighted snippet. A function replacer keeps "$" inside
            // highlightTag from being read as a replacement pattern.
            let snippet = sentence.substring(0, snippetLength);
            for (const highlightRegex of highlightRegexes) {
                snippet = snippet.replace(highlightRegex, (hit) => `${highlightTag}${hit}${highlightTag}`);
            }
            results.push({
                text: sentence.substring(0, 100),
                snippet,
                score,
                position: i,
            });
        }
    }
    // Results are collected in document order; only relevance needs a sort.
    if (sortBy === 'relevance') {
        results.sort((a, b) => b.score - a.score);
    }
    const limited = results.slice(0, maxResults);
    tracker.complete(`🎉 Found ${limited.length} results (${totalOccurrences} total occurrences)`);
    return {
        found: limited.length > 0,
        results: limited,
        count: limited.length,
        totalOccurrences,
        searchInfo: {
            query: args.query,
            searchIn,
            matchMode: matchMode.fuzzy ? 'fuzzy' : matchMode.wholeWord ? 'wholeWord' : 'normal',
        },
    };
}
|
|
579
|
+
/**
|
|
580
|
+
* Ultra-powerful element finder and batch scraper
|
|
581
|
+
* Find elements using XPath, CSS, text, attributes with advanced filters
|
|
582
|
+
* Supports batch scraping mode (merged from batch_element_scraper)
|
|
260
583
|
*/
|
|
261
584
|
export async function handleFindElementAdvanced(page, args) {
|
|
585
|
+
// Progress tracking
|
|
586
|
+
const progressNotifier = getProgressNotifier();
|
|
587
|
+
const tracker = progressNotifier.createTracker(`find-element-${Date.now()}`);
|
|
588
|
+
const limit = args.limit || 100;
|
|
589
|
+
const selectorToUse = args.selector || args.cssSelector;
|
|
590
|
+
// ============================================================
|
|
591
|
+
// BATCH MODE (merged from batch_element_scraper)
|
|
592
|
+
// ============================================================
|
|
593
|
+
if (args.batchMode && selectorToUse) {
|
|
594
|
+
tracker.start(100, '📋 Starting batch element extraction...');
|
|
595
|
+
const attributes = args.extractAttributes || ['textContent', 'href', 'src'];
|
|
596
|
+
tracker.setProgress(20, `🔍 Finding elements with selector: ${selectorToUse}`);
|
|
597
|
+
const items = await page.evaluate((opts) => {
|
|
598
|
+
const elements = Array.from(document.querySelectorAll(opts.selector)).slice(0, opts.limit);
|
|
599
|
+
return elements.map((el, idx) => {
|
|
600
|
+
const item = {};
|
|
601
|
+
// Extract requested attributes
|
|
602
|
+
for (const attr of opts.attributes) {
|
|
603
|
+
if (attr === 'textContent') {
|
|
604
|
+
item.text = el.textContent?.trim()?.substring(0, 500) || '';
|
|
605
|
+
}
|
|
606
|
+
else if (attr === 'innerHTML') {
|
|
607
|
+
item.html = el.innerHTML?.substring(0, 1000) || '';
|
|
608
|
+
}
|
|
609
|
+
else {
|
|
610
|
+
item[attr] = el.getAttribute(attr) || '';
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
// Include innerHTML if requested
|
|
614
|
+
if (opts.includeInnerHTML) {
|
|
615
|
+
item.html = el.innerHTML?.substring(0, 1000) || '';
|
|
616
|
+
}
|
|
617
|
+
// Always include tag
|
|
618
|
+
item.tag = el.tagName.toLowerCase();
|
|
619
|
+
// Include position if requested
|
|
620
|
+
if (opts.includePosition) {
|
|
621
|
+
const rect = el.getBoundingClientRect();
|
|
622
|
+
item.position = {
|
|
623
|
+
x: Math.round(rect.x),
|
|
624
|
+
y: Math.round(rect.y),
|
|
625
|
+
width: Math.round(rect.width),
|
|
626
|
+
height: Math.round(rect.height)
|
|
627
|
+
};
|
|
628
|
+
}
|
|
629
|
+
// Generate unique selector
|
|
630
|
+
const tagName = el.tagName.toLowerCase();
|
|
631
|
+
const id = el.id;
|
|
632
|
+
const className = typeof el.className === 'string' ? el.className.split(' ')[0] : '';
|
|
633
|
+
item.selector = id ? `#${id}` : className ? `${tagName}.${className}` : `${opts.selector}:nth-of-type(${idx + 1})`;
|
|
634
|
+
// Apply advanced filters if specified
|
|
635
|
+
if (opts.filter) {
|
|
636
|
+
const rect = el.getBoundingClientRect();
|
|
637
|
+
const style = window.getComputedStyle(el);
|
|
638
|
+
if (opts.filter.visible !== undefined) {
|
|
639
|
+
const isVisible = rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden';
|
|
640
|
+
if (opts.filter.visible !== isVisible)
|
|
641
|
+
return null;
|
|
642
|
+
}
|
|
643
|
+
if (opts.filter.enabled !== undefined) {
|
|
644
|
+
const isEnabled = !el.disabled;
|
|
645
|
+
if (opts.filter.enabled !== isEnabled)
|
|
646
|
+
return null;
|
|
647
|
+
}
|
|
648
|
+
if (opts.filter.minWidth && rect.width < opts.filter.minWidth)
|
|
649
|
+
return null;
|
|
650
|
+
if (opts.filter.minHeight && rect.height < opts.filter.minHeight)
|
|
651
|
+
return null;
|
|
652
|
+
if (opts.filter.hasChildren !== undefined) {
|
|
653
|
+
const hasKids = el.children.length > 0;
|
|
654
|
+
if (opts.filter.hasChildren !== hasKids)
|
|
655
|
+
return null;
|
|
656
|
+
}
|
|
657
|
+
if (opts.filter.containsClass && !el.classList.contains(opts.filter.containsClass))
|
|
658
|
+
return null;
|
|
659
|
+
if (opts.filter.matchPattern) {
|
|
660
|
+
const regex = new RegExp(opts.filter.matchPattern);
|
|
661
|
+
if (!regex.test(el.textContent || ''))
|
|
662
|
+
return null;
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
return item;
|
|
666
|
+
}).filter((item) => item !== null);
|
|
667
|
+
}, {
|
|
668
|
+
selector: selectorToUse,
|
|
669
|
+
limit,
|
|
670
|
+
attributes,
|
|
671
|
+
includeInnerHTML: args.includeInnerHTML,
|
|
672
|
+
includePosition: args.includePosition,
|
|
673
|
+
filter: args.filter
|
|
674
|
+
});
|
|
675
|
+
tracker.setProgress(80, `✅ Found ${items.length} elements`);
|
|
676
|
+
// Format output based on returnType
|
|
677
|
+
const returnType = args.returnType || 'elements';
|
|
678
|
+
if (returnType === 'selectors') {
|
|
679
|
+
tracker.complete(`🎉 Extracted ${items.length} selectors`);
|
|
680
|
+
return {
|
|
681
|
+
found: items.length > 0,
|
|
682
|
+
elements: items.map((item) => ({
|
|
683
|
+
selector: item.selector,
|
|
684
|
+
text: '',
|
|
685
|
+
tag: item.tag,
|
|
686
|
+
})),
|
|
687
|
+
count: items.length,
|
|
688
|
+
};
|
|
689
|
+
}
|
|
690
|
+
if (returnType === 'data') {
|
|
691
|
+
tracker.complete(`🎉 Extracted data from ${items.length} elements`);
|
|
692
|
+
return {
|
|
693
|
+
found: items.length > 0,
|
|
694
|
+
elements: [],
|
|
695
|
+
count: items.length,
|
|
696
|
+
batchData: items,
|
|
697
|
+
};
|
|
698
|
+
}
|
|
699
|
+
// Default: elements
|
|
700
|
+
tracker.complete(`🎉 Found and extracted ${items.length} elements`);
|
|
701
|
+
return {
|
|
702
|
+
found: items.length > 0,
|
|
703
|
+
elements: items.map((item) => ({
|
|
704
|
+
selector: item.selector,
|
|
705
|
+
text: item.text || '',
|
|
706
|
+
tag: item.tag,
|
|
707
|
+
attributes: item,
|
|
708
|
+
position: item.position,
|
|
709
|
+
})),
|
|
710
|
+
count: items.length,
|
|
711
|
+
batchData: items,
|
|
712
|
+
};
|
|
713
|
+
}
|
|
714
|
+
// ============================================================
|
|
715
|
+
// STANDARD FIND MODE
|
|
716
|
+
// ============================================================
|
|
717
|
+
tracker.start(100, '🔍 Finding elements...');
|
|
262
718
|
const elements = [];
|
|
263
719
|
// Wait for body to be available
|
|
264
720
|
try {
|
|
@@ -267,8 +723,9 @@ export async function handleFindElementAdvanced(page, args) {
|
|
|
267
723
|
catch {
|
|
268
724
|
// Continue anyway
|
|
269
725
|
}
|
|
726
|
+
tracker.setProgress(20, '📄 Searching with provided criteria...');
|
|
727
|
+
// XPath search
|
|
270
728
|
if (args.xpath) {
|
|
271
|
-
// Use document.evaluate for XPath (compatible with all Puppeteer versions)
|
|
272
729
|
const xpathResults = await page.evaluate((xpath) => {
|
|
273
730
|
const results = [];
|
|
274
731
|
try {
|
|
@@ -277,11 +734,13 @@ export async function handleFindElementAdvanced(page, args) {
|
|
|
277
734
|
let count = 0;
|
|
278
735
|
while (node && count < 50) {
|
|
279
736
|
if (node instanceof Element) {
|
|
737
|
+
const rect = node.getBoundingClientRect();
|
|
280
738
|
results.push({
|
|
281
739
|
text: node.textContent?.trim()?.substring(0, 100) || '',
|
|
282
740
|
tag: node.tagName.toLowerCase(),
|
|
283
741
|
id: node.id || '',
|
|
284
742
|
className: typeof node.className === 'string' ? node.className : '',
|
|
743
|
+
position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
|
|
285
744
|
});
|
|
286
745
|
}
|
|
287
746
|
node = iterator.iterateNext();
|
|
@@ -297,47 +756,65 @@ export async function handleFindElementAdvanced(page, args) {
|
|
|
297
756
|
selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
|
|
298
757
|
text: e.text,
|
|
299
758
|
tag: e.tag,
|
|
759
|
+
position: args.includePosition ? e.position : undefined,
|
|
300
760
|
})));
|
|
301
761
|
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
762
|
+
// CSS selector search
|
|
763
|
+
if (selectorToUse) {
|
|
764
|
+
const cssElements = await page.$$(selectorToUse);
|
|
765
|
+
for (const el of cssElements.slice(0, limit)) {
|
|
766
|
+
const data = await page.evaluate((e, includePos) => {
|
|
767
|
+
const rect = e.getBoundingClientRect();
|
|
768
|
+
return {
|
|
769
|
+
text: e.textContent?.trim()?.substring(0, 100) || '',
|
|
770
|
+
tag: e.tagName.toLowerCase(),
|
|
771
|
+
id: e.id,
|
|
772
|
+
className: e.className,
|
|
773
|
+
position: includePos ? { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) } : undefined,
|
|
774
|
+
};
|
|
775
|
+
}, el, args.includePosition);
|
|
311
776
|
elements.push({
|
|
312
|
-
selector:
|
|
777
|
+
selector: selectorToUse,
|
|
313
778
|
text: data.text,
|
|
314
779
|
tag: data.tag,
|
|
780
|
+
position: data.position,
|
|
315
781
|
});
|
|
316
782
|
}
|
|
317
783
|
}
|
|
318
|
-
|
|
319
|
-
|
|
784
|
+
// Text content search
|
|
785
|
+
if (args.contains || args.text) {
|
|
786
|
+
const searchText = args.contains || args.text || '';
|
|
787
|
+
const isExact = args.exact || false;
|
|
788
|
+
const containsElements = await page.evaluate((opts) => {
|
|
320
789
|
const results = [];
|
|
321
|
-
const allElements = document.querySelectorAll('*');
|
|
322
|
-
for (let i = 0; i < allElements.length && results.length <
|
|
790
|
+
const allElements = document.querySelectorAll(opts.elementType || '*');
|
|
791
|
+
for (let i = 0; i < allElements.length && results.length < opts.limit; i++) {
|
|
323
792
|
const el = allElements[i];
|
|
324
|
-
|
|
793
|
+
const textContent = el.textContent || '';
|
|
794
|
+
const matches = opts.exact
|
|
795
|
+
? textContent.trim() === opts.text
|
|
796
|
+
: textContent.includes(opts.text);
|
|
797
|
+
if (matches) {
|
|
798
|
+
const rect = el.getBoundingClientRect();
|
|
325
799
|
results.push({
|
|
326
|
-
text:
|
|
800
|
+
text: textContent.trim()?.substring(0, 100) || '',
|
|
327
801
|
tag: el.tagName.toLowerCase(),
|
|
328
802
|
id: el.id || '',
|
|
329
803
|
className: typeof el.className === 'string' ? el.className : '',
|
|
804
|
+
position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
|
|
330
805
|
});
|
|
331
806
|
}
|
|
332
807
|
}
|
|
333
808
|
return results;
|
|
334
|
-
}, args.
|
|
809
|
+
}, { text: searchText, exact: isExact, elementType: args.elementType, limit });
|
|
335
810
|
elements.push(...containsElements.map((e) => ({
|
|
336
811
|
selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
|
|
337
812
|
text: e.text,
|
|
338
813
|
tag: e.tag,
|
|
814
|
+
position: args.includePosition ? e.position : undefined,
|
|
339
815
|
})));
|
|
340
816
|
}
|
|
817
|
+
// Attribute search
|
|
341
818
|
if (args.attributes) {
|
|
342
819
|
let attributes = {};
|
|
343
820
|
try {
|
|
@@ -346,39 +823,42 @@ export async function handleFindElementAdvanced(page, args) {
|
|
|
346
823
|
catch (e) {
|
|
347
824
|
// Ignore invalid JSON
|
|
348
825
|
}
|
|
349
|
-
|
|
350
|
-
const attributeElements = await page.evaluate((attrs) => {
|
|
826
|
+
const attributeElements = await page.evaluate((opts) => {
|
|
351
827
|
const results = [];
|
|
352
828
|
const allElements = document.querySelectorAll('*');
|
|
353
|
-
for (let i = 0; i < allElements.length && results.length <
|
|
829
|
+
for (let i = 0; i < allElements.length && results.length < opts.limit; i++) {
|
|
354
830
|
const el = allElements[i];
|
|
355
831
|
let match = true;
|
|
356
|
-
for (const [key, value] of Object.entries(attrs)) {
|
|
832
|
+
for (const [key, value] of Object.entries(opts.attrs)) {
|
|
357
833
|
if (el.getAttribute(key) !== value) {
|
|
358
834
|
match = false;
|
|
359
835
|
break;
|
|
360
836
|
}
|
|
361
837
|
}
|
|
362
838
|
if (match) {
|
|
839
|
+
const rect = el.getBoundingClientRect();
|
|
363
840
|
results.push({
|
|
364
841
|
text: el.textContent?.trim()?.substring(0, 100) || '',
|
|
365
842
|
tag: el.tagName.toLowerCase(),
|
|
366
843
|
id: el.id || '',
|
|
367
844
|
className: typeof el.className === 'string' ? el.className : '',
|
|
845
|
+
position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
|
|
368
846
|
});
|
|
369
847
|
}
|
|
370
848
|
}
|
|
371
849
|
return results;
|
|
372
|
-
}, attributes);
|
|
850
|
+
}, { attrs: attributes, limit });
|
|
373
851
|
elements.push(...attributeElements.map((e) => ({
|
|
374
852
|
selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
|
|
375
853
|
text: e.text,
|
|
376
854
|
tag: e.tag,
|
|
855
|
+
position: args.includePosition ? e.position : undefined,
|
|
377
856
|
})));
|
|
378
857
|
}
|
|
858
|
+
tracker.complete(`🎉 Found ${elements.length} elements`);
|
|
379
859
|
return {
|
|
380
860
|
found: elements.length > 0,
|
|
381
|
-
elements: elements.slice(0,
|
|
861
|
+
elements: elements.slice(0, limit),
|
|
382
862
|
count: elements.length,
|
|
383
863
|
};
|
|
384
864
|
}
|
package/dist/index.js
CHANGED
|
@@ -57,7 +57,11 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
|
|
|
57
57
|
import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
|
|
58
58
|
import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
|
|
59
59
|
// Import advanced tools handlers
|
|
60
|
-
import {
|
|
60
|
+
import {
|
|
61
|
+
// handleBreadcrumbNavigator REMOVED - use click or find_element
|
|
62
|
+
handleUrlRedirectTracer, handleSearchRegex, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder,
|
|
63
|
+
// handleElementScreenshot, // REMOVED - use deep_analysis with includeScreenshot
|
|
64
|
+
handleLinkHarvester, handleCookieManager,
|
|
61
65
|
// Download tools
|
|
62
66
|
handleFileDownloader,
|
|
63
67
|
// Enhanced streaming/download tools
|
|
@@ -176,18 +180,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
176
180
|
case TOOL_NAMES.SAVE_CONTENT_AS_MARKDOWN:
|
|
177
181
|
return await handleSaveContentAsMarkdown(args);
|
|
178
182
|
// Advanced Tools
|
|
179
|
-
case
|
|
180
|
-
if (!page)
|
|
181
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
182
|
-
return { content: [{ type: 'text', text: JSON.stringify(await handleBreadcrumbNavigator(page, args || {})) }] };
|
|
183
|
+
// BREADCRUMB_NAVIGATOR case REMOVED - use click or find_element
|
|
183
184
|
case TOOL_NAMES.REDIRECT_TRACER:
|
|
184
185
|
if (!page)
|
|
185
186
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
186
187
|
return { content: [{ type: 'text', text: JSON.stringify(await handleUrlRedirectTracer(page, args)) }] };
|
|
187
|
-
case TOOL_NAMES.
|
|
188
|
+
case TOOL_NAMES.SEARCH_REGEX:
|
|
188
189
|
if (!page)
|
|
189
190
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
190
|
-
return { content: [{ type: 'text', text: JSON.stringify(await
|
|
191
|
+
return { content: [{ type: 'text', text: JSON.stringify(await handleSearchRegex(page, args)) }] };
|
|
192
|
+
// WEB_SEARCH case REMOVED - redundant with search_regex
|
|
191
193
|
case TOOL_NAMES.EXTRACT_JSON:
|
|
192
194
|
if (!page)
|
|
193
195
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
@@ -215,18 +217,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
215
217
|
// API_FINDER case REMOVED - merged into NETWORK_RECORDER (use findApis: true)
|
|
216
218
|
// AJAX_CONTENT_WAITER case REMOVED - merged into WAIT (use pollInterval, expectedContent)
|
|
217
219
|
// MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
|
|
218
|
-
case
|
|
219
|
-
if (!page)
|
|
220
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
221
|
-
return { content: [{ type: 'text', text: JSON.stringify(await handleElementScreenshot(page, args)) }] };
|
|
220
|
+
// ELEMENT_SCREENSHOT case REMOVED - use deep_analysis with includeScreenshot
|
|
222
221
|
case TOOL_NAMES.LINK_HARVESTER:
|
|
223
222
|
if (!page)
|
|
224
223
|
throw new Error('Browser not initialized. Call browser_init first.');
|
|
225
224
|
return { content: [{ type: 'text', text: JSON.stringify(await handleLinkHarvester(page, args || {})) }] };
|
|
226
|
-
case
|
|
227
|
-
if (!page)
|
|
228
|
-
throw new Error('Browser not initialized. Call browser_init first.');
|
|
229
|
-
return { content: [{ type: 'text', text: JSON.stringify(await handleBatchElementScraper(page, args)) }] };
|
|
225
|
+
// BATCH_ELEMENT_SCRAPER case REMOVED - merged into FIND_ELEMENT (use batchMode)
|
|
230
226
|
// EXTRACT_SCHEMA case REMOVED - merged into EXTRACT_JSON (use extractSchema option)
|
|
231
227
|
// M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
|
|
232
228
|
case TOOL_NAMES.COOKIE_MANAGER:
|
|
@@ -63,7 +63,7 @@ const TOOL_DEFINITIONS = {
|
|
|
63
63
|
},
|
|
64
64
|
find_element: {
|
|
65
65
|
name: 'find_element',
|
|
66
|
-
description: 'Find elements using text, CSS selector, XPath, attributes, or AI-powered description',
|
|
66
|
+
description: 'Ultra-powerful element finder and batch scraper. Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports batch scraping mode with advanced filtering.',
|
|
67
67
|
category: 'Content',
|
|
68
68
|
parameters: [
|
|
69
69
|
{ name: 'text', type: 'string', description: 'Text content to search for', required: false },
|
|
@@ -71,6 +71,11 @@ const TOOL_DEFINITIONS = {
|
|
|
71
71
|
{ name: 'xpath', type: 'string', description: 'XPath expression', required: false },
|
|
72
72
|
{ name: 'description', type: 'string', description: 'Natural language description (AI-powered)', required: false },
|
|
73
73
|
{ name: 'exact', type: 'boolean', description: 'Exact text match', required: false, default: false },
|
|
74
|
+
{ name: 'batchMode', type: 'boolean', description: 'Enable batch scraping mode for multiple elements', required: false, default: false },
|
|
75
|
+
{ name: 'extractAttributes', type: 'array', description: 'Attributes to extract in batch mode (e.g., href, src)', required: false },
|
|
76
|
+
{ name: 'limit', type: 'number', description: 'Maximum elements to process', required: false, default: 100 },
|
|
77
|
+
{ name: 'includePosition', type: 'boolean', description: 'Include element position (x, y, width, height)', required: false, default: false },
|
|
78
|
+
{ name: 'returnType', type: 'string', description: 'Output format: elements, selectors, or data', required: false, enum: ['elements', 'selectors', 'data'], default: 'elements' },
|
|
74
79
|
],
|
|
75
80
|
},
|
|
76
81
|
save_content_as_markdown: {
|
|
@@ -164,15 +169,20 @@ const TOOL_DEFINITIONS = {
|
|
|
164
169
|
],
|
|
165
170
|
},
|
|
166
171
|
// Advanced Tools
|
|
167
|
-
|
|
168
|
-
name: '
|
|
169
|
-
description: '
|
|
172
|
+
search_regex: {
|
|
173
|
+
name: 'search_regex',
|
|
174
|
+
description: '🔥 ULTRA-POWERFUL Regex Engine (like regex101.com) - Full regex support with flags, capture groups, replace mode, timeout protection',
|
|
170
175
|
category: 'Advanced',
|
|
171
176
|
parameters: [
|
|
172
|
-
{ name: 'pattern', type: 'string', description: '
|
|
173
|
-
{ name: '
|
|
177
|
+
{ name: 'pattern', type: 'string', description: 'Regex pattern to search', required: true },
|
|
178
|
+
{ name: 'flags', type: 'object', description: 'Regex flags: global, ignoreCase, multiline, dotAll, unicode, sticky', required: false },
|
|
179
|
+
{ name: 'replaceWith', type: 'string', description: 'Replace matches with this string (supports $1, $2, $&)', required: false },
|
|
180
|
+
{ name: 'sourceType', type: 'string', description: 'Where to search: text, html, scripts, styles, attributes, all', required: false, default: 'text' },
|
|
181
|
+
{ name: 'extractGroups', type: 'boolean', description: 'Extract capture groups', required: false, default: true },
|
|
182
|
+
{ name: 'highlightMatches', type: 'boolean', description: 'Highlight matches in context', required: false, default: false },
|
|
174
183
|
],
|
|
175
184
|
},
|
|
185
|
+
// web_search REMOVED - redundant with search_regex
|
|
176
186
|
extract_json: {
|
|
177
187
|
name: 'extract_json',
|
|
178
188
|
description: 'Extract embedded JSON/API data from page (LD+JSON, __NEXT_DATA__, etc.)',
|
|
@@ -239,16 +249,7 @@ const TOOL_DEFINITIONS = {
|
|
|
239
249
|
{ name: 'maxLinks', type: 'number', description: 'Maximum links to return', required: false },
|
|
240
250
|
],
|
|
241
251
|
},
|
|
242
|
-
batch_element_scraper:
|
|
243
|
-
name: 'batch_element_scraper',
|
|
244
|
-
description: 'Efficiently scrape lists of similar elements',
|
|
245
|
-
category: 'Advanced',
|
|
246
|
-
parameters: [
|
|
247
|
-
{ name: 'selector', type: 'string', description: 'CSS selector for elements to scrape', required: true },
|
|
248
|
-
{ name: 'attributes', type: 'array', description: 'Attributes to extract', required: false },
|
|
249
|
-
{ name: 'limit', type: 'number', description: 'Maximum elements to scrape', required: false, default: 100 },
|
|
250
|
-
],
|
|
251
|
-
},
|
|
252
|
+
// batch_element_scraper REMOVED - merged into find_element (use batchMode: true)
|
|
252
253
|
extract_schema: {
|
|
253
254
|
name: 'extract_schema',
|
|
254
255
|
description: 'Extract Schema.org structured data (JSON-LD and Microdata)',
|
|
@@ -257,25 +258,8 @@ const TOOL_DEFINITIONS = {
|
|
|
257
258
|
{ name: 'schemaTypes', type: 'array', description: 'Schema types to extract (e.g., Product, Article)', required: false },
|
|
258
259
|
],
|
|
259
260
|
},
|
|
260
|
-
element_screenshot:
|
|
261
|
-
|
|
262
|
-
description: 'Capture screenshot of a specific element',
|
|
263
|
-
category: 'Advanced',
|
|
264
|
-
parameters: [
|
|
265
|
-
{ name: 'selector', type: 'string', description: 'CSS selector of element to capture', required: true },
|
|
266
|
-
{ name: 'path', type: 'string', description: 'File path to save screenshot', required: false },
|
|
267
|
-
{ name: 'format', type: 'string', description: 'Image format', required: false, enum: ['png', 'jpeg', 'webp'], default: 'png' },
|
|
268
|
-
],
|
|
269
|
-
},
|
|
270
|
-
breadcrumb_navigator: {
|
|
271
|
-
name: 'breadcrumb_navigator',
|
|
272
|
-
description: 'Navigate using site breadcrumbs - find and click breadcrumb links',
|
|
273
|
-
category: 'Advanced',
|
|
274
|
-
parameters: [
|
|
275
|
-
{ name: 'targetIndex', type: 'number', description: 'Index of breadcrumb to click (0-based)', required: false },
|
|
276
|
-
{ name: 'targetText', type: 'string', description: 'Text of breadcrumb to click', required: false },
|
|
277
|
-
],
|
|
278
|
-
},
|
|
261
|
+
// element_screenshot REMOVED - use deep_analysis with includeScreenshot: true
|
|
262
|
+
// breadcrumb_navigator REMOVED - use click or find_element with text
|
|
279
263
|
redirect_tracer: {
|
|
280
264
|
name: 'redirect_tracer',
|
|
281
265
|
description: 'Trace URL redirects including standard, JavaScript, and meta refresh redirects',
|
package/dist/tool-definitions.js
CHANGED
|
@@ -354,11 +354,12 @@ export const TOOLS = [
|
|
|
354
354
|
},
|
|
355
355
|
{
|
|
356
356
|
name: 'find_element',
|
|
357
|
-
description: 'Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports Shadow DOM
|
|
357
|
+
description: 'Ultra-powerful element finder and batch scraper. Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports Shadow DOM, cross-frame search, and efficient batch scraping of similar elements with advanced filtering.',
|
|
358
358
|
inputSchema: {
|
|
359
359
|
type: 'object',
|
|
360
360
|
additionalProperties: false,
|
|
361
361
|
properties: {
|
|
362
|
+
// === FIND MODE ===
|
|
362
363
|
text: { type: 'string', description: 'Text content to search for in elements' },
|
|
363
364
|
selector: { type: 'string', description: 'CSS selector' },
|
|
364
365
|
xpath: { type: 'string', description: 'XPath expression' },
|
|
@@ -369,6 +370,34 @@ export const TOOLS = [
|
|
|
369
370
|
context: { type: 'string', description: 'Additional context for AI search' },
|
|
370
371
|
shadowDOM: { type: 'boolean', description: 'Search inside Shadow DOM elements', default: false },
|
|
371
372
|
searchFrames: { type: 'boolean', description: 'Search across all iframes/frames', default: false },
|
|
373
|
+
// === BATCH SCRAPE MODE (merged from batch_element_scraper) ===
|
|
374
|
+
batchMode: { type: 'boolean', description: 'Enable batch scraping mode for extracting data from multiple similar elements', default: false },
|
|
375
|
+
extractAttributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract in batch mode (e.g., href, src, data-id). Default: textContent, href, src' },
|
|
376
|
+
limit: { type: 'number', description: 'Maximum elements to process in batch mode', default: 100 },
|
|
377
|
+
includeInnerHTML: { type: 'boolean', description: 'Include innerHTML in batch extraction', default: false },
|
|
378
|
+
includePosition: { type: 'boolean', description: 'Include element position (x, y, width, height) in results', default: false },
|
|
379
|
+
// === ADVANCED FILTERS ===
|
|
380
|
+
filter: {
|
|
381
|
+
type: 'object',
|
|
382
|
+
description: 'Advanced filtering options for more precise element selection',
|
|
383
|
+
properties: {
|
|
384
|
+
visible: { type: 'boolean', description: 'Only find visible elements (not hidden by CSS)' },
|
|
385
|
+
enabled: { type: 'boolean', description: 'Only find enabled elements (not disabled)' },
|
|
386
|
+
minWidth: { type: 'number', description: 'Minimum element width in pixels' },
|
|
387
|
+
minHeight: { type: 'number', description: 'Minimum element height in pixels' },
|
|
388
|
+
hasChildren: { type: 'boolean', description: 'Only elements that have child elements' },
|
|
389
|
+
containsClass: { type: 'string', description: 'Element must contain this CSS class' },
|
|
390
|
+
matchPattern: { type: 'string', description: 'Regex pattern to match element text content' },
|
|
391
|
+
},
|
|
392
|
+
additionalProperties: false,
|
|
393
|
+
},
|
|
394
|
+
// === OUTPUT OPTIONS ===
|
|
395
|
+
returnType: {
|
|
396
|
+
type: 'string',
|
|
397
|
+
enum: ['elements', 'selectors', 'data'],
|
|
398
|
+
description: 'What to return: elements (full info with tag, text, attributes), selectors (just CSS selectors), data (extracted attribute data only)',
|
|
399
|
+
default: 'elements'
|
|
400
|
+
},
|
|
372
401
|
},
|
|
373
402
|
},
|
|
374
403
|
},
|
|
@@ -426,20 +455,9 @@ export const TOOLS = [
|
|
|
426
455
|
},
|
|
427
456
|
},
|
|
428
457
|
// ============================================================
|
|
429
|
-
// ADVANCED TOOLS
|
|
458
|
+
// ADVANCED TOOLS
|
|
430
459
|
// ============================================================
|
|
431
|
-
|
|
432
|
-
name: 'breadcrumb_navigator',
|
|
433
|
-
description: 'Navigate using site breadcrumbs - find and click breadcrumb links',
|
|
434
|
-
inputSchema: {
|
|
435
|
-
type: 'object',
|
|
436
|
-
additionalProperties: false,
|
|
437
|
-
properties: {
|
|
438
|
-
targetIndex: { type: 'number', description: 'Index of breadcrumb to click (0-based)' },
|
|
439
|
-
targetText: { type: 'string', description: 'Text of breadcrumb to click' },
|
|
440
|
-
},
|
|
441
|
-
},
|
|
442
|
-
},
|
|
460
|
+
// breadcrumb_navigator REMOVED - use click or find_element with text
|
|
443
461
|
{
|
|
444
462
|
name: 'redirect_tracer',
|
|
445
463
|
description: 'Trace URL redirects including standard, JavaScript, and meta refresh redirects',
|
|
@@ -457,20 +475,53 @@ export const TOOLS = [
|
|
|
457
475
|
},
|
|
458
476
|
},
|
|
459
477
|
{
|
|
460
|
-
name: '
|
|
461
|
-
description:
|
|
478
|
+
name: 'search_regex',
|
|
479
|
+
description: `🔥 ULTRA-POWERFUL Regex Engine (like regex101.com) - Test, match, replace with full regex support. Features: Named capture groups, all regex flags (g/i/m/s/u/y), match highlighting, detailed match info with indices, replace mode, pattern explanation, timeout protection against catastrophic backtracking. Search in HTML, text, scripts, styles, or attributes.`,
|
|
462
480
|
inputSchema: {
|
|
463
481
|
type: 'object',
|
|
464
482
|
additionalProperties: false,
|
|
465
483
|
properties: {
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
484
|
+
// Core regex
|
|
485
|
+
pattern: { type: 'string', description: 'Regex pattern to search (supports full JavaScript regex syntax including lookahead/lookbehind)' },
|
|
486
|
+
testString: { type: 'string', description: 'Optional custom string to test against. If not provided, uses page content' },
|
|
487
|
+
// Regex flags (individually controllable like regex101.com)
|
|
488
|
+
flags: {
|
|
489
|
+
type: 'object',
|
|
490
|
+
description: 'Regex flags - control each flag individually',
|
|
491
|
+
properties: {
|
|
492
|
+
global: { type: 'boolean', description: 'g flag - Find all matches', default: true },
|
|
493
|
+
ignoreCase: { type: 'boolean', description: 'i flag - Case insensitive', default: false },
|
|
494
|
+
multiline: { type: 'boolean', description: 'm flag - ^ and $ match line boundaries', default: false },
|
|
495
|
+
dotAll: { type: 'boolean', description: 's flag - Dot matches newlines', default: false },
|
|
496
|
+
unicode: { type: 'boolean', description: 'u flag - Unicode mode', default: false },
|
|
497
|
+
sticky: { type: 'boolean', description: 'y flag - Sticky mode', default: false },
|
|
498
|
+
},
|
|
499
|
+
additionalProperties: false,
|
|
500
|
+
},
|
|
501
|
+
// Replace mode (like regex101.com substitution)
|
|
502
|
+
replaceWith: { type: 'string', description: 'Replace matches with this string. Supports $1, $2, $& for capture groups' },
|
|
503
|
+
// Match options
|
|
504
|
+
maxMatches: { type: 'number', description: 'Maximum number of matches to return', default: 100 },
|
|
505
|
+
contextChars: { type: 'number', description: 'Number of context characters around each match', default: 50 },
|
|
506
|
+
// Source selection
|
|
507
|
+
sourceType: {
|
|
508
|
+
type: 'string',
|
|
509
|
+
enum: ['text', 'html', 'scripts', 'styles', 'attributes', 'all'],
|
|
510
|
+
description: 'What content to search in',
|
|
511
|
+
default: 'text'
|
|
512
|
+
},
|
|
469
513
|
selector: { type: 'string', description: 'CSS selector to limit search scope' },
|
|
514
|
+
// Advanced features
|
|
515
|
+
extractGroups: { type: 'boolean', description: 'Extract and return named/numbered capture groups', default: true },
|
|
516
|
+
highlightMatches: { type: 'boolean', description: 'Return matches with <<MATCH>> highlighting', default: false },
|
|
517
|
+
showMatchInfo: { type: 'boolean', description: 'Show detailed match info: index, length, groups', default: true },
|
|
518
|
+
// Safety
|
|
519
|
+
timeout: { type: 'number', description: 'Regex execution timeout in ms (prevents catastrophic backtracking)', default: 5000 },
|
|
470
520
|
},
|
|
471
521
|
required: ['pattern'],
|
|
472
522
|
},
|
|
473
523
|
},
|
|
524
|
+
// web_search tool REMOVED - redundant with search_regex and other tools
|
|
474
525
|
{
|
|
475
526
|
name: 'extract_json',
|
|
476
527
|
description: 'Extract embedded JSON/API data from page (LD+JSON, __NEXT_DATA__, etc.) + Advanced Decode (base64, URL, hex, rot13, multi-layer) + Packed JavaScript decoder + AES-CBC decryption for encrypted streaming sites',
|
|
@@ -609,21 +660,8 @@ export const TOOLS = [
|
|
|
609
660
|
// api_finder REMOVED - merged into network_recorder (use findApis: true)
|
|
610
661
|
// ajax_content_waiter REMOVED - merged into wait tool (use pollInterval, expectedContent)
|
|
611
662
|
// media_extractor REMOVED - functionality merged into stream_extractor
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
description: 'Capture screenshot of a specific element',
|
|
615
|
-
inputSchema: {
|
|
616
|
-
type: 'object',
|
|
617
|
-
additionalProperties: false,
|
|
618
|
-
properties: {
|
|
619
|
-
selector: { type: 'string', description: 'CSS selector of element to capture' },
|
|
620
|
-
path: { type: 'string', description: 'File path to save screenshot' },
|
|
621
|
-
format: { type: 'string', enum: ['png', 'jpeg', 'webp'], description: 'Image format', default: 'png' },
|
|
622
|
-
quality: { type: 'number', description: 'Quality for JPEG/WebP (0-100)' },
|
|
623
|
-
},
|
|
624
|
-
required: ['selector'],
|
|
625
|
-
},
|
|
626
|
-
},
|
|
663
|
+
// element_screenshot REMOVED - use deep_analysis with includeScreenshot: true for screenshots
|
|
664
|
+
// batch_element_scraper REMOVED - merged into find_element (use batchMode: true)
|
|
627
665
|
{
|
|
628
666
|
name: 'link_harvester',
|
|
629
667
|
description: 'Harvest all links from page with filtering options',
|
|
@@ -638,20 +676,6 @@ export const TOOLS = [
|
|
|
638
676
|
},
|
|
639
677
|
},
|
|
640
678
|
},
|
|
641
|
-
{
|
|
642
|
-
name: 'batch_element_scraper',
|
|
643
|
-
description: 'Efficiently scrape lists of similar elements',
|
|
644
|
-
inputSchema: {
|
|
645
|
-
type: 'object',
|
|
646
|
-
additionalProperties: false,
|
|
647
|
-
properties: {
|
|
648
|
-
selector: { type: 'string', description: 'CSS selector for elements to scrape' },
|
|
649
|
-
attributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract' },
|
|
650
|
-
limit: { type: 'number', description: 'Maximum elements to scrape', default: 100 },
|
|
651
|
-
},
|
|
652
|
-
required: ['selector'],
|
|
653
|
-
},
|
|
654
|
-
},
|
|
655
679
|
// extract_schema REMOVED - merged into extract_json (use extractSchema option)
|
|
656
680
|
// m3u8_parser REMOVED - functionality merged into stream_extractor
|
|
657
681
|
{
|
|
@@ -801,9 +825,10 @@ export const TOOL_NAMES = {
|
|
|
801
825
|
FIND_ELEMENT: 'find_element',
|
|
802
826
|
SAVE_CONTENT_AS_MARKDOWN: 'save_content_as_markdown',
|
|
803
827
|
// Advanced tools
|
|
804
|
-
BREADCRUMB_NAVIGATOR
|
|
828
|
+
// BREADCRUMB_NAVIGATOR REMOVED - use click or find_element with text
|
|
805
829
|
REDIRECT_TRACER: 'redirect_tracer',
|
|
806
|
-
|
|
830
|
+
SEARCH_REGEX: 'search_regex', // Renamed from SEARCH_CONTENT - now regex101.com-like
|
|
831
|
+
// WEB_SEARCH REMOVED - redundant with search_regex
|
|
807
832
|
EXTRACT_JSON: 'extract_json',
|
|
808
833
|
SCRAPE_META_TAGS: 'scrape_meta_tags',
|
|
809
834
|
PRESS_KEY: 'press_key',
|
|
@@ -813,9 +838,9 @@ export const TOOL_NAMES = {
|
|
|
813
838
|
// API_FINDER: 'api_finder', // REMOVED - merged into NETWORK_RECORDER (use findApis option)
|
|
814
839
|
// AJAX_CONTENT_WAITER: 'ajax_content_waiter', // REMOVED - merged into WAIT (use pollInterval, expectedContent)
|
|
815
840
|
// MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
|
|
816
|
-
ELEMENT_SCREENSHOT: 'element_screenshot',
|
|
841
|
+
// ELEMENT_SCREENSHOT: 'element_screenshot', // REMOVED - use DEEP_ANALYSIS with includeScreenshot
|
|
817
842
|
LINK_HARVESTER: 'link_harvester',
|
|
818
|
-
BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
|
|
843
|
+
// BATCH_ELEMENT_SCRAPER: 'batch_element_scraper', // REMOVED - merged into FIND_ELEMENT (use batchMode option)
|
|
819
844
|
// EXTRACT_SCHEMA: 'extract_schema', // REMOVED - merged into EXTRACT_JSON (use extractSchema option)
|
|
820
845
|
// M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
|
|
821
846
|
COOKIE_MANAGER: 'cookie_manager',
|
package/dist/unified-server.js
CHANGED
|
@@ -218,21 +218,18 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
218
218
|
result = await handleSaveContentAsMarkdown(args);
|
|
219
219
|
break;
|
|
220
220
|
// Advanced tools
|
|
221
|
-
|
|
222
|
-
if (!page)
|
|
223
|
-
throw new Error('Browser not initialized');
|
|
224
|
-
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleBreadcrumbNavigator(page, args || {})) }] };
|
|
225
|
-
break;
|
|
221
|
+
// BREADCRUMB_NAVIGATOR REMOVED - use click or find_element
|
|
226
222
|
case TOOL_NAMES.REDIRECT_TRACER:
|
|
227
223
|
if (!page)
|
|
228
224
|
throw new Error('Browser not initialized');
|
|
229
225
|
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleUrlRedirectTracer(page, args)) }] };
|
|
230
226
|
break;
|
|
231
|
-
case TOOL_NAMES.
|
|
227
|
+
case TOOL_NAMES.SEARCH_REGEX:
|
|
232
228
|
if (!page)
|
|
233
229
|
throw new Error('Browser not initialized');
|
|
234
|
-
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.
|
|
230
|
+
result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchRegex(page, args)) }] };
|
|
235
231
|
break;
|
|
232
|
+
// WEB_SEARCH case REMOVED - redundant with search_regex
|
|
236
233
|
// MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
|
|
237
234
|
case TOOL_NAMES.STREAM_EXTRACTOR:
|
|
238
235
|
if (!page)
|
|
@@ -615,17 +612,14 @@ async function main() {
|
|
|
615
612
|
console.error(' 🖼️ iframe_handler - Handle nested iframes (deep_scrape)');
|
|
616
613
|
console.error('');
|
|
617
614
|
console.error(' Advanced Tools:');
|
|
618
|
-
console.error('
|
|
615
|
+
console.error(' 🔥 search_regex - ULTRA Regex Engine (like regex101)');
|
|
616
|
+
console.error(' 🚀 web_search - Intelligent Content Search');
|
|
619
617
|
console.error(' 📊 extract_json - Extract embedded JSON');
|
|
620
618
|
console.error(' 🏷️ scrape_meta_tags - Extract meta/OG tags');
|
|
621
619
|
console.error(' 📈 deep_analysis - Full page analysis');
|
|
622
|
-
console.error(' 📡 network_recorder - Record network traffic');
|
|
623
|
-
console.error(' 🔌 api_finder - Discover hidden APIs');
|
|
624
|
-
console.error(' ⏱️ ajax_content_waiter - Wait for dynamic content');
|
|
620
|
+
console.error(' 📡 network_recorder - Record network traffic + API finder');
|
|
625
621
|
console.error(' 🔗 link_harvester - Harvest all links');
|
|
626
|
-
console.error(' 📋 batch_element_scraper - Batch scrape elements');
|
|
627
622
|
console.error(' 🏗️ extract_schema - Extract Schema.org data');
|
|
628
|
-
console.error(' 📸 element_screenshot - Screenshot element');
|
|
629
623
|
console.error(' 🧭 breadcrumb_navigator - Navigate breadcrumbs');
|
|
630
624
|
console.error(' ↪️ redirect_tracer - Trace URL redirects');
|
|
631
625
|
console.error(' 📊 progress_tracker - Track task progress');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brave-real-browser-mcp-server",
|
|
3
|
-
"version": "2.27.
|
|
3
|
+
"version": "2.27.26",
|
|
4
4
|
"description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "latest",
|
|
52
52
|
"@types/turndown": "latest",
|
|
53
|
-
"brave-real-browser": "^2.8.
|
|
53
|
+
"brave-real-browser": "^2.8.26",
|
|
54
54
|
"puppeteer-core": "^24.35.0",
|
|
55
55
|
"turndown": "latest",
|
|
56
56
|
"vscode-languageserver": "^9.0.1",
|