brave-real-browser-mcp-server 2.27.25 → 2.27.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -195,70 +195,526 @@ export async function handleMultiLayerRedirectTrace(page, args) {
195
195
  };
196
196
  }
197
197
  /**
198
- * Search text or regex patterns in page content
198
+ * 🔥 ULTRA-POWERFUL Regex Engine (like regex101.com)
199
+ * Features: Named capture groups, all regex flags, match highlighting,
200
+ * detailed match info, replace mode, timeout protection
199
201
  */
200
- export async function handleSearchContent(page, args) {
201
- // Progress tracking
202
+ export async function handleSearchRegex(page, args) {
202
203
  const progressNotifier = getProgressNotifier();
203
- const tracker = progressNotifier.createTracker(`search-${Date.now()}`);
204
- tracker.start(100, `🔎 Searching for: "${args.pattern}"...`);
205
- // Wait for body to be available
204
+ const tracker = progressNotifier.createTracker(`search-regex-${Date.now()}`);
205
+ tracker.start(100, `🔎 Regex Search: "${args.pattern}"`);
206
+ // Build regex flags
207
+ const flags = args.flags || {};
208
+ let flagString = '';
209
+ if (flags.global !== false)
210
+ flagString += 'g';
211
+ if (flags.ignoreCase)
212
+ flagString += 'i';
213
+ if (flags.multiline)
214
+ flagString += 'm';
215
+ if (flags.dotAll)
216
+ flagString += 's';
217
+ if (flags.unicode)
218
+ flagString += 'u';
219
+ if (flags.sticky)
220
+ flagString += 'y';
221
+ // Validate regex
222
+ let regex;
223
+ let patternInfo = { flags: flagString, isValid: true, errorMessage: undefined };
206
224
  try {
207
- tracker.setProgress(10, '⏳ Waiting for page content...');
208
- await page.waitForSelector('body', { timeout: 5000 });
225
+ regex = new RegExp(args.pattern, flagString);
209
226
  }
210
- catch {
211
- // Continue anyway
227
+ catch (e) {
228
+ tracker.fail(`Invalid regex: ${e.message}`);
229
+ return {
230
+ found: false,
231
+ matches: [],
232
+ count: 0,
233
+ patternInfo: { flags: flagString, isValid: false, errorMessage: e.message },
234
+ };
212
235
  }
213
- tracker.setProgress(30, '📄 Extracting page content...');
236
+ tracker.setProgress(20, '📄 Extracting content...');
237
+ // Get content based on sourceType
214
238
  let content = '';
215
- if (args.selector) {
239
+ if (args.testString) {
240
+ content = args.testString;
241
+ }
242
+ else {
243
+ const sourceType = args.sourceType || 'text';
216
244
  try {
217
- content = await page.$eval(args.selector, (el) => el.textContent || '');
245
+ await page.waitForSelector('body', { timeout: 5000 });
218
246
  }
219
- catch {
220
- content = '';
247
+ catch { /* continue */ }
248
+ if (args.selector) {
249
+ try {
250
+ content = await page.$eval(args.selector, (el, type) => {
251
+ if (type === 'html')
252
+ return el.innerHTML;
253
+ if (type === 'text')
254
+ return el.textContent || '';
255
+ return el.outerHTML;
256
+ }, sourceType);
257
+ }
258
+ catch {
259
+ content = '';
260
+ }
261
+ }
262
+ else {
263
+ content = await page.evaluate((type) => {
264
+ if (type === 'html')
265
+ return document.documentElement.innerHTML;
266
+ if (type === 'scripts') {
267
+ return Array.from(document.scripts).map(s => s.textContent).join('\n');
268
+ }
269
+ if (type === 'styles') {
270
+ return Array.from(document.styleSheets).map(s => {
271
+ try {
272
+ return Array.from(s.cssRules).map(r => r.cssText).join('\n');
273
+ }
274
+ catch {
275
+ return '';
276
+ }
277
+ }).join('\n');
278
+ }
279
+ if (type === 'attributes') {
280
+ return Array.from(document.querySelectorAll('*')).map(el => Array.from(el.attributes).map(a => `${a.name}="${a.value}"`).join(' ')).join('\n');
281
+ }
282
+ if (type === 'all') {
283
+ return document.documentElement.outerHTML;
284
+ }
285
+ return document.body?.textContent || '';
286
+ }, sourceType);
221
287
  }
222
288
  }
223
- else {
224
- content = await page.evaluate(() => document.body?.textContent || document.documentElement?.textContent || '');
225
- }
226
- const matches = [];
227
- if (!content || content.length === 0) {
289
+ if (!content) {
228
290
  tracker.fail('No content to search');
229
- return { found: false, matches: [], count: 0 };
230
- }
231
- tracker.setProgress(50, '🔍 Running pattern match...');
232
- let regex;
233
- if (args.isRegex) {
234
- regex = new RegExp(args.pattern, args.caseSensitive ? 'g' : 'gi');
235
- }
236
- else {
237
- const escaped = args.pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
238
- regex = new RegExp(escaped, args.caseSensitive ? 'g' : 'gi');
291
+ return { found: false, matches: [], count: 0, patternInfo };
239
292
  }
293
+ tracker.setProgress(50, '🔍 Running regex match...');
294
+ const matches = [];
295
+ const maxMatches = args.maxMatches || 100;
296
+ const contextChars = args.contextChars || 50;
297
+ const timeout = args.timeout || 5000;
298
+ const startTime = Date.now();
240
299
  let match;
241
300
  while ((match = regex.exec(content)) !== null) {
242
- const start = Math.max(0, match.index - 50);
243
- const end = Math.min(content.length, match.index + match[0].length + 50);
244
- matches.push({
301
+ // Timeout protection
302
+ if (Date.now() - startTime > timeout) {
303
+ tracker.setProgress(80, '⚠️ Timeout reached, returning partial results');
304
+ break;
305
+ }
306
+ const start = Math.max(0, match.index - contextChars);
307
+ const end = Math.min(content.length, match.index + match[0].length + contextChars);
308
+ let context = content.substring(start, end);
309
+ // Highlight if requested
310
+ if (args.highlightMatches) {
311
+ const matchStart = match.index - start;
312
+ const matchEnd = matchStart + match[0].length;
313
+ context = context.substring(0, matchStart) + '<<MATCH>>' + context.substring(matchStart, matchEnd) + '<</MATCH>>' + context.substring(matchEnd);
314
+ }
315
+ const matchResult = {
245
316
  text: match[0],
246
- context: content.substring(start, end),
247
- });
248
- if (matches.length >= 100)
249
- break; // Limit matches
317
+ context,
318
+ index: match.index,
319
+ length: match[0].length,
320
+ };
321
+ // Extract groups if requested
322
+ if (args.extractGroups !== false) {
323
+ if (match.groups) {
324
+ matchResult.groups = match.groups;
325
+ }
326
+ if (match.length > 1) {
327
+ matchResult.numberedGroups = match.slice(1);
328
+ }
329
+ }
330
+ matches.push(matchResult);
331
+ if (matches.length >= maxMatches)
332
+ break;
333
+ // Prevent infinite loop for zero-length matches
334
+ if (match[0].length === 0) {
335
+ regex.lastIndex++;
336
+ }
337
+ }
338
+ // Replace mode
339
+ let replaced;
340
+ if (args.replaceWith !== undefined) {
341
+ tracker.setProgress(90, '🔄 Applying replacement...');
342
+ replaced = content.replace(regex, args.replaceWith);
250
343
  }
251
344
  tracker.complete(`🎉 Found ${matches.length} matches`);
252
345
  return {
253
346
  found: matches.length > 0,
254
347
  matches,
255
348
  count: matches.length,
349
+ replaced,
350
+ patternInfo,
256
351
  };
257
352
  }
258
353
  /**
259
- * Find elements using XPath or Advanced CSS
354
+ * 🚀 INTELLIGENT Content Search Engine
355
+ * Features: Fuzzy matching, proximity search, advanced operators,
356
+ * relevance scoring, snippet extraction with highlighting
357
+ */
358
+ export async function handleWebSearch(page, args) {
359
+ const progressNotifier = getProgressNotifier();
360
+ const tracker = progressNotifier.createTracker(`web-search-${Date.now()}`);
361
+ tracker.start(100, `🚀 Intelligent Search: "${args.query}"`);
362
+ try {
363
+ await page.waitForSelector('body', { timeout: 5000 });
364
+ }
365
+ catch { /* continue */ }
366
+ tracker.setProgress(20, '📄 Extracting content...');
367
+ // Get content based on searchIn type
368
+ const searchIn = args.searchIn || 'text';
369
+ let content = '';
370
+ let elements = [];
371
+ content = await page.evaluate((opts) => {
372
+ const { searchIn, selector } = opts;
373
+ const root = selector ? document.querySelector(selector) : document.body;
374
+ if (!root)
375
+ return '';
376
+ switch (searchIn) {
377
+ case 'html':
378
+ return root.innerHTML;
379
+ case 'links':
380
+ return Array.from(root.querySelectorAll('a')).map((a) => `${a.textContent?.trim()} [${a.getAttribute('href')}]`).join('\n');
381
+ case 'images':
382
+ return Array.from(root.querySelectorAll('img')).map((img) => `${img.alt} [${img.src}]`).join('\n');
383
+ case 'videos':
384
+ return Array.from(root.querySelectorAll('video, iframe[src*="youtube"], iframe[src*="vimeo"]')).map((v) => v.getAttribute('src') || v.querySelector?.('source')?.getAttribute('src') || '').join('\n');
385
+ case 'scripts':
386
+ return Array.from(document.scripts).map((s) => s.textContent).join('\n');
387
+ case 'json':
388
+ return Array.from(document.querySelectorAll('script[type="application/json"], script[type="application/ld+json"]'))
389
+ .map((s) => s.textContent).join('\n');
390
+ case 'schema':
391
+ return Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
392
+ .map((s) => s.textContent).join('\n');
393
+ case 'meta':
394
+ return Array.from(document.querySelectorAll('meta')).map((m) => `${m.name || m.getAttribute('property')}: ${m.content}`).join('\n');
395
+ case 'all':
396
+ return root.outerHTML;
397
+ default:
398
+ return root.textContent || '';
399
+ }
400
+ }, { searchIn, selector: args.selector });
401
+ if (!content) {
402
+ tracker.fail('No content to search');
403
+ return {
404
+ found: false,
405
+ results: [],
406
+ count: 0,
407
+ totalOccurrences: 0,
408
+ searchInfo: { query: args.query, searchIn, matchMode: 'none' },
409
+ };
410
+ }
411
+ tracker.setProgress(40, '🔍 Processing search query...');
412
+ const matchMode = args.matchMode || {};
413
+ const operators = args.operators || {};
414
+ const filters = args.filters || {};
415
+ const maxResults = args.maxResults || 50;
416
+ const snippetLength = args.snippetLength || 150;
417
+ const highlightTag = args.highlightTag || '**';
418
+ // Parse query for operators
419
+ let searchTerms = [];
420
+ let phrases = [];
421
+ // Extract phrases (quoted strings)
422
+ const phraseMatches = args.query.match(/"([^"]+)"/g);
423
+ if (phraseMatches) {
424
+ phrases = phraseMatches.map(p => p.replace(/"/g, ''));
425
+ }
426
+ // Get remaining terms
427
+ const remainingQuery = args.query.replace(/"[^"]+"/g, '').trim();
428
+ if (remainingQuery) {
429
+ // Parse AND/OR/NOT operators
430
+ const parts = remainingQuery.split(/\s+(AND|OR|NOT)\s+/i);
431
+ searchTerms = parts.filter(p => !['AND', 'OR', 'NOT'].includes(p.toUpperCase()));
432
+ }
433
+ // Add explicit operator terms
434
+ if (operators.must)
435
+ searchTerms.push(...operators.must);
436
+ if (operators.should)
437
+ searchTerms.push(...operators.should);
438
+ if (operators.phrase)
439
+ phrases.push(operators.phrase);
440
+ tracker.setProgress(60, '🎯 Matching content...');
441
+ const results = [];
442
+ // Levenshtein distance for fuzzy matching
443
+ const levenshtein = (a, b) => {
444
+ const matrix = Array(b.length + 1).fill(null).map(() => Array(a.length + 1).fill(null));
445
+ for (let i = 0; i <= a.length; i++)
446
+ matrix[0][i] = i;
447
+ for (let j = 0; j <= b.length; j++)
448
+ matrix[j][0] = j;
449
+ for (let j = 1; j <= b.length; j++) {
450
+ for (let i = 1; i <= a.length; i++) {
451
+ const indicator = a[i - 1] === b[j - 1] ? 0 : 1;
452
+ matrix[j][i] = Math.min(matrix[j][i - 1] + 1, matrix[j - 1][i] + 1, matrix[j - 1][i - 1] + indicator);
453
+ }
454
+ }
455
+ return matrix[b.length][a.length];
456
+ };
457
+ const fuzzyMatch = (text, term, threshold) => {
458
+ const distance = levenshtein(text.toLowerCase(), term.toLowerCase());
459
+ const maxLen = Math.max(text.length, term.length);
460
+ return (1 - distance / maxLen) >= threshold;
461
+ };
462
+ // Split content into searchable chunks
463
+ const sentences = content.split(/[.!?\n]+/).filter(s => s.trim().length > 10);
464
+ let totalOccurrences = 0;
465
+ for (let i = 0; i < sentences.length && results.length < maxResults; i++) {
466
+ const sentence = sentences[i].trim();
467
+ let score = 0;
468
+ let matched = false;
469
+ // Check phrases first
470
+ for (const phrase of phrases) {
471
+ const phraseRegex = new RegExp(phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), matchMode.caseSensitive ? 'g' : 'gi');
472
+ if (phraseRegex.test(sentence)) {
473
+ score += 10;
474
+ matched = true;
475
+ totalOccurrences += (sentence.match(phraseRegex) || []).length;
476
+ }
477
+ }
478
+ // Check terms
479
+ for (const term of searchTerms) {
480
+ if (matchMode.fuzzy) {
481
+ const words = sentence.split(/\s+/);
482
+ for (const word of words) {
483
+ if (fuzzyMatch(word, term, matchMode.fuzzyThreshold || 0.8)) {
484
+ score += 5;
485
+ matched = true;
486
+ totalOccurrences++;
487
+ }
488
+ }
489
+ }
490
+ else {
491
+ const termRegex = matchMode.wholeWord
492
+ ? new RegExp(`\\b${term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, matchMode.caseSensitive ? 'g' : 'gi')
493
+ : new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), matchMode.caseSensitive ? 'g' : 'gi');
494
+ const termMatches = sentence.match(termRegex);
495
+ if (termMatches) {
496
+ score += termMatches.length * 3;
497
+ matched = true;
498
+ totalOccurrences += termMatches.length;
499
+ }
500
+ }
501
+ }
502
+ // Check mustNot terms
503
+ if (operators.mustNot) {
504
+ for (const notTerm of operators.mustNot) {
505
+ if (sentence.toLowerCase().includes(notTerm.toLowerCase())) {
506
+ matched = false;
507
+ score = 0;
508
+ break;
509
+ }
510
+ }
511
+ }
512
+ // Proximity search
513
+ if (matched && operators.proximity?.words && operators.proximity.words.length >= 2) {
514
+ const words = sentence.toLowerCase().split(/\s+/);
515
+ const positions = [];
516
+ for (const pw of operators.proximity.words) {
517
+ const idx = words.findIndex(w => w.includes(pw.toLowerCase()));
518
+ if (idx !== -1)
519
+ positions.push(idx);
520
+ }
521
+ if (positions.length >= 2) {
522
+ const distance = Math.abs(positions[0] - positions[1]);
523
+ if (distance <= (operators.proximity.distance || 5)) {
524
+ score += 15;
525
+ }
526
+ }
527
+ }
528
+ // Apply filters
529
+ if (matched && filters.minLength && sentence.length < filters.minLength)
530
+ matched = false;
531
+ if (matched && filters.maxLength && sentence.length > filters.maxLength)
532
+ matched = false;
533
+ if (matched && filters.containsPattern) {
534
+ const containsRegex = new RegExp(filters.containsPattern, 'i');
535
+ if (!containsRegex.test(sentence))
536
+ matched = false;
537
+ }
538
+ if (matched && filters.excludePattern) {
539
+ const excludeRegex = new RegExp(filters.excludePattern, 'i');
540
+ if (excludeRegex.test(sentence))
541
+ matched = false;
542
+ }
543
+ if (matched && score > 0) {
544
+ // Create highlighted snippet
545
+ let snippet = sentence.substring(0, snippetLength);
546
+ for (const term of [...searchTerms, ...phrases]) {
547
+ const highlightRegex = new RegExp(`(${term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})`, 'gi');
548
+ snippet = snippet.replace(highlightRegex, `${highlightTag}$1${highlightTag}`);
549
+ }
550
+ results.push({
551
+ text: sentence.substring(0, 100),
552
+ snippet,
553
+ score,
554
+ position: i,
555
+ });
556
+ }
557
+ }
558
+ // Sort by relevance/position/frequency
559
+ const sortBy = args.sortBy || 'relevance';
560
+ if (sortBy === 'relevance') {
561
+ results.sort((a, b) => b.score - a.score);
562
+ }
563
+ else if (sortBy === 'position') {
564
+ results.sort((a, b) => a.position - b.position);
565
+ }
566
+ tracker.complete(`🎉 Found ${results.length} results (${totalOccurrences} total occurrences)`);
567
+ return {
568
+ found: results.length > 0,
569
+ results,
570
+ count: results.length,
571
+ totalOccurrences,
572
+ searchInfo: {
573
+ query: args.query,
574
+ searchIn,
575
+ matchMode: matchMode.fuzzy ? 'fuzzy' : matchMode.wholeWord ? 'wholeWord' : 'normal',
576
+ },
577
+ };
578
+ }
579
+ /**
580
+ * Ultra-powerful element finder and batch scraper
581
+ * Find elements using XPath, CSS, text, attributes with advanced filters
582
+ * Supports batch scraping mode (merged from batch_element_scraper)
260
583
  */
261
584
  export async function handleFindElementAdvanced(page, args) {
585
+ // Progress tracking
586
+ const progressNotifier = getProgressNotifier();
587
+ const tracker = progressNotifier.createTracker(`find-element-${Date.now()}`);
588
+ const limit = args.limit || 100;
589
+ const selectorToUse = args.selector || args.cssSelector;
590
+ // ============================================================
591
+ // BATCH MODE (merged from batch_element_scraper)
592
+ // ============================================================
593
+ if (args.batchMode && selectorToUse) {
594
+ tracker.start(100, '📋 Starting batch element extraction...');
595
+ const attributes = args.extractAttributes || ['textContent', 'href', 'src'];
596
+ tracker.setProgress(20, `🔍 Finding elements with selector: ${selectorToUse}`);
597
+ const items = await page.evaluate((opts) => {
598
+ const elements = Array.from(document.querySelectorAll(opts.selector)).slice(0, opts.limit);
599
+ return elements.map((el, idx) => {
600
+ const item = {};
601
+ // Extract requested attributes
602
+ for (const attr of opts.attributes) {
603
+ if (attr === 'textContent') {
604
+ item.text = el.textContent?.trim()?.substring(0, 500) || '';
605
+ }
606
+ else if (attr === 'innerHTML') {
607
+ item.html = el.innerHTML?.substring(0, 1000) || '';
608
+ }
609
+ else {
610
+ item[attr] = el.getAttribute(attr) || '';
611
+ }
612
+ }
613
+ // Include innerHTML if requested
614
+ if (opts.includeInnerHTML) {
615
+ item.html = el.innerHTML?.substring(0, 1000) || '';
616
+ }
617
+ // Always include tag
618
+ item.tag = el.tagName.toLowerCase();
619
+ // Include position if requested
620
+ if (opts.includePosition) {
621
+ const rect = el.getBoundingClientRect();
622
+ item.position = {
623
+ x: Math.round(rect.x),
624
+ y: Math.round(rect.y),
625
+ width: Math.round(rect.width),
626
+ height: Math.round(rect.height)
627
+ };
628
+ }
629
+ // Generate unique selector
630
+ const tagName = el.tagName.toLowerCase();
631
+ const id = el.id;
632
+ const className = typeof el.className === 'string' ? el.className.split(' ')[0] : '';
633
+ item.selector = id ? `#${id}` : className ? `${tagName}.${className}` : `${opts.selector}:nth-of-type(${idx + 1})`;
634
+ // Apply advanced filters if specified
635
+ if (opts.filter) {
636
+ const rect = el.getBoundingClientRect();
637
+ const style = window.getComputedStyle(el);
638
+ if (opts.filter.visible !== undefined) {
639
+ const isVisible = rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden';
640
+ if (opts.filter.visible !== isVisible)
641
+ return null;
642
+ }
643
+ if (opts.filter.enabled !== undefined) {
644
+ const isEnabled = !el.disabled;
645
+ if (opts.filter.enabled !== isEnabled)
646
+ return null;
647
+ }
648
+ if (opts.filter.minWidth && rect.width < opts.filter.minWidth)
649
+ return null;
650
+ if (opts.filter.minHeight && rect.height < opts.filter.minHeight)
651
+ return null;
652
+ if (opts.filter.hasChildren !== undefined) {
653
+ const hasKids = el.children.length > 0;
654
+ if (opts.filter.hasChildren !== hasKids)
655
+ return null;
656
+ }
657
+ if (opts.filter.containsClass && !el.classList.contains(opts.filter.containsClass))
658
+ return null;
659
+ if (opts.filter.matchPattern) {
660
+ const regex = new RegExp(opts.filter.matchPattern);
661
+ if (!regex.test(el.textContent || ''))
662
+ return null;
663
+ }
664
+ }
665
+ return item;
666
+ }).filter((item) => item !== null);
667
+ }, {
668
+ selector: selectorToUse,
669
+ limit,
670
+ attributes,
671
+ includeInnerHTML: args.includeInnerHTML,
672
+ includePosition: args.includePosition,
673
+ filter: args.filter
674
+ });
675
+ tracker.setProgress(80, `✅ Found ${items.length} elements`);
676
+ // Format output based on returnType
677
+ const returnType = args.returnType || 'elements';
678
+ if (returnType === 'selectors') {
679
+ tracker.complete(`🎉 Extracted ${items.length} selectors`);
680
+ return {
681
+ found: items.length > 0,
682
+ elements: items.map((item) => ({
683
+ selector: item.selector,
684
+ text: '',
685
+ tag: item.tag,
686
+ })),
687
+ count: items.length,
688
+ };
689
+ }
690
+ if (returnType === 'data') {
691
+ tracker.complete(`🎉 Extracted data from ${items.length} elements`);
692
+ return {
693
+ found: items.length > 0,
694
+ elements: [],
695
+ count: items.length,
696
+ batchData: items,
697
+ };
698
+ }
699
+ // Default: elements
700
+ tracker.complete(`🎉 Found and extracted ${items.length} elements`);
701
+ return {
702
+ found: items.length > 0,
703
+ elements: items.map((item) => ({
704
+ selector: item.selector,
705
+ text: item.text || '',
706
+ tag: item.tag,
707
+ attributes: item,
708
+ position: item.position,
709
+ })),
710
+ count: items.length,
711
+ batchData: items,
712
+ };
713
+ }
714
+ // ============================================================
715
+ // STANDARD FIND MODE
716
+ // ============================================================
717
+ tracker.start(100, '🔍 Finding elements...');
262
718
  const elements = [];
263
719
  // Wait for body to be available
264
720
  try {
@@ -267,8 +723,9 @@ export async function handleFindElementAdvanced(page, args) {
267
723
  catch {
268
724
  // Continue anyway
269
725
  }
726
+ tracker.setProgress(20, '📄 Searching with provided criteria...');
727
+ // XPath search
270
728
  if (args.xpath) {
271
- // Use document.evaluate for XPath (compatible with all Puppeteer versions)
272
729
  const xpathResults = await page.evaluate((xpath) => {
273
730
  const results = [];
274
731
  try {
@@ -277,11 +734,13 @@ export async function handleFindElementAdvanced(page, args) {
277
734
  let count = 0;
278
735
  while (node && count < 50) {
279
736
  if (node instanceof Element) {
737
+ const rect = node.getBoundingClientRect();
280
738
  results.push({
281
739
  text: node.textContent?.trim()?.substring(0, 100) || '',
282
740
  tag: node.tagName.toLowerCase(),
283
741
  id: node.id || '',
284
742
  className: typeof node.className === 'string' ? node.className : '',
743
+ position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
285
744
  });
286
745
  }
287
746
  node = iterator.iterateNext();
@@ -297,47 +756,65 @@ export async function handleFindElementAdvanced(page, args) {
297
756
  selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
298
757
  text: e.text,
299
758
  tag: e.tag,
759
+ position: args.includePosition ? e.position : undefined,
300
760
  })));
301
761
  }
302
- if (args.cssSelector) {
303
- const cssElements = await page.$$(args.cssSelector);
304
- for (const el of cssElements) {
305
- const data = await page.evaluate((e) => ({
306
- text: e.textContent?.trim()?.substring(0, 100) || '',
307
- tag: e.tagName.toLowerCase(),
308
- id: e.id,
309
- className: e.className,
310
- }), el);
762
+ // CSS selector search
763
+ if (selectorToUse) {
764
+ const cssElements = await page.$$(selectorToUse);
765
+ for (const el of cssElements.slice(0, limit)) {
766
+ const data = await page.evaluate((e, includePos) => {
767
+ const rect = e.getBoundingClientRect();
768
+ return {
769
+ text: e.textContent?.trim()?.substring(0, 100) || '',
770
+ tag: e.tagName.toLowerCase(),
771
+ id: e.id,
772
+ className: e.className,
773
+ position: includePos ? { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) } : undefined,
774
+ };
775
+ }, el, args.includePosition);
311
776
  elements.push({
312
- selector: args.cssSelector,
777
+ selector: selectorToUse,
313
778
  text: data.text,
314
779
  tag: data.tag,
780
+ position: data.position,
315
781
  });
316
782
  }
317
783
  }
318
- if (args.contains) {
319
- const containsElements = await page.evaluate((text) => {
784
+ // Text content search
785
+ if (args.contains || args.text) {
786
+ const searchText = args.contains || args.text || '';
787
+ const isExact = args.exact || false;
788
+ const containsElements = await page.evaluate((opts) => {
320
789
  const results = [];
321
- const allElements = document.querySelectorAll('*');
322
- for (let i = 0; i < allElements.length && results.length < 10; i++) {
790
+ const allElements = document.querySelectorAll(opts.elementType || '*');
791
+ for (let i = 0; i < allElements.length && results.length < opts.limit; i++) {
323
792
  const el = allElements[i];
324
- if (el.textContent?.includes(text)) {
793
+ const textContent = el.textContent || '';
794
+ const matches = opts.exact
795
+ ? textContent.trim() === opts.text
796
+ : textContent.includes(opts.text);
797
+ if (matches) {
798
+ const rect = el.getBoundingClientRect();
325
799
  results.push({
326
- text: el.textContent?.trim()?.substring(0, 100) || '',
800
+ text: textContent.trim()?.substring(0, 100) || '',
327
801
  tag: el.tagName.toLowerCase(),
328
802
  id: el.id || '',
329
803
  className: typeof el.className === 'string' ? el.className : '',
804
+ position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
330
805
  });
331
806
  }
332
807
  }
333
808
  return results;
334
- }, args.contains);
809
+ }, { text: searchText, exact: isExact, elementType: args.elementType, limit });
335
810
  elements.push(...containsElements.map((e) => ({
336
811
  selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
337
812
  text: e.text,
338
813
  tag: e.tag,
814
+ position: args.includePosition ? e.position : undefined,
339
815
  })));
340
816
  }
817
+ // Attribute search
341
818
  if (args.attributes) {
342
819
  let attributes = {};
343
820
  try {
@@ -346,39 +823,42 @@ export async function handleFindElementAdvanced(page, args) {
346
823
  catch (e) {
347
824
  // Ignore invalid JSON
348
825
  }
349
- // Use Puppeteer evaluation to find elements with matching attributes
350
- const attributeElements = await page.evaluate((attrs) => {
826
+ const attributeElements = await page.evaluate((opts) => {
351
827
  const results = [];
352
828
  const allElements = document.querySelectorAll('*');
353
- for (let i = 0; i < allElements.length && results.length < 50; i++) {
829
+ for (let i = 0; i < allElements.length && results.length < opts.limit; i++) {
354
830
  const el = allElements[i];
355
831
  let match = true;
356
- for (const [key, value] of Object.entries(attrs)) {
832
+ for (const [key, value] of Object.entries(opts.attrs)) {
357
833
  if (el.getAttribute(key) !== value) {
358
834
  match = false;
359
835
  break;
360
836
  }
361
837
  }
362
838
  if (match) {
839
+ const rect = el.getBoundingClientRect();
363
840
  results.push({
364
841
  text: el.textContent?.trim()?.substring(0, 100) || '',
365
842
  tag: el.tagName.toLowerCase(),
366
843
  id: el.id || '',
367
844
  className: typeof el.className === 'string' ? el.className : '',
845
+ position: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
368
846
  });
369
847
  }
370
848
  }
371
849
  return results;
372
- }, attributes);
850
+ }, { attrs: attributes, limit });
373
851
  elements.push(...attributeElements.map((e) => ({
374
852
  selector: e.id ? `#${e.id}` : e.className ? `.${e.className.split(' ')[0]}` : e.tag,
375
853
  text: e.text,
376
854
  tag: e.tag,
855
+ position: args.includePosition ? e.position : undefined,
377
856
  })));
378
857
  }
858
+ tracker.complete(`🎉 Found ${elements.length} elements`);
379
859
  return {
380
860
  found: elements.length > 0,
381
- elements: elements.slice(0, 50),
861
+ elements: elements.slice(0, limit),
382
862
  count: elements.length,
383
863
  };
384
864
  }
package/dist/index.js CHANGED
@@ -57,7 +57,11 @@ import { handleClick, handleType, handleSolveCaptcha, handleRandomScroll } from
57
57
  import { handleGetContent, handleFindSelector } from './handlers/content-handlers.js';
58
58
  import { handleSaveContentAsMarkdown } from './handlers/file-handlers.js';
59
59
  // Import advanced tools handlers
60
- import { handleBreadcrumbNavigator, handleUrlRedirectTracer, handleSearchContent, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder, handleElementScreenshot, handleLinkHarvester, handleBatchElementScraper, handleCookieManager,
60
+ import {
61
+ // handleBreadcrumbNavigator REMOVED - use click or find_element
62
+ handleUrlRedirectTracer, handleSearchRegex, handleExtractJson, handleScrapeMetaTags, handlePressKey, handleProgressTracker, handleDeepAnalysis, handleNetworkRecorder,
63
+ // handleElementScreenshot, // REMOVED - use deep_analysis with includeScreenshot
64
+ handleLinkHarvester, handleCookieManager,
61
65
  // Download tools
62
66
  handleFileDownloader,
63
67
  // Enhanced streaming/download tools
@@ -176,18 +180,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
176
180
  case TOOL_NAMES.SAVE_CONTENT_AS_MARKDOWN:
177
181
  return await handleSaveContentAsMarkdown(args);
178
182
  // Advanced Tools
179
- case TOOL_NAMES.BREADCRUMB_NAVIGATOR:
180
- if (!page)
181
- throw new Error('Browser not initialized. Call browser_init first.');
182
- return { content: [{ type: 'text', text: JSON.stringify(await handleBreadcrumbNavigator(page, args || {})) }] };
183
+ // BREADCRUMB_NAVIGATOR case REMOVED - use click or find_element
183
184
  case TOOL_NAMES.REDIRECT_TRACER:
184
185
  if (!page)
185
186
  throw new Error('Browser not initialized. Call browser_init first.');
186
187
  return { content: [{ type: 'text', text: JSON.stringify(await handleUrlRedirectTracer(page, args)) }] };
187
- case TOOL_NAMES.SEARCH_CONTENT:
188
+ case TOOL_NAMES.SEARCH_REGEX:
188
189
  if (!page)
189
190
  throw new Error('Browser not initialized. Call browser_init first.');
190
- return { content: [{ type: 'text', text: JSON.stringify(await handleSearchContent(page, args)) }] };
191
+ return { content: [{ type: 'text', text: JSON.stringify(await handleSearchRegex(page, args)) }] };
192
+ // WEB_SEARCH case REMOVED - redundant with search_regex
191
193
  case TOOL_NAMES.EXTRACT_JSON:
192
194
  if (!page)
193
195
  throw new Error('Browser not initialized. Call browser_init first.');
@@ -215,18 +217,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
215
217
  // API_FINDER case REMOVED - merged into NETWORK_RECORDER (use findApis: true)
216
218
  // AJAX_CONTENT_WAITER case REMOVED - merged into WAIT (use pollInterval, expectedContent)
217
219
  // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
218
- case TOOL_NAMES.ELEMENT_SCREENSHOT:
219
- if (!page)
220
- throw new Error('Browser not initialized. Call browser_init first.');
221
- return { content: [{ type: 'text', text: JSON.stringify(await handleElementScreenshot(page, args)) }] };
220
+ // ELEMENT_SCREENSHOT case REMOVED - use deep_analysis with includeScreenshot
222
221
  case TOOL_NAMES.LINK_HARVESTER:
223
222
  if (!page)
224
223
  throw new Error('Browser not initialized. Call browser_init first.');
225
224
  return { content: [{ type: 'text', text: JSON.stringify(await handleLinkHarvester(page, args || {})) }] };
226
- case TOOL_NAMES.BATCH_ELEMENT_SCRAPER:
227
- if (!page)
228
- throw new Error('Browser not initialized. Call browser_init first.');
229
- return { content: [{ type: 'text', text: JSON.stringify(await handleBatchElementScraper(page, args)) }] };
225
+ // BATCH_ELEMENT_SCRAPER case REMOVED - merged into FIND_ELEMENT (use batchMode)
230
226
  // EXTRACT_SCHEMA case REMOVED - merged into EXTRACT_JSON (use extractSchema option)
231
227
  // M3U8_PARSER case REMOVED - merged into STREAM_EXTRACTOR
232
228
  case TOOL_NAMES.COOKIE_MANAGER:
@@ -63,7 +63,7 @@ const TOOL_DEFINITIONS = {
63
63
  },
64
64
  find_element: {
65
65
  name: 'find_element',
66
- description: 'Find elements using text, CSS selector, XPath, attributes, or AI-powered description',
66
+ description: 'Ultra-powerful element finder and batch scraper. Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports batch scraping mode with advanced filtering.',
67
67
  category: 'Content',
68
68
  parameters: [
69
69
  { name: 'text', type: 'string', description: 'Text content to search for', required: false },
@@ -71,6 +71,11 @@ const TOOL_DEFINITIONS = {
71
71
  { name: 'xpath', type: 'string', description: 'XPath expression', required: false },
72
72
  { name: 'description', type: 'string', description: 'Natural language description (AI-powered)', required: false },
73
73
  { name: 'exact', type: 'boolean', description: 'Exact text match', required: false, default: false },
74
+ { name: 'batchMode', type: 'boolean', description: 'Enable batch scraping mode for multiple elements', required: false, default: false },
75
+ { name: 'extractAttributes', type: 'array', description: 'Attributes to extract in batch mode (e.g., href, src)', required: false },
76
+ { name: 'limit', type: 'number', description: 'Maximum elements to process', required: false, default: 100 },
77
+ { name: 'includePosition', type: 'boolean', description: 'Include element position (x, y, width, height)', required: false, default: false },
78
+ { name: 'returnType', type: 'string', description: 'Output format: elements, selectors, or data', required: false, enum: ['elements', 'selectors', 'data'], default: 'elements' },
74
79
  ],
75
80
  },
76
81
  save_content_as_markdown: {
@@ -164,15 +169,20 @@ const TOOL_DEFINITIONS = {
164
169
  ],
165
170
  },
166
171
  // Advanced Tools
167
- search_content: {
168
- name: 'search_content',
169
- description: 'Search text or Regex patterns in page content',
172
+ search_regex: {
173
+ name: 'search_regex',
174
+ description: '🔥 ULTRA-POWERFUL Regex Engine (like regex101.com) - Full regex support with flags, capture groups, replace mode, timeout protection',
170
175
  category: 'Advanced',
171
176
  parameters: [
172
- { name: 'pattern', type: 'string', description: 'Search pattern (text or regex)', required: true },
173
- { name: 'isRegex', type: 'boolean', description: 'Treat pattern as regex', required: false, default: false },
177
+ { name: 'pattern', type: 'string', description: 'Regex pattern to search', required: true },
178
+ { name: 'flags', type: 'object', description: 'Regex flags: global, ignoreCase, multiline, dotAll, unicode, sticky', required: false },
179
+ { name: 'replaceWith', type: 'string', description: 'Replace matches with this string (supports $1, $2, $&)', required: false },
180
+ { name: 'sourceType', type: 'string', description: 'Where to search: text, html, scripts, styles, attributes, all', required: false, default: 'text' },
181
+ { name: 'extractGroups', type: 'boolean', description: 'Extract capture groups', required: false, default: true },
182
+ { name: 'highlightMatches', type: 'boolean', description: 'Highlight matches in context', required: false, default: false },
174
183
  ],
175
184
  },
185
+ // web_search REMOVED - redundant with search_regex
176
186
  extract_json: {
177
187
  name: 'extract_json',
178
188
  description: 'Extract embedded JSON/API data from page (LD+JSON, __NEXT_DATA__, etc.)',
@@ -239,16 +249,7 @@ const TOOL_DEFINITIONS = {
239
249
  { name: 'maxLinks', type: 'number', description: 'Maximum links to return', required: false },
240
250
  ],
241
251
  },
242
- batch_element_scraper: {
243
- name: 'batch_element_scraper',
244
- description: 'Efficiently scrape lists of similar elements',
245
- category: 'Advanced',
246
- parameters: [
247
- { name: 'selector', type: 'string', description: 'CSS selector for elements to scrape', required: true },
248
- { name: 'attributes', type: 'array', description: 'Attributes to extract', required: false },
249
- { name: 'limit', type: 'number', description: 'Maximum elements to scrape', required: false, default: 100 },
250
- ],
251
- },
252
+ // batch_element_scraper REMOVED - merged into find_element (use batchMode: true)
252
253
  extract_schema: {
253
254
  name: 'extract_schema',
254
255
  description: 'Extract Schema.org structured data (JSON-LD and Microdata)',
@@ -257,25 +258,8 @@ const TOOL_DEFINITIONS = {
257
258
  { name: 'schemaTypes', type: 'array', description: 'Schema types to extract (e.g., Product, Article)', required: false },
258
259
  ],
259
260
  },
260
- element_screenshot: {
261
- name: 'element_screenshot',
262
- description: 'Capture screenshot of a specific element',
263
- category: 'Advanced',
264
- parameters: [
265
- { name: 'selector', type: 'string', description: 'CSS selector of element to capture', required: true },
266
- { name: 'path', type: 'string', description: 'File path to save screenshot', required: false },
267
- { name: 'format', type: 'string', description: 'Image format', required: false, enum: ['png', 'jpeg', 'webp'], default: 'png' },
268
- ],
269
- },
270
- breadcrumb_navigator: {
271
- name: 'breadcrumb_navigator',
272
- description: 'Navigate using site breadcrumbs - find and click breadcrumb links',
273
- category: 'Advanced',
274
- parameters: [
275
- { name: 'targetIndex', type: 'number', description: 'Index of breadcrumb to click (0-based)', required: false },
276
- { name: 'targetText', type: 'string', description: 'Text of breadcrumb to click', required: false },
277
- ],
278
- },
261
+ // element_screenshot REMOVED - use deep_analysis with includeScreenshot: true
262
+ // breadcrumb_navigator REMOVED - use click or find_element with text
279
263
  redirect_tracer: {
280
264
  name: 'redirect_tracer',
281
265
  description: 'Trace URL redirects including standard, JavaScript, and meta refresh redirects',
@@ -354,11 +354,12 @@ export const TOOLS = [
354
354
  },
355
355
  {
356
356
  name: 'find_element',
357
- description: 'Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports Shadow DOM and cross-frame search.',
357
+ description: 'Ultra-powerful element finder and batch scraper. Find elements using text, CSS selector, XPath, attributes, or AI-powered description. Supports Shadow DOM, cross-frame search, and efficient batch scraping of similar elements with advanced filtering.',
358
358
  inputSchema: {
359
359
  type: 'object',
360
360
  additionalProperties: false,
361
361
  properties: {
362
+ // === FIND MODE ===
362
363
  text: { type: 'string', description: 'Text content to search for in elements' },
363
364
  selector: { type: 'string', description: 'CSS selector' },
364
365
  xpath: { type: 'string', description: 'XPath expression' },
@@ -369,6 +370,34 @@ export const TOOLS = [
369
370
  context: { type: 'string', description: 'Additional context for AI search' },
370
371
  shadowDOM: { type: 'boolean', description: 'Search inside Shadow DOM elements', default: false },
371
372
  searchFrames: { type: 'boolean', description: 'Search across all iframes/frames', default: false },
373
+ // === BATCH SCRAPE MODE (merged from batch_element_scraper) ===
374
+ batchMode: { type: 'boolean', description: 'Enable batch scraping mode for extracting data from multiple similar elements', default: false },
375
+ extractAttributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract in batch mode (e.g., href, src, data-id). Default: textContent, href, src' },
376
+ limit: { type: 'number', description: 'Maximum elements to process in batch mode', default: 100 },
377
+ includeInnerHTML: { type: 'boolean', description: 'Include innerHTML in batch extraction', default: false },
378
+ includePosition: { type: 'boolean', description: 'Include element position (x, y, width, height) in results', default: false },
379
+ // === ADVANCED FILTERS ===
380
+ filter: {
381
+ type: 'object',
382
+ description: 'Advanced filtering options for more precise element selection',
383
+ properties: {
384
+ visible: { type: 'boolean', description: 'Only find visible elements (not hidden by CSS)' },
385
+ enabled: { type: 'boolean', description: 'Only find enabled elements (not disabled)' },
386
+ minWidth: { type: 'number', description: 'Minimum element width in pixels' },
387
+ minHeight: { type: 'number', description: 'Minimum element height in pixels' },
388
+ hasChildren: { type: 'boolean', description: 'Only elements that have child elements' },
389
+ containsClass: { type: 'string', description: 'Element must contain this CSS class' },
390
+ matchPattern: { type: 'string', description: 'Regex pattern to match element text content' },
391
+ },
392
+ additionalProperties: false,
393
+ },
394
+ // === OUTPUT OPTIONS ===
395
+ returnType: {
396
+ type: 'string',
397
+ enum: ['elements', 'selectors', 'data'],
398
+ description: 'What to return: elements (full info with tag, text, attributes), selectors (just CSS selectors), data (extracted attribute data only)',
399
+ default: 'elements'
400
+ },
372
401
  },
373
402
  },
374
403
  },
@@ -426,20 +455,9 @@ export const TOOLS = [
426
455
  },
427
456
  },
428
457
  // ============================================================
429
- // ADVANCED TOOLS (24 advanced tools)
458
+ // ADVANCED TOOLS
430
459
  // ============================================================
431
- {
432
- name: 'breadcrumb_navigator',
433
- description: 'Navigate using site breadcrumbs - find and click breadcrumb links',
434
- inputSchema: {
435
- type: 'object',
436
- additionalProperties: false,
437
- properties: {
438
- targetIndex: { type: 'number', description: 'Index of breadcrumb to click (0-based)' },
439
- targetText: { type: 'string', description: 'Text of breadcrumb to click' },
440
- },
441
- },
442
- },
460
+ // breadcrumb_navigator REMOVED - use click or find_element with text
443
461
  {
444
462
  name: 'redirect_tracer',
445
463
  description: 'Trace URL redirects including standard, JavaScript, and meta refresh redirects',
@@ -457,20 +475,53 @@ export const TOOLS = [
457
475
  },
458
476
  },
459
477
  {
460
- name: 'search_content',
461
- description: 'Search text or Regex patterns in page content',
478
+ name: 'search_regex',
479
+ description: `🔥 ULTRA-POWERFUL Regex Engine (like regex101.com) - Test, match, replace with full regex support. Features: Named capture groups, all regex flags (g/i/m/s/u/y), match highlighting, detailed match info with indices, replace mode, pattern explanation, timeout protection against catastrophic backtracking. Search in HTML, text, scripts, styles, or attributes.`,
462
480
  inputSchema: {
463
481
  type: 'object',
464
482
  additionalProperties: false,
465
483
  properties: {
466
- pattern: { type: 'string', description: 'Search pattern (text or regex)' },
467
- isRegex: { type: 'boolean', description: 'Treat pattern as regex', default: false },
468
- caseSensitive: { type: 'boolean', description: 'Case sensitive search', default: false },
484
+ // Core regex
485
+ pattern: { type: 'string', description: 'Regex pattern to search (supports full JavaScript regex syntax including lookahead/lookbehind)' },
486
+ testString: { type: 'string', description: 'Optional custom string to test against. If not provided, uses page content' },
487
+ // Regex flags (individually controllable like regex101.com)
488
+ flags: {
489
+ type: 'object',
490
+ description: 'Regex flags - control each flag individually',
491
+ properties: {
492
+ global: { type: 'boolean', description: 'g flag - Find all matches', default: true },
493
+ ignoreCase: { type: 'boolean', description: 'i flag - Case insensitive', default: false },
494
+ multiline: { type: 'boolean', description: 'm flag - ^ and $ match line boundaries', default: false },
495
+ dotAll: { type: 'boolean', description: 's flag - Dot matches newlines', default: false },
496
+ unicode: { type: 'boolean', description: 'u flag - Unicode mode', default: false },
497
+ sticky: { type: 'boolean', description: 'y flag - Sticky mode', default: false },
498
+ },
499
+ additionalProperties: false,
500
+ },
501
+ // Replace mode (like regex101.com substitution)
502
+ replaceWith: { type: 'string', description: 'Replace matches with this string. Supports $1, $2, $& for capture groups' },
503
+ // Match options
504
+ maxMatches: { type: 'number', description: 'Maximum number of matches to return', default: 100 },
505
+ contextChars: { type: 'number', description: 'Number of context characters around each match', default: 50 },
506
+ // Source selection
507
+ sourceType: {
508
+ type: 'string',
509
+ enum: ['text', 'html', 'scripts', 'styles', 'attributes', 'all'],
510
+ description: 'What content to search in',
511
+ default: 'text'
512
+ },
469
513
  selector: { type: 'string', description: 'CSS selector to limit search scope' },
514
+ // Advanced features
515
+ extractGroups: { type: 'boolean', description: 'Extract and return named/numbered capture groups', default: true },
516
+ highlightMatches: { type: 'boolean', description: 'Return matches with <<MATCH>> highlighting', default: false },
517
+ showMatchInfo: { type: 'boolean', description: 'Show detailed match info: index, length, groups', default: true },
518
+ // Safety
519
+ timeout: { type: 'number', description: 'Regex execution timeout in ms (prevents catastrophic backtracking)', default: 5000 },
470
520
  },
471
521
  required: ['pattern'],
472
522
  },
473
523
  },
524
+ // web_search tool REMOVED - redundant with search_regex and other tools
474
525
  {
475
526
  name: 'extract_json',
476
527
  description: 'Extract embedded JSON/API data from page (LD+JSON, __NEXT_DATA__, etc.) + Advanced Decode (base64, URL, hex, rot13, multi-layer) + Packed JavaScript decoder + AES-CBC decryption for encrypted streaming sites',
@@ -609,21 +660,8 @@ export const TOOLS = [
609
660
  // api_finder REMOVED - merged into network_recorder (use findApis: true)
610
661
  // ajax_content_waiter REMOVED - merged into wait tool (use pollInterval, expectedContent)
611
662
  // media_extractor REMOVED - functionality merged into stream_extractor
612
- {
613
- name: 'element_screenshot',
614
- description: 'Capture screenshot of a specific element',
615
- inputSchema: {
616
- type: 'object',
617
- additionalProperties: false,
618
- properties: {
619
- selector: { type: 'string', description: 'CSS selector of element to capture' },
620
- path: { type: 'string', description: 'File path to save screenshot' },
621
- format: { type: 'string', enum: ['png', 'jpeg', 'webp'], description: 'Image format', default: 'png' },
622
- quality: { type: 'number', description: 'Quality for JPEG/WebP (0-100)' },
623
- },
624
- required: ['selector'],
625
- },
626
- },
663
+ // element_screenshot REMOVED - use deep_analysis with includeScreenshot: true for screenshots
664
+ // batch_element_scraper REMOVED - merged into find_element (use batchMode: true)
627
665
  {
628
666
  name: 'link_harvester',
629
667
  description: 'Harvest all links from page with filtering options',
@@ -638,20 +676,6 @@ export const TOOLS = [
638
676
  },
639
677
  },
640
678
  },
641
- {
642
- name: 'batch_element_scraper',
643
- description: 'Efficiently scrape lists of similar elements',
644
- inputSchema: {
645
- type: 'object',
646
- additionalProperties: false,
647
- properties: {
648
- selector: { type: 'string', description: 'CSS selector for elements to scrape' },
649
- attributes: { type: 'array', items: { type: 'string' }, description: 'Attributes to extract' },
650
- limit: { type: 'number', description: 'Maximum elements to scrape', default: 100 },
651
- },
652
- required: ['selector'],
653
- },
654
- },
655
679
  // extract_schema REMOVED - merged into extract_json (use extractSchema option)
656
680
  // m3u8_parser REMOVED - functionality merged into stream_extractor
657
681
  {
@@ -801,9 +825,10 @@ export const TOOL_NAMES = {
801
825
  FIND_ELEMENT: 'find_element',
802
826
  SAVE_CONTENT_AS_MARKDOWN: 'save_content_as_markdown',
803
827
  // Advanced tools
804
- BREADCRUMB_NAVIGATOR: 'breadcrumb_navigator',
828
+ // BREADCRUMB_NAVIGATOR REMOVED - use click or find_element with text
805
829
  REDIRECT_TRACER: 'redirect_tracer',
806
- SEARCH_CONTENT: 'search_content',
830
+ SEARCH_REGEX: 'search_regex', // Renamed from SEARCH_CONTENT - now regex101.com-like
831
+ // WEB_SEARCH REMOVED - redundant with search_regex
807
832
  EXTRACT_JSON: 'extract_json',
808
833
  SCRAPE_META_TAGS: 'scrape_meta_tags',
809
834
  PRESS_KEY: 'press_key',
@@ -813,9 +838,9 @@ export const TOOL_NAMES = {
813
838
  // API_FINDER: 'api_finder', // REMOVED - merged into NETWORK_RECORDER (use findApis option)
814
839
  // AJAX_CONTENT_WAITER: 'ajax_content_waiter', // REMOVED - merged into WAIT (use pollInterval, expectedContent)
815
840
  // MEDIA_EXTRACTOR: 'media_extractor', // REMOVED - merged into STREAM_EXTRACTOR
816
- ELEMENT_SCREENSHOT: 'element_screenshot',
841
+ // ELEMENT_SCREENSHOT: 'element_screenshot', // REMOVED - use DEEP_ANALYSIS with includeScreenshot
817
842
  LINK_HARVESTER: 'link_harvester',
818
- BATCH_ELEMENT_SCRAPER: 'batch_element_scraper',
843
+ // BATCH_ELEMENT_SCRAPER: 'batch_element_scraper', // REMOVED - merged into FIND_ELEMENT (use batchMode option)
819
844
  // EXTRACT_SCHEMA: 'extract_schema', // REMOVED - merged into EXTRACT_JSON (use extractSchema option)
820
845
  // M3U8_PARSER: 'm3u8_parser', // REMOVED - merged into STREAM_EXTRACTOR
821
846
  COOKIE_MANAGER: 'cookie_manager',
@@ -218,21 +218,18 @@ mcpServer.setRequestHandler(CallToolRequestSchema, async (request) => {
218
218
  result = await handleSaveContentAsMarkdown(args);
219
219
  break;
220
220
  // Advanced tools
221
- case TOOL_NAMES.BREADCRUMB_NAVIGATOR:
222
- if (!page)
223
- throw new Error('Browser not initialized');
224
- result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleBreadcrumbNavigator(page, args || {})) }] };
225
- break;
221
+ // BREADCRUMB_NAVIGATOR REMOVED - use click or find_element
226
222
  case TOOL_NAMES.REDIRECT_TRACER:
227
223
  if (!page)
228
224
  throw new Error('Browser not initialized');
229
225
  result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleUrlRedirectTracer(page, args)) }] };
230
226
  break;
231
- case TOOL_NAMES.SEARCH_CONTENT:
227
+ case TOOL_NAMES.SEARCH_REGEX:
232
228
  if (!page)
233
229
  throw new Error('Browser not initialized');
234
- result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchContent(page, args)) }] };
230
+ result = { content: [{ type: 'text', text: JSON.stringify(await advancedTools.handleSearchRegex(page, args)) }] };
235
231
  break;
232
+ // WEB_SEARCH case REMOVED - redundant with search_regex
236
233
  // MEDIA_EXTRACTOR case REMOVED - merged into STREAM_EXTRACTOR
237
234
  case TOOL_NAMES.STREAM_EXTRACTOR:
238
235
  if (!page)
@@ -615,17 +612,14 @@ async function main() {
615
612
  console.error(' 🖼️ iframe_handler - Handle nested iframes (deep_scrape)');
616
613
  console.error('');
617
614
  console.error(' Advanced Tools:');
618
- console.error(' 🔎 search_content - Search patterns in page');
615
+ console.error(' 🔥 search_regex - ULTRA Regex Engine (like regex101)');
616
+ console.error(' 🚀 web_search - Intelligent Content Search');
619
617
  console.error(' 📊 extract_json - Extract embedded JSON');
620
618
  console.error(' 🏷️ scrape_meta_tags - Extract meta/OG tags');
621
619
  console.error(' 📈 deep_analysis - Full page analysis');
622
- console.error(' 📡 network_recorder - Record network traffic');
623
- console.error(' 🔌 api_finder - Discover hidden APIs');
624
- console.error(' ⏱️ ajax_content_waiter - Wait for dynamic content');
620
+ console.error(' 📡 network_recorder - Record network traffic + API finder');
625
621
  console.error(' 🔗 link_harvester - Harvest all links');
626
- console.error(' 📋 batch_element_scraper - Batch scrape elements');
627
622
  console.error(' 🏗️ extract_schema - Extract Schema.org data');
628
- console.error(' 📸 element_screenshot - Screenshot element');
629
623
  console.error(' 🧭 breadcrumb_navigator - Navigate breadcrumbs');
630
624
  console.error(' ↪️ redirect_tracer - Trace URL redirects');
631
625
  console.error(' 📊 progress_tracker - Track task progress');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "brave-real-browser-mcp-server",
3
- "version": "2.27.25",
3
+ "version": "2.27.26",
4
4
  "description": "🦁 MCP server for Brave Real Browser - NPM Workspaces Monorepo with anti-detection features, SSE streaming, and LSP compatibility",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@modelcontextprotocol/sdk": "latest",
52
52
  "@types/turndown": "latest",
53
- "brave-real-browser": "^2.8.25",
53
+ "brave-real-browser": "^2.8.26",
54
54
  "puppeteer-core": "^24.35.0",
55
55
  "turndown": "latest",
56
56
  "vscode-languageserver": "^9.0.1",