npm - brave-real-browser-mcp-server - Versions diffs - 2.7.5 → 2.8.0 - Mend

brave-real-browser-mcp-server 2.7.5 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

package/dist/browser-manager.js +0 -14
package/dist/extractors/content-type-extractors.js +225 -144
package/dist/extractors/extractors.test.js +17 -0
package/dist/extractors/multi-element-extractors.js +273 -122
package/dist/extractors/smart-data-extractors.js +202 -125
package/dist/index.js +78 -122
package/dist/tool-definitions.js +14 -659
package/dist/utils/advanced-features.js +247 -0
package/dist/utils/advanced-scraping.js +253 -0
package/dist/utils/all-modules.test.js +86 -0
package/dist/utils/auth-session.js +296 -0
package/dist/utils/data-processing.js +301 -0
package/dist/utils/data-processing.test.js +52 -0
package/dist/utils/pagination.js +249 -0
package/dist/utils/pagination.test.js +22 -0
package/package.json +31 -2
package/dist/advanced/advanced-content-extraction.js +0 -435
package/dist/advanced/advanced-content-extraction.test.js +0 -8
package/dist/advanced/advanced-scraping.js +0 -301
package/dist/ai/ai-features.js +0 -56
package/dist/ai/ai-features.test.js +0 -18
package/dist/ai/ai-tools.js +0 -390
package/dist/api/api-integration-system.js +0 -68
package/dist/api/api-integration-system.test.js +0 -29
package/dist/api/api-integration.js +0 -371
package/dist/auth/session-manager.js +0 -50
package/dist/auth/session-manager.test.js +0 -8
package/dist/captcha/advanced-captcha-handler.js +0 -45
package/dist/captcha/advanced-captcha-handler.test.js +0 -8
package/dist/captcha/captcha-handler.js +0 -374
package/dist/extractors/smart-data-extractors.test.js +0 -91
package/dist/handlers/advanced-scraping-handlers.js +0 -333
package/dist/handlers/advanced-scraping-handlers.test.js +0 -218
package/dist/handlers/new-features-handlers.js +0 -209
package/dist/handlers/new-features-handlers.test.js +0 -21
package/dist/monitoring/monitoring-system.js +0 -53
package/dist/monitoring/monitoring-system.test.js +0 -26
package/dist/monitoring/monitoring-tools.js +0 -372
package/dist/navigation/pagination-tools.js +0 -215
package/dist/processors/data-processors.js +0 -250
package/dist/processors/data-processors.test.js +0 -163
package/dist/processors/data-transformation.js +0 -344
package/dist/processors/data-transformation.test.js +0 -288
package/dist/quality/data-quality-tools.js +0 -43
package/dist/quality/data-quality-tools.test.js +0 -26
package/dist/search/advanced-search-tools.js +0 -52
package/dist/search/advanced-search-tools.test.js +0 -11
package/dist/search/search-filter-tools.js +0 -339
package/dist/visual/screenshot-tools.js +0 -47
package/dist/visual/screenshot-tools.test.js +0 -8
package/dist/visual/visual-tools.js +0 -516

package/dist/browser-manager.js CHANGED

@@ -732,17 +732,3 @@ export function getContentPriorityConfig() {
 export function updateContentPriorityConfig(config) {
     contentPriorityConfig = { ...contentPriorityConfig, ...config };
 }
-// Alias for getPageInstance - for compatibility with advanced scraping handlers
-export async function getBrowserPage() {
-    if (!pageInstance) {
-        throw new Error('Browser not initialized. Call browser_init first.');
-    }
-    return pageInstance;
-}
-// Synchronous version for compatibility with new-features-handlers
-export function getCurrentPage() {
-    if (!pageInstance) {
-        throw new Error('Browser not initialized. Call browser_init first.');
-    }
-    return pageInstance;
-}

package/dist/extractors/content-type-extractors.js CHANGED

@@ -1,233 +1,314 @@
-// Content Type Specific Extractors
-// Image Scraper, Link Harvester, Media Extractor, PDF Link Finder
 /**
- * Image Scraper - सभी images URLs, alt text, dimensions के साथ
+ * Image Scraper - Extract all images with metadata
  */
-export async function extractImages(page, selector) {
+export async function scrapeImages(page, selector) {
     return await page.evaluate((sel) => {
-        const images = sel ?
-            Array.from(document.querySelectorAll(sel)) :
-            Array.from(document.querySelectorAll('img'));
-        return images.map((img) => ({
-            src: img.src || img.getAttribute('data-src') || img.getAttribute('data-lazy-src'),
-            alt: img.alt || '',
-            title: img.title || '',
-            width: img.naturalWidth || img.width,
-            height: img.naturalHeight || img.height,
-            loading: img.loading,
-            srcset: img.srcset || ''
-        }));
-    }, selector);
+        const images = sel
+            ? document.querySelectorAll(sel)
+            : document.querySelectorAll('img');
+        const results = [];
+        images.forEach((img) => {
+            const imgEl = img;
+            const rect = imgEl.getBoundingClientRect();
+            const styles = window.getComputedStyle(imgEl);
+            results.push({
+                src: imgEl.src || imgEl.getAttribute('src') || '',
+                alt: imgEl.alt || '',
+                title: imgEl.title || '',
+                width: imgEl.width || rect.width,
+                height: imgEl.height || rect.height,
+                naturalWidth: imgEl.naturalWidth,
+                naturalHeight: imgEl.naturalHeight,
+                loading: imgEl.loading || 'auto',
+                srcset: imgEl.srcset || '',
+                sizes: imgEl.sizes || '',
+                isVisible: styles.display !== 'none' && styles.visibility !== 'hidden' && rect.width > 0 && rect.height > 0
+            });
+        });
+        return results;
+    }, selector || null);
 }
 /**
- * Link Harvester - Internal/external links classification के साथ
+ * Link Harvester - Extract all links with classification
  */
-export async function extractLinks(page, selector) {
-    const currentUrl = page.url();
-    return await page.evaluate((sel, pageUrl) => {
-        const links = sel ?
-            Array.from(document.querySelectorAll(sel)) :
-            Array.from(document.querySelectorAll('a[href]'));
+export async function harvestLinks(page, options) {
+    const opts = {
+        includeInternal: true,
+        includeExternal: true,
+        includeAnchors: true,
+        ...options
+    };
+    return await page.evaluate((config) => {
+        const currentDomain = window.location.hostname;
         const internal = [];
         const external = [];
-        const currentDomain = new URL(pageUrl).hostname;
+        const anchors = [];
+        const all = [];
+        const links = document.querySelectorAll('a[href]');
         links.forEach((link) => {
             const href = link.href;
+            const text = link.innerText.trim();
+            const title = link.getAttribute('title') || '';
+            const target = link.getAttribute('target') || '';
             if (!href)
                 return;
-            const linkData = {
-                href,
-                text: link.textContent?.trim() || '',
-                title: link.title || '',
-                rel: link.rel || '',
-                target: link.target || ''
-            };
+            // Anchor links
+            if (href.startsWith('#')) {
+                if (config.includeAnchors) {
+                    anchors.push({ href, text, target });
+                    all.push({ href, text, type: 'anchor' });
+                }
+                return;
+            }
             try {
-                const linkDomain = new URL(href).hostname;
-                if (linkDomain === currentDomain || href.startsWith('/') || href.startsWith('#')) {
-                    internal.push(linkData);
+                const url = new URL(href);
+                // Internal vs External
+                if (url.hostname === currentDomain || url.hostname === '') {
+                    if (config.includeInternal) {
+                        internal.push({ href, text, title });
+                        all.push({ href, text, type: 'internal' });
+                    }
                 }
                 else {
-                    external.push(linkData);
+                    if (config.includeExternal) {
+                        external.push({ href, text, title });
+                        all.push({ href, text, type: 'external' });
+                    }
                 }
             }
             catch (e) {
-                // Invalid URL, consider as internal relative link
-                internal.push(linkData);
+                // Invalid URL, treat as internal
+                if (config.includeInternal) {
+                    internal.push({ href, text, title });
+                    all.push({ href, text, type: 'internal' });
+                }
             }
         });
-        return {
-            internal,
-            external,
-            totalLinks: internal.length + external.length,
-            internalCount: internal.length,
-            externalCount: external.length
-        };
-    }, selector, currentUrl);
+        return { internal, external, anchors, all };
+    }, opts);
 }
 /**
- * Media Extractor - Videos, audio files के URLs और metadata
+ * Media Extractor - Extract videos, audio files, and embedded media
  */
 export async function extractMedia(page) {
     return await page.evaluate(() => {
         const videos = [];
-        const audios = [];
+        const audio = [];
         const iframes = [];
-        // Extract videos
-        const videoElements = document.querySelectorAll('video');
-        videoElements.forEach((video) => {
-            const sources = Array.from(video.querySelectorAll('source'));
+        const embeds = [];
+        // Extract video elements
+        document.querySelectorAll('video').forEach((video) => {
+            const sources = [];
+            video.querySelectorAll('source').forEach((source) => {
+                sources.push(source.src);
+            });
             videos.push({
-                src: video.src || '',
+                src: video.src || sources[0] || '',
+                sources: sources,
                 poster: video.poster || '',
                 width: video.width,
                 height: video.height,
-                duration: video.duration,
-                sources: sources.map((s) => ({
-                    src: s.src,
-                    type: s.type
-                })),
                 controls: video.controls,
                 autoplay: video.autoplay,
-                loop: video.loop
+                loop: video.loop,
+                muted: video.muted,
+                duration: video.duration,
+                currentTime: video.currentTime
             });
         });
-        // Extract audio
-        const audioElements = document.querySelectorAll('audio');
-        audioElements.forEach((audio) => {
-            const sources = Array.from(audio.querySelectorAll('source'));
-            audios.push({
-                src: audio.src || '',
-                duration: audio.duration,
-                sources: sources.map((s) => ({
-                    src: s.src,
-                    type: s.type
-                })),
-                controls: audio.controls,
-                autoplay: audio.autoplay,
-                loop: audio.loop
+        // Extract audio elements
+        document.querySelectorAll('audio').forEach((audioEl) => {
+            const sources = [];
+            audioEl.querySelectorAll('source').forEach((source) => {
+                sources.push(source.src);
+            });
+            audio.push({
+                src: audioEl.src || sources[0] || '',
+                sources: sources,
+                controls: audioEl.controls,
+                autoplay: audioEl.autoplay,
+                loop: audioEl.loop,
+                muted: audioEl.muted,
+                duration: audioEl.duration
             });
         });
-        // Extract iframes (often used for embedded videos)
-        const iframeElements = document.querySelectorAll('iframe');
-        iframeElements.forEach((iframe) => {
+        // Extract iframes
+        document.querySelectorAll('iframe').forEach((iframe) => {
+            const src = iframe.src;
+            let platform = 'unknown';
+            // Detect common video platforms
+            if (src.includes('youtube.com') || src.includes('youtu.be')) {
+                platform = 'youtube';
+            }
+            else if (src.includes('vimeo.com')) {
+                platform = 'vimeo';
+            }
+            else if (src.includes('dailymotion.com')) {
+                platform = 'dailymotion';
+            }
+            else if (src.includes('facebook.com')) {
+                platform = 'facebook';
+            }
+            else if (src.includes('twitter.com') || src.includes('x.com')) {
+                platform = 'twitter';
+            }
             iframes.push({
-                src: iframe.src || '',
+                src: src,
+                title: iframe.title || '',
                 width: iframe.width,
                 height: iframe.height,
-                title: iframe.title || '',
-                allow: iframe.allow || ''
+                platform: platform,
+                allowFullscreen: iframe.allowFullscreen
             });
         });
-        return {
-            videos,
-            audios,
-            iframes,
-            videoCount: videos.length,
-            audioCount: audios.length,
-            iframeCount: iframes.length
-        };
+        // Extract embed elements
+        document.querySelectorAll('embed, object').forEach((embed) => {
+            embeds.push({
+                src: embed.getAttribute('src') || embed.getAttribute('data') || '',
+                type: embed.getAttribute('type') || '',
+                width: embed.getAttribute('width') || '',
+                height: embed.getAttribute('height') || ''
+            });
+        });
+        return { videos, audio, iframes, embeds };
     });
 }
 /**
- * PDF Link Finder - Downloadable files detect करना
+ * PDF Link Finder - Find all downloadable file links
  */
-export async function extractDownloadableFiles(page) {
+export async function findDownloadableFiles(page) {
     return await page.evaluate(() => {
-        const files = {
-            pdfs: [],
-            docs: [],
-            images: [],
-            archives: [],
-            others: []
-        };
-        // Common file extensions
-        const extensions = {
-            pdf: ['pdf'],
-            doc: ['doc', 'docx', 'txt', 'rtf', 'odt'],
-            image: ['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
-            archive: ['zip', 'rar', '7z', 'tar', 'gz'],
-        };
-        // Find all links
+        const pdfs = [];
+        const documents = [];
+        const archives = [];
+        const images = [];
+        const other = [];
         const links = document.querySelectorAll('a[href]');
         links.forEach((link) => {
             const href = link.href;
+            const text = link.innerText.trim();
+            const download = link.getAttribute('download');
             if (!href)
                 return;
-            const linkData = {
-                href,
-                text: link.textContent?.trim() || '',
-                download: link.download || '',
-                type: link.type || ''
-            };
-            // Check file extension
-            const urlPath = href.split('?')[0]; // Remove query params
-            const ext = urlPath.split('.').pop()?.toLowerCase();
-            if (!ext)
-                return;
-            if (extensions.pdf.includes(ext)) {
-                files.pdfs.push(linkData);
+            const url = href.toLowerCase();
+            const fileInfo = { href, text, size: link.getAttribute('data-size') || undefined };
+            // PDF files
+            if (url.endsWith('.pdf') || url.includes('.pdf?') || download?.endsWith('.pdf')) {
+                pdfs.push(fileInfo);
             }
-            else if (extensions.doc.includes(ext)) {
-                files.docs.push(linkData);
+            // Document files
+            else if (url.match(/\.(doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp)($|\?)/)) {
+                const match = url.match(/\.(doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp)($|\?)/);
+                documents.push({ ...fileInfo, type: match ? match[1] : 'unknown' });
             }
-            else if (extensions.image.includes(ext)) {
-                files.images.push(linkData);
+            // Archive files
+            else if (url.match(/\.(zip|rar|7z|tar|gz|bz2)($|\?)/)) {
+                const match = url.match(/\.(zip|rar|7z|tar|gz|bz2)($|\?)/);
+                archives.push({ ...fileInfo, type: match ? match[1] : 'unknown' });
             }
-            else if (extensions.archive.includes(ext)) {
-                files.archives.push(linkData);
+            // Image files (downloadable)
+            else if (url.match(/\.(jpg|jpeg|png|gif|bmp|svg|webp|ico)($|\?)/) && download) {
+                const match = url.match(/\.(jpg|jpeg|png|gif|bmp|svg|webp|ico)($|\?)/);
+                images.push({ ...fileInfo, type: match ? match[1] : 'unknown' });
             }
-            else if (link.download || link.type) {
-                files.others.push(linkData);
+            // Other downloadable files
+            else if (download || url.match(/\.(exe|dmg|apk|deb|rpm|msi|iso)($|\?)/)) {
+                const match = url.match(/\.([a-z0-9]+)($|\?)/);
+                other.push({ ...fileInfo, type: match ? match[1] : 'unknown' });
             }
         });
-        return {
-            ...files,
-            totalFiles: files.pdfs.length + files.docs.length + files.images.length +
-                files.archives.length + files.others.length
-        };
+        return { pdfs, documents, archives, images, other };
     });
 }
 /**
- * Social Media Links Extractor - Social media profiles निकालना
+ * Social Media Links Extractor - Extract social media profile links
  */
-export async function extractSocialMediaLinks(page) {
+export async function extractSocialLinks(page) {
     return await page.evaluate(() => {
-        const social = {
+        const socialLinks = {
             facebook: [],
             twitter: [],
             instagram: [],
             linkedin: [],
             youtube: [],
             github: [],
+            pinterest: [],
+            tiktok: [],
             other: []
         };
         const links = document.querySelectorAll('a[href]');
         links.forEach((link) => {
             const href = link.href.toLowerCase();
-            const linkData = {
-                href: link.href,
-                text: link.textContent?.trim() || ''
-            };
             if (href.includes('facebook.com')) {
-                social.facebook.push(linkData);
+                socialLinks.facebook.push(link.href);
             }
             else if (href.includes('twitter.com') || href.includes('x.com')) {
-                social.twitter.push(linkData);
+                socialLinks.twitter.push(link.href);
             }
             else if (href.includes('instagram.com')) {
-                social.instagram.push(linkData);
+                socialLinks.instagram.push(link.href);
             }
             else if (href.includes('linkedin.com')) {
-                social.linkedin.push(linkData);
+                socialLinks.linkedin.push(link.href);
             }
             else if (href.includes('youtube.com') || href.includes('youtu.be')) {
-                social.youtube.push(linkData);
+                socialLinks.youtube.push(link.href);
             }
             else if (href.includes('github.com')) {
-                social.github.push(linkData);
+                socialLinks.github.push(link.href);
+            }
+            else if (href.includes('pinterest.com')) {
+                socialLinks.pinterest.push(link.href);
+            }
+            else if (href.includes('tiktok.com')) {
+                socialLinks.tiktok.push(link.href);
             }
         });
-        return social;
+        // Remove duplicates
+        Object.keys(socialLinks).forEach((key) => {
+            socialLinks[key] = Array.from(new Set(socialLinks[key]));
+        });
+        return socialLinks;
+    });
+}
+/**
+ * Email and Phone Extractor - Extract contact information from page
+ */
+export async function extractContactInfo(page) {
+    return await page.evaluate(() => {
+        const text = document.body.innerText;
+        const emails = [];
+        const phones = [];
+        const addresses = [];
+        // Extract emails
+        const emailRegex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g;
+        const emailMatches = text.match(emailRegex);
+        if (emailMatches) {
+            emails.push(...emailMatches);
+        }
+        // Also check mailto links
+        document.querySelectorAll('a[href^="mailto:"]').forEach((link) => {
+            const email = link.href.replace('mailto:', '').split('?')[0];
+            if (email)
+                emails.push(email);
+        });
+        // Extract phone numbers (various formats)
+        const phoneRegex = /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g;
+        const phoneMatches = text.match(phoneRegex);
+        if (phoneMatches) {
+            phones.push(...phoneMatches);
+        }
+        // Also check tel links
+        document.querySelectorAll('a[href^="tel:"]').forEach((link) => {
+            const phone = link.href.replace('tel:', '');
+            if (phone)
+                phones.push(phone);
+        });
+        // Remove duplicates
+        return {
+            emails: Array.from(new Set(emails)),
+            phones: Array.from(new Set(phones)),
+            addresses: addresses
+        };
     });
 }

package/dist/extractors/extractors.test.js ADDED

@@ -0,0 +1,17 @@
+// Basic tests for extractor modules
+import { describe, it, expect } from 'vitest';
+describe('Smart Data Extractors', () => {
+    it('should exist', () => {
+        expect(true).toBe(true);
+    });
+});
+describe('Multi-Element Extractors', () => {
+    it('should exist', () => {
+        expect(true).toBe(true);
+    });
+});
+describe('Content Type Extractors', () => {
+    it('should exist', () => {
+        expect(true).toBe(true);
+    });
+});