npm - mcpbrowser - Versions diffs - 0.2.18 → 0.2.19 - Mend

mcpbrowser 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +1 -1
package/extension/package.json +1 -1
package/extension/src/extension.js +1 -1
package/package.json +1 -1
package/server.json +1 -1
package/src/mcp-browser.js +191 -72
package/tests/domain-tab-pooling.test.js +329 -0
package/tests/integration.test.js +158 -0
package/tests/prepare-html.test.js +307 -0

package/README.md CHANGED Viewed

@@ -117,7 +117,7 @@ Restart VS Code or reload the window for the changes to take effect.
 In Copilot Chat, you should see the `MCPBrowser` server listed. Ask it to fetch an authenticated URL and it will drive your signed-in Chrome session.
 ## How it works
-- Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning `text` and `html` (truncated up to 2M chars per field) for analysis.
+- Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning `html` (truncated up to 2M chars) for analysis.
 - **Smart confirmation**: Copilot asks for confirmation ONLY on first request to a new domain - explains browser will open for authentication. Subsequent requests to same domain work automatically (session preserved).
 - **Domain-aware tab reuse**: Automatically reuses the same tab for URLs on the same domain, preserving authentication session. Different domains open new tabs.
 - **Automatic web page fetching**: Waits for network idle (`networkidle0`) by default, ensuring JavaScript-heavy web pages (SPAs, dashboards) fully load before returning content.

package/extension/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "mcpbrowser",
   "displayName": "MCP Browser",
   "description": "Lightweight MCP server-extension that allows Copilot to fetch protected web pages you can authenticate to via browser - handles login, SSO, and anti-crawler restrictions. Should be used when standard fetch_webpage fails",
-  "version": "0.2.18",
+  "version": "0.2.19",
   "publisher": "cherchyk",
   "icon": "icon.png",
   "engines": {

package/extension/src/extension.js CHANGED Viewed

@@ -92,7 +92,7 @@ async function configureMcpBrowser() {
             type: "stdio",
             command: "npx",
             args: ["-y", "mcpbrowser@latest"],
-            description: "**USE AUTOMATICALLY** when normal URL fetch fails due to: 401/403 errors, login/auth requirements, SSO prompts, anti-crawler/bot detection. **FIRST FETCH**: Ask confirmation ONLY for first request to a domain - explain browser will open for authentication. **SUBSEQUENT REQUESTS**: Use automatically (session preserved). Fetches web pages using authenticated Chrome session via DevTools Protocol (works for Microsoft, GitHub, AWS, Google, corporate sites, etc.). Waits for web page fetch automatically (up to 60s, handles redirects)."
+            description: "Use AUTOMATICALLY on 401/403 errors, login pages, SSO prompts, or anti-bot blocks. First domain request: ask user confirmation (browser opens for auth). Subsequent same-domain: use automatically (session preserved). Returns HTML from authenticated Chrome session. Handles Microsoft, GitHub, AWS, Google, corporate sites."
         };
         // Write back to file with pretty formatting

package/package.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {  "name": "mcpbrowser",
-  "version": "0.2.18",
+  "version": "0.2.19",
   "mcpName": "io.github.cherchyk/browser",
   "type": "module",
   "description": "MCP server that fetches protected web pages using Chrome DevTools Protocol",

package/server.json CHANGED Viewed

@@ -6,7 +6,7 @@
     "url": "https://github.com/cherchyk/MCPBrowser",
     "source": "github"
   },
-  "version": "0.2.18",
+  "version": "0.2.19",
   "packages": [
     {
       "registryType": "npm",

package/src/mcp-browser.js CHANGED Viewed

@@ -60,7 +60,7 @@ function getDefaultChromePaths() {
 const defaultChromePaths = getDefaultChromePaths();
 let cachedBrowser = null;
-let lastKeptPage = null; // reuse the same tab when requested
+let domainPages = new Map(); // hostname -> page mapping for tab reuse across domains
 let chromeLaunchPromise = null; // prevent multiple simultaneous launches
 async function devtoolsAvailable() {
@@ -152,19 +152,16 @@ async function getBrowser() {
   });
   cachedBrowser.on("disconnected", () => {
     cachedBrowser = null;
-    lastKeptPage = null;
+    domainPages.clear(); // Clear all domain page mappings
   });
   return cachedBrowser;
 }
-async function fetchPage({
-  url,
-  keepPageOpen = true,
-  outputFormat = "HTML",
-}) {
+async function fetchPage({ url }) {
   // Hardcoded smart defaults
   const waitUntil = "networkidle0";
-  const timeoutMs = 60000;
+  const navigationTimeout = 60000; // Initial navigation timeout
+  const authCompletionTimeout = 600000; // 10 minutes for user to complete authentication
   const reuseLastKeptPage = true;
   if (!url) {
@@ -173,36 +170,29 @@ async function fetchPage({
   const browser = await getBrowser();
   let page = null;
+  let hostname;
-  // Smart tab reuse: only reuse if same domain (preserves auth within domain)
-  if (reuseLastKeptPage && lastKeptPage && !lastKeptPage.isClosed()) {
-    let newHostname;
-    try {
-      newHostname = new URL(url).hostname;
-    } catch {
-      throw new Error(`Invalid URL: ${url}`);
-    }
-    const currentUrl = lastKeptPage.url();
-    if (currentUrl) {
-      try {
-        const currentHostname = new URL(currentUrl).hostname;
-        // Reuse tab only if same domain (keeps auth session alive)
-        if (currentHostname === newHostname) {
-          page = lastKeptPage;
-          await page.bringToFront().catch(() => {});
-        } else {
-          // Different domain - close old tab and create new one
-          await lastKeptPage.close().catch(() => {});
-          lastKeptPage = null;
-        }
-      } catch {
-        // If URL parsing fails, create new tab
-      }
+  // Parse hostname for domain-based tab reuse
+  try {
+    hostname = new URL(url).hostname;
+  } catch {
+    throw new Error(`Invalid URL: ${url}`);
+  }
+  // Check if we have an existing page for this domain
+  if (reuseLastKeptPage && domainPages.has(hostname)) {
+    const existingPage = domainPages.get(hostname);
+    if (!existingPage.isClosed()) {
+      page = existingPage;
+      await page.bringToFront().catch(() => {});
+      console.error(`[MCPBrowser] Reusing existing tab for domain: ${hostname}`);
+    } else {
+      // Page was closed externally, remove from map
+      domainPages.delete(hostname);
     }
   }
-  // Create new tab if no reuse
+  // Create new tab if no existing page for this domain
   if (!page) {
     try {
       page = await browser.newPage();
@@ -225,50 +215,85 @@ async function fetchPage({
         throw new Error('Unable to create or find a controllable page');
       }
     }
+    // Add new page to domain map
+    domainPages.set(hostname, page);
+    console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
   }
-  let shouldKeepOpen = keepPageOpen || page === lastKeptPage;
+  let shouldKeepOpen = true;
   let wasSuccess = false;
   try {
     console.error(`[MCPBrowser] Navigating to: ${url}`);
-    await page.goto(url, { waitUntil, timeout: timeoutMs });
-    console.error(`[MCPBrowser] Navigation completed: ${page.url()}`);
+    await page.goto(url, { waitUntil, timeout: navigationTimeout });
-    // Extract content based on outputFormat
-    const result = { success: true, url: page.url() };
+    const currentUrl = page.url();
+    const currentHostname = new URL(currentUrl).hostname;
-    if (outputFormat === "HTML" || outputFormat === "BOTH") {
-      const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
-      result.html = truncate(html, 2000000);
-    }
+    console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);
-    if (outputFormat === "TEXT" || outputFormat === "BOTH") {
-      const text = await page.evaluate(() => document.body?.innerText || "");
-      result.text = truncate(text, 2000000);
+    // Check if we were redirected to a different domain (likely authentication)
+    if (currentHostname !== hostname) {
+      console.error(`[MCPBrowser] Detected redirect to authentication domain: ${currentHostname}`);
+      console.error(`[MCPBrowser] Waiting for user to complete authentication...`);
+      console.error(`[MCPBrowser] Will wait up to ${authCompletionTimeout / 1000} seconds for return to ${hostname}`);
+      // Wait for navigation back to the original domain
+      const authDeadline = Date.now() + authCompletionTimeout;
+      let authCompleted = false;
+      while (Date.now() < authDeadline) {
+        try {
+          // Check current URL
+          const checkUrl = page.url();
+          const checkHostname = new URL(checkUrl).hostname;
+          if (checkHostname === hostname) {
+            console.error(`[MCPBrowser] Authentication completed! Returned to: ${checkUrl}`);
+            authCompleted = true;
+            break;
+          }
+          // Wait a bit before checking again
+          await new Promise(resolve => setTimeout(resolve, 2000));
+        } catch (error) {
+          // Page might be navigating, continue waiting
+          await new Promise(resolve => setTimeout(resolve, 2000));
+        }
+      }
+      if (!authCompleted) {
+        const hint = `Authentication timeout. Tab is left open at ${page.url()}. Complete authentication and retry the same URL.`;
+        return { success: false, error: "Authentication timeout - user did not complete login", pageKeptOpen: true, hint };
+      }
+      // Wait for page to fully stabilize after auth redirect
+      console.error(`[MCPBrowser] Waiting for page to stabilize after authentication...`);
+      await new Promise(resolve => setTimeout(resolve, 3000)); // Give page time to settle
+      // Ensure page is ready
+      try {
+        await page.waitForFunction(() => document.readyState === 'complete', { timeout: 10000 });
+      } catch {
+        // Ignore timeout - page might already be ready
+      }
     }
+    // Extract HTML content
+    const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
+    const preparedHtml = prepareHtml(html, page.url());
+    const result = {
+      success: true,
+      url: page.url(),
+      html: preparedHtml
+    };
     wasSuccess = true;
-    if (keepPageOpen && lastKeptPage !== page) {
-      // Close old kept page if we're keeping a different one
-      if (lastKeptPage && !lastKeptPage.isClosed()) {
-        await lastKeptPage.close().catch(() => {});
-      }
-      lastKeptPage = page;
-    }
     return result;
   } catch (err) {
-    shouldKeepOpen = shouldKeepOpen || keepPageOpen;
-    const hint = shouldKeepOpen
-      ? "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL."
-      : undefined;
-    return { success: false, error: err.message || String(err), pageKeptOpen: shouldKeepOpen, hint };
+    const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
+    return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
   } finally {
-    if (!shouldKeepOpen && lastKeptPage === page) {
-      lastKeptPage = null;
-    }
-    if (!shouldKeepOpen) {
-      await page.close().catch(() => {});
-    }
+    // Tab always stays open - domain-aware reuse handles cleanup
   }
 }
@@ -277,19 +302,107 @@ function truncate(str, max) {
   return str.length > max ? `${str.slice(0, max)}... [truncated]` : str;
 }
+/**
+ * Prepares HTML for consumption by:
+ * 1. Converting relative URLs to absolute URLs
+ * 2. Removing non-content elements (scripts, styles, meta tags, comments)
+ * 3. Removing code-related attributes (class, id, style, data-*, event handlers)
+ * 4. Removing SVG graphics and other non-text elements
+ * 5. Collapsing excessive whitespace
+ */
+function prepareHtml(html, baseUrl) {
+  if (!html) return "";
+  let cleaned = html;
+  // Remove HTML comments
+  cleaned = cleaned.replace(/<!--[\s\S]*?-->/g, '');
+  // Remove script tags and their content
+  cleaned = cleaned.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
+  // Remove style tags and their content
+  cleaned = cleaned.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
+  // Remove noscript tags and their content
+  cleaned = cleaned.replace(/<noscript\b[^<]*(?:(?!<\/noscript>)<[^<]*)*<\/noscript>/gi, '');
+  // Remove SVG tags and their content (often large, not useful for text)
+  cleaned = cleaned.replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, '');
+  // Remove meta tags
+  cleaned = cleaned.replace(/<meta\b[^>]*>/gi, '');
+  // Remove link tags (stylesheets, preload, etc.)
+  cleaned = cleaned.replace(/<link\b[^>]*>/gi, '');
+  // Convert relative URLs to absolute in href attributes
+  cleaned = cleaned.replace(/href=["']([^"']+)["']/gi, (match, url) => {
+    if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('//') || url.startsWith('#') || url.startsWith('mailto:') || url.startsWith('tel:')) {
+      return match;
+    }
+    try {
+      const absoluteUrl = new URL(url, baseUrl).href;
+      return `href="${absoluteUrl}"`;
+    } catch {
+      return match;
+    }
+  });
+  // Convert relative URLs to absolute in src attributes
+  cleaned = cleaned.replace(/src=["']([^"']+)["']/gi, (match, url) => {
+    if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('//') || url.startsWith('data:')) {
+      return match;
+    }
+    try {
+      const absoluteUrl = new URL(url, baseUrl).href;
+      return `src="${absoluteUrl}"`;
+    } catch {
+      return match;
+    }
+  });
+  // Remove inline style attributes
+  cleaned = cleaned.replace(/\s+style=["'][^"']*["']/gi, '');
+  // Remove class attributes
+  cleaned = cleaned.replace(/\s+class=["'][^"']*["']/gi, '');
+  // Remove id attributes
+  cleaned = cleaned.replace(/\s+id=["'][^"']*["']/gi, '');
+  // Remove data-* attributes
+  cleaned = cleaned.replace(/\s+data-[a-z0-9-]+=["'][^"']*["']/gi, '');
+  // Remove event handler attributes (onclick, onload, etc.)
+  cleaned = cleaned.replace(/\s+on[a-z]+\s*=\s*["'][^"']*["']/gi, '');
+  // Remove role attributes
+  cleaned = cleaned.replace(/\s+role=["'][^"']*["']/gi, '');
+  // Remove aria-* attributes
+  cleaned = cleaned.replace(/\s+aria-[a-z0-9-]+=["'][^"']*["']/gi, '');
+  // Collapse multiple whitespace/newlines into single space
+  cleaned = cleaned.replace(/\s+/g, ' ');
+  // Remove spaces between tags
+  cleaned = cleaned.replace(/>\s+</g, '><');
+  return cleaned;
+}
 async function main() {
-  const server = new Server({ name: "MCPBrowser", version: "0.2.18" }, { capabilities: { tools: {} } });
+  const server = new Server({ name: "MCPBrowser", version: "0.2.19" }, { capabilities: { tools: {} } });
   const tools = [
     {
       name: "fetch_webpage_protected",
-      description: "BROWSER ACCESS FOR AUTHENTICATED WEB PAGES: Fetches web pages in Chrome browser using real user profile (with saved passwords, active sessions, extensions). Automatically handles login redirects, SSO, OAuth, and anti-crawler protections.\n\nWHEN TO USE: Login-required web pages, SSO/OAuth content, paywalled sites, 401/403 errors, or any auth-protected resources.\n\nAUTHENTICATION BEHAVIOR: First request to authenticated site returns login web page - this is EXPECTED. User authenticates in browser (10-30s). Retry same URL to get content. Keep retrying until success - authentication completes in background. Tab stays open to preserve session.\n\nCRITICAL RULES: (1) Process ONE URL at a time - never parallel requests. (2) Never give up or suggest alternatives - keep retrying until content retrieved. (3) Requested web page is critical - skipping not acceptable. (4) Skip and continue if web page returns 404 - web page doesn't exist.\n\nReturns: Configurable output format (HTML, TEXT, or BOTH). Tab reuse preserves sessions across same-domain requests.",
+      description: "Fetches protected web pages using Chrome browser with user's authenticated session (saved passwords, active logins, cookies). Use for: login-required pages, SSO/OAuth, paywalled sites, 401/403 errors, anti-crawler blocks.\n\nAUTH FLOW: If page redirects to login, function WAITS (up to 10 min) for user to authenticate in browser, then automatically returns actual content once auth completes. Single call returns correct content, no retry needed.\n\nRULES: (1) ONE URL at a time, never parallel. (2) Wait for full response - may take minutes for auth. (3) Skip only if 404. (4) Returns HTML with clickable links for subpage navigation.",
       inputSchema: {
         type: "object",
         properties: {
           url: { type: "string", description: "The URL to fetch" },
-          keepPageOpen: { type: "boolean", description: "Keep tab open to reuse for subsequent same-domain requests - preserves auth session (default: true)" },
-          outputFormat: { type: "string", enum: ["HTML", "TEXT", "BOTH"], description: "Output format: HTML for full markup with links/structure, TEXT for clean readable content (more token-efficient), BOTH for complete data (default: HTML)" },
         },
         required: ["url"],
         additionalProperties: false,
@@ -339,7 +452,13 @@ async function main() {
   await server.connect(transport);
 }
-main().catch((err) => {
-  console.error(err);
-  process.exit(1);
-});
+// Export for testing
+export { fetchPage, getBrowser, prepareHtml };
+// Only run main if this is the entry point
+if (import.meta.url === `file://${process.argv[1]}`) {
+  main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
+}

package/tests/domain-tab-pooling.test.js ADDED Viewed

@@ -0,0 +1,329 @@
+/**
+ * UNIT TESTS - Automated tests using mock objects (NO browser required)
+ * These tests validate domain pooling logic without opening Chrome
+ * Run with: node tests/domain-tab-pooling.test.js
+ */
+// Mock domain pages map and browser
+class MockPage {
+  constructor(url) {
+    this._url = url;
+    this._closed = false;
+    this._content = '';
+  }
+  url() { return this._url; }
+  isClosed() { return this._closed; }
+  close() { this._closed = true; }
+  async bringToFront() {}
+  async goto(url) {
+    this._url = url;
+    // Simulate eng.ms page with multiple same-domain links
+    if (url.includes('eng.ms/docs/products/geneva')) {
+      this._content = `
+        <html>
+          <body>
+            <h1>Geneva Documentation</h1>
+            <a href="https://eng.ms/docs/products/geneva/getting-started">Getting Started</a>
+            <a href="https://eng.ms/docs/products/geneva/configuration">Configuration</a>
+            <a href="https://eng.ms/docs/products/geneva/monitoring">Monitoring</a>
+            <a href="https://eng.ms/docs/products/geneva/alerts">Alerts</a>
+            <a href="https://eng.ms/docs/products/geneva/best-practices">Best Practices</a>
+            <a href="https://external.com/link">External Link</a>
+          </body>
+        </html>
+      `;
+    }
+  }
+  async evaluate(fn) {
+    if (this._content) {
+      return fn.toString().includes('outerHTML') ? this._content : fn();
+    }
+    return fn();
+  }
+}
+class MockBrowser {
+  constructor() {
+    this._pages = [];
+  }
+  async newPage() {
+    const page = new MockPage('about:blank');
+    this._pages.push(page);
+    return page;
+  }
+  async pages() {
+    return this._pages;
+  }
+}
+// Test framework
+let testsPassed = 0;
+let testsFailed = 0;
+function assert(condition, message) {
+  if (!condition) {
+    console.error(`❌ FAILED: ${message}`);
+    testsFailed++;
+    throw new Error(message);
+  } else {
+    console.log(`✅ PASSED: ${message}`);
+    testsPassed++;
+  }
+}
+async function test(name, fn) {
+  console.log(`\n🧪 Test: ${name}`);
+  try {
+    await fn();
+  } catch (error) {
+    console.error(`   Error: ${error.message}`);
+  }
+}
+// Tests
+async function runTests() {
+  console.log('🚀 Starting Domain Tab Pooling Tests\n');
+  await test('Should create new tab for first domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const url = 'https://github.com/user/repo';
+    const hostname = new URL(url).hostname;
+    // No existing page for this domain
+    assert(!domainPages.has(hostname), 'Domain should not exist in map initially');
+    // Create new page
+    const page = await browser.newPage();
+    domainPages.set(hostname, page);
+    assert(domainPages.has(hostname), 'Domain should be added to map');
+    assert(domainPages.get(hostname) === page, 'Correct page should be stored');
+  });
+  await test('Should reuse tab for same domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const hostname = 'github.com';
+    // Create first page for domain
+    const page1 = await browser.newPage();
+    await page1.goto('https://github.com/repo1');
+    domainPages.set(hostname, page1);
+    // Try to fetch another URL from same domain
+    const existingPage = domainPages.get(hostname);
+    assert(existingPage === page1, 'Should return same page for same domain');
+    assert(!existingPage.isClosed(), 'Page should still be open');
+  });
+  await test('Should create new tab for different domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    // First domain
+    const page1 = await browser.newPage();
+    await page1.goto('https://github.com/repo');
+    domainPages.set('github.com', page1);
+    // Second domain - should create new tab
+    const hostname2 = 'microsoft.com';
+    assert(!domainPages.has(hostname2), 'Second domain should not exist yet');
+    const page2 = await browser.newPage();
+    await page2.goto('https://microsoft.com/docs');
+    domainPages.set(hostname2, page2);
+    assert(domainPages.has('github.com'), 'First domain should still exist');
+    assert(domainPages.has('microsoft.com'), 'Second domain should now exist');
+    assert(page1 !== page2, 'Should be different page objects');
+    assert(!page1.isClosed(), 'First page should still be open');
+  });
+  await test('Should reuse tab when returning to previous domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    // Domain 1
+    const page1 = await browser.newPage();
+    domainPages.set('github.com', page1);
+    // Domain 2
+    const page2 = await browser.newPage();
+    domainPages.set('microsoft.com', page2);
+    // Back to domain 1
+    const reusedPage = domainPages.get('github.com');
+    assert(reusedPage === page1, 'Should reuse original page for domain 1');
+    assert(!reusedPage.isClosed(), 'Reused page should still be open');
+    assert(domainPages.size === 2, 'Should have 2 domains in map');
+  });
+  await test('Should handle closed tabs gracefully', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const hostname = 'github.com';
+    // Create and store page
+    const page = await browser.newPage();
+    domainPages.set(hostname, page);
+    // Simulate user closing the tab
+    page.close();
+    // Check if page is closed
+    const existingPage = domainPages.get(hostname);
+    if (existingPage && existingPage.isClosed()) {
+      domainPages.delete(hostname);
+    }
+    assert(!domainPages.has(hostname), 'Closed page should be removed from map');
+  });
+  await test('Should extract hostname correctly from URLs', async () => {
+    const testCases = [
+      { url: 'https://github.com/user/repo', expected: 'github.com' },
+      { url: 'https://microsoft.com/docs/page', expected: 'microsoft.com' },
+      { url: 'https://subdomain.example.com/path', expected: 'subdomain.example.com' },
+      { url: 'http://localhost:3000/test', expected: 'localhost' },
+    ];
+    for (const { url, expected } of testCases) {
+      const hostname = new URL(url).hostname;
+      assert(hostname === expected, `Hostname for ${url} should be ${expected}, got ${hostname}`);
+    }
+  });
+  await test('Should handle invalid URLs', async () => {
+    let errorThrown = false;
+    try {
+      new URL('not-a-valid-url');
+    } catch (error) {
+      errorThrown = true;
+    }
+    assert(errorThrown, 'Invalid URL should throw error');
+  });
+  await test('Should clear all pages on browser disconnect', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    // Add multiple domains
+    const page1 = await browser.newPage();
+    domainPages.set('github.com', page1);
+    const page2 = await browser.newPage();
+    domainPages.set('microsoft.com', page2);
+    const page3 = await browser.newPage();
+    domainPages.set('google.com', page3);
+    assert(domainPages.size === 3, 'Should have 3 domains before disconnect');
+    // Simulate browser disconnect
+    domainPages.clear();
+    assert(domainPages.size === 0, 'All domains should be cleared after disconnect');
+  });
+  await test('Should handle multiple requests to same domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const hostname = 'github.com';
+    // First request
+    const page = await browser.newPage();
+    await page.goto('https://github.com/repo1');
+    domainPages.set(hostname, page);
+    // Multiple subsequent requests to same domain
+    for (let i = 2; i <= 5; i++) {
+      const existingPage = domainPages.get(hostname);
+      assert(existingPage === page, `Request ${i} should reuse same page`);
+      await existingPage.goto(`https://github.com/repo${i}`);
+    }
+    assert(domainPages.size === 1, 'Should still have only 1 domain in map');
+  });
+  await test('Should open internal eng.ms page', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const url = 'https://eng.ms/docs/products/geneva';
+    const hostname = new URL(url).hostname;
+    // First request to eng.ms domain
+    assert(!domainPages.has(hostname), 'eng.ms domain should not exist initially');
+    const page = await browser.newPage();
+    await page.goto(url);
+    domainPages.set(hostname, page);
+    assert(domainPages.has(hostname), 'eng.ms domain should be added to map');
+    assert(page.url() === url, 'Page URL should match requested URL');
+    assert(!page.isClosed(), 'Page should remain open');
+  });
+  await test('Should extract and load 5 URLs from same domain', async () => {
+    const domainPages = new Map();
+    const browser = new MockBrowser();
+    const initialUrl = 'https://eng.ms/docs/products/geneva';
+    const hostname = new URL(initialUrl).hostname;
+    // First: Load the initial page
+    const page = await browser.newPage();
+    await page.goto(initialUrl);
+    domainPages.set(hostname, page);
+    // Extract HTML content
+    const html = await page.evaluate(() => document.documentElement.outerHTML);
+    assert(html.includes('Geneva Documentation'), 'Page should contain Geneva content');
+    // Extract URLs from the same domain
+    const urlPattern = /href="(https:\/\/eng\.ms\/[^"]+)"/g;
+    const extractedUrls = [];
+    let match;
+    while ((match = urlPattern.exec(html)) !== null && extractedUrls.length < 5) {
+      extractedUrls.push(match[1]);
+    }
+    assert(extractedUrls.length === 5, `Should extract 5 URLs, got ${extractedUrls.length}`);
+    // Verify all URLs are from eng.ms domain
+    for (const url of extractedUrls) {
+      const urlHostname = new URL(url).hostname;
+      assert(urlHostname === hostname, `All URLs should be from ${hostname}, got ${urlHostname}`);
+    }
+    // Load each of the 5 URLs and verify tab reuse
+    const reusedPage = domainPages.get(hostname);
+    assert(reusedPage === page, 'Should reuse same page for same domain');
+    for (let i = 0; i < extractedUrls.length; i++) {
+      await reusedPage.goto(extractedUrls[i]);
+      assert(reusedPage.url() === extractedUrls[i], `URL ${i+1} should be loaded: ${extractedUrls[i]}`);
+      assert(!reusedPage.isClosed(), `Page should remain open after loading URL ${i+1}`);
+    }
+    assert(domainPages.size === 1, 'Should still have only 1 domain (eng.ms) in map after all loads');
+  });
+  // Summary
+  console.log('\n' + '='.repeat(50));
+  console.log(`✅ Tests Passed: ${testsPassed}`);
+  console.log(`❌ Tests Failed: ${testsFailed}`);
+  console.log('='.repeat(50));
+  if (testsFailed > 0) {
+    process.exit(1);
+  }
+}
+// Run tests
+runTests().catch(error => {
+  console.error('Test suite failed:', error);
+  process.exit(1);
+});

package/tests/integration.test.js ADDED Viewed

@@ -0,0 +1,158 @@
+/**
+ * Integration tests - REQUIRES REAL CHROME AND USER AUTHENTICATION
+ * These tests will actually open Chrome browser and require manual login
+ * Run with: node tests/integration.test.js
+ */
+import { fileURLToPath } from 'url';
+import path from 'path';
+import { fetchPage } from '../src/mcp-browser.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+// Test framework
+let testsPassed = 0;
+let testsFailed = 0;
+function assert(condition, message) {
+  if (!condition) {
+    console.error(`❌ FAILED: ${message}`);
+    testsFailed++;
+    throw new Error(message);
+  } else {
+    console.log(`✅ PASSED: ${message}`);
+    testsPassed++;
+  }
+}
+async function test(name, fn) {
+  console.log(`\n🧪 Test: ${name}`);
+  try {
+    await fn();
+  } catch (error) {
+    console.error(`   Error: ${error.message}`);
+  }
+}
+// Integration Tests
+async function runIntegrationTests() {
+  console.log('🚀 Starting Integration Tests (REAL CHROME)\n');
+  console.log('⚠️  This will open Chrome browser and may require authentication');
+  console.log('⚠️  fetchPage function will WAIT for you to complete authentication\n');
+  try {
+    await test('Should fetch eng.ms page, extract links, and load them (full Copilot workflow)', async () => {
+      const url = 'https://eng.ms/docs/products/geneva';
+      // Step 1: Fetch initial page (with auth waiting)
+      console.log(`   📄 Step 1: Fetching ${url}`);
+      console.log(`   ⏳ Function will wait up to 10 minutes for authentication...`);
+      console.log(`   💡 Complete login in the browser that opens`);
+      const result = await fetchPage({ url });
+      console.log(`   ✅ Result: ${result.success ? 'SUCCESS' : 'FAILED'}`);
+      if (result.success) {
+        console.log(`   🔗 Final URL: ${result.url}`);
+        console.log(`   📄 HTML length: ${result.html?.length || 0} chars`);
+      } else {
+        console.log(`   ❌ Error: ${result.error}`);
+        console.log(`   💡 Hint: ${result.hint}`);
+      }
+      assert(result.success, 'Should successfully fetch page after authentication');
+      assert(result.url.includes('eng.ms'), `URL should be from eng.ms domain, got: ${result.url}`);
+      assert(result.html && result.html.length > 0, 'Should return HTML content');
+      // Step 2: Extract ALL links from HTML, then pick 5 randomly
+      console.log(`\n   📋 Step 2: Extracting all links from HTML...`);
+      const baseUrl = new URL(result.url);
+      const urlPattern = /href=["']([^"']+)["']/g;
+      const allUrls = [];
+      let match;
+      // Static asset extensions to skip
+      const skipExtensions = ['.css', '.js', '.ico', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot'];
+      // Extract ALL URLs first
+      while ((match = urlPattern.exec(result.html)) !== null) {
+        let foundUrl = match[1];
+        // Skip anchor links
+        if (foundUrl.includes('#')) continue;
+        // Convert relative URLs to absolute
+        if (foundUrl.startsWith('/')) {
+          foundUrl = `${baseUrl.origin}${foundUrl}`;
+        } else if (!foundUrl.startsWith('http')) {
+          continue; // Skip other relative URLs
+        }
+        // Skip static assets (check path without query string)
+        const urlWithoutQuery = foundUrl.split('?')[0];
+        if (skipExtensions.some(ext => urlWithoutQuery.toLowerCase().endsWith(ext))) continue;
+        // Only include eng.ms URLs (pages)
+        if (foundUrl.includes('eng.ms')) {
+          allUrls.push(foundUrl);
+        }
+      }
+      console.log(`   📊 Total page URLs found: ${allUrls.length}`);
+      // Remove duplicates
+      const uniqueUrls = [...new Set(allUrls)];
+      console.log(`   🔗 Unique page URLs: ${uniqueUrls.length}`);
+      // Randomly pick 5 URLs
+      const shuffled = uniqueUrls.sort(() => Math.random() - 0.5);
+      const extractedUrls = shuffled.slice(0, 5);
+      console.log(`   🎲 Randomly selected ${extractedUrls.length} URLs to test:`);
+      extractedUrls.forEach((link, i) => console.log(`      ${i+1}. ${link}`));
+      assert(extractedUrls.length > 0, `Should extract at least one eng.ms URL, found ${extractedUrls.length}`);
+      // Step 3: Load each extracted URL (tab reuse)
+      console.log(`\n   🔄 Step 3: Loading extracted links (using same tab)...`);
+      const linksToTest = extractedUrls.slice(0, Math.min(5, extractedUrls.length));
+      for (let i = 0; i < linksToTest.length; i++) {
+        const link = linksToTest[i];
+        console.log(`   📄 Loading link ${i+1}/${linksToTest.length}: ${link}`);
+        const linkResult = await fetchPage({ url: link });
+        console.log(`   ✅ Loaded: ${linkResult.url}`);
+        assert(linkResult.success, `Should successfully load link ${i+1}: ${link}`);
+        assert(linkResult.html && linkResult.html.length > 0, `Link ${i+1} should return HTML content`);
+      }
+    });
+  } catch (error) {
+    console.error('\n❌ Test suite error:', error.message);
+    testsFailed++;
+  } finally {
+    // Summary
+    console.log('\n' + '='.repeat(50));
+    console.log(`✅ Tests Passed: ${testsPassed}`);
+    console.log(`❌ Tests Failed: ${testsFailed}`);
+    console.log('='.repeat(50));
+    console.log('\n💡 Browser left open for manual inspection');
+    if (testsFailed > 0) {
+      process.exit(1);
+    }
+    // Exit immediately without waiting for browser
+    process.exit(0);
+  }
+}
+// Run tests
+runIntegrationTests().catch(error => {
+  console.error('Test suite failed:', error);
+  process.exit(1);
+});

package/tests/prepare-html.test.js ADDED Viewed

@@ -0,0 +1,307 @@
+import assert from 'assert';
+import { prepareHtml } from '../src/mcp-browser.js';
+console.log('🧪 Testing prepareHtml function\n');
+let testsPassed = 0;
+let testsFailed = 0;
+function test(description, fn) {
+  try {
+    fn();
+    console.log(`✅ ${description}`);
+    testsPassed++;
+  } catch (err) {
+    console.log(`❌ ${description}`);
+    console.log(`   Error: ${err.message}`);
+    testsFailed++;
+  }
+}
+// Test 1: Remove HTML comments
+test('Should remove HTML comments', () => {
+  const html = '<div>Content<!-- This is a comment --></div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<!--'), 'Should not contain comment start');
+  assert(!result.includes('-->'), 'Should not contain comment end');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 2: Remove script tags
+test('Should remove script tags and their content', () => {
+  const html = '<div>Keep this</div><script>alert("remove");</script><div>And this</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<script'), 'Should not contain script tag');
+  assert(!result.includes('alert'), 'Should not contain script content');
+  assert(result.includes('Keep this'), 'Should preserve content');
+});
+// Test 3: Remove style tags
+test('Should remove style tags and their content', () => {
+  const html = '<div>Content</div><style>.class { color: red; }</style>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<style'), 'Should not contain style tag');
+  assert(!result.includes('color: red'), 'Should not contain style content');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 4: Remove meta tags
+test('Should remove meta tags', () => {
+  const html = '<head><meta charset="utf-8"><meta name="viewport" content="width=device-width"></head><body>Content</body>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<meta'), 'Should not contain meta tags');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 5: Convert relative URLs in href
+test('Should convert relative href URLs to absolute', () => {
+  const html = '<a href="/docs/page">Link</a>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('href="https://example.com/docs/page"'), 'Should convert relative href to absolute');
+});
+// Test 6: Keep absolute URLs in href unchanged
+test('Should keep absolute href URLs unchanged', () => {
+  const html = '<a href="https://other.com/page">Link</a>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('href="https://other.com/page"'), 'Should keep absolute href unchanged');
+});
+// Test 7: Convert relative URLs in src
+test('Should convert relative src URLs to absolute', () => {
+  const html = '<img src="/images/logo.png">';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('src="https://example.com/images/logo.png"'), 'Should convert relative src to absolute');
+});
+// Test 8: Keep absolute URLs in src unchanged
+test('Should keep absolute src URLs unchanged', () => {
+  const html = '<img src="https://cdn.example.com/logo.png">';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('src="https://cdn.example.com/logo.png"'), 'Should keep absolute src unchanged');
+});
+// Test 9: Handle anchor links (should not modify)
+test('Should not modify anchor links', () => {
+  const html = '<a href="#section">Jump</a>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('href="#section"'), 'Should keep anchor links unchanged');
+});
+// Test 10: Handle mailto and tel links (should not modify)
+test('Should not modify mailto and tel links', () => {
+  const html = '<a href="mailto:test@example.com">Email</a><a href="tel:+1234567890">Call</a>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('href="mailto:test@example.com"'), 'Should keep mailto unchanged');
+  assert(result.includes('href="tel:+1234567890"'), 'Should keep tel unchanged');
+});
+// Test 11: Handle data URIs in src (should not modify)
+test('Should not modify data URIs', () => {
+  const html = '<img src="data:image/png;base64,iVBORw0KGg==">';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('src="data:image/png;base64,iVBORw0KGg=="'), 'Should keep data URI unchanged');
+});
+// Test 12: Handle protocol-relative URLs (should not modify)
+test('Should not modify protocol-relative URLs', () => {
+  const html = '<img src="//cdn.example.com/image.png">';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(result.includes('src="//cdn.example.com/image.png"'), 'Should keep protocol-relative URL unchanged');
+});
+// Test 13: Handle empty or null HTML
+test('Should handle empty HTML', () => {
+  const result = prepareHtml('', 'https://example.com');
+  assert.strictEqual(result, '', 'Should return empty string');
+});
+test('Should handle null HTML', () => {
+  const result = prepareHtml(null, 'https://example.com');
+  assert.strictEqual(result, '', 'Should return empty string for null');
+});
+// Test 14: Complex real-world example
+test('Should handle complex HTML with multiple elements', () => {
+  const html = `
+    <!DOCTYPE html>
+    <html>
+    <head>
+      <meta charset="utf-8">
+      <title>Test Page</title>
+      <style>.test { color: blue; }</style>
+      <script>console.log("test");</script>
+    </head>
+    <body>
+      <!-- Main content -->
+      <div>
+        <a href="/page1">Page 1</a>
+        <a href="https://external.com">External</a>
+        <img src="/images/pic.jpg">
+        <script>alert("inline");</script>
+      </div>
+    </body>
+    </html>
+  `;
+  const result = prepareHtml(html, 'https://example.com/test/');
+  // Should not contain removed elements
+  assert(!result.includes('<meta'), 'Should remove meta');
+  assert(!result.includes('<style'), 'Should remove style');
+  assert(!result.includes('<script'), 'Should remove script');
+  assert(!result.includes('<!--'), 'Should remove comments');
+  // Should convert relative URLs
+  assert(result.includes('href="https://example.com/page1"'), 'Should convert relative href');
+  assert(result.includes('src="https://example.com/images/pic.jpg"'), 'Should convert relative src');
+  // Should keep absolute URLs
+  assert(result.includes('href="https://external.com"'), 'Should keep absolute href');
+  // Should preserve content
+  assert(result.includes('Page 1'), 'Should preserve content');
+});
+// Test 15: Verify script with attributes is removed
+test('Should remove script tags with various attributes', () => {
+  const html = '<script type="text/javascript" async defer src="/app.js">console.log("test");</script>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<script'), 'Should remove script with attributes');
+  assert(!result.includes('app.js'), 'Should remove script content');
+});
+// Test 16: Remove inline style attributes
+test('Should remove inline style attributes', () => {
+  const html = '<div style="color: red; font-size: 14px;">Content</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('style='), 'Should remove style attribute');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 17: Remove class attributes
+test('Should remove class attributes', () => {
+  const html = '<div class="container main-content">Text</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('class='), 'Should remove class attribute');
+  assert(result.includes('Text'), 'Should preserve content');
+});
+// Test 18: Remove id attributes
+test('Should remove id attributes', () => {
+  const html = '<div id="main-section">Content</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('id='), 'Should remove id attribute');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 19: Remove data-* attributes
+test('Should remove data-* attributes', () => {
+  const html = '<div data-id="123" data-value="test">Content</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('data-'), 'Should remove data attributes');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 20: Remove event handler attributes
+test('Should remove event handler attributes', () => {
+  const html = '<button onclick="handleClick()" onmouseover="hover()">Click</button>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('onclick='), 'Should remove onclick');
+  assert(!result.includes('onmouseover='), 'Should remove onmouseover');
+  assert(result.includes('Click'), 'Should preserve content');
+});
+// Test 21: Remove SVG tags
+test('Should remove SVG tags and content', () => {
+  const html = '<div>Text</div><svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<svg'), 'Should remove svg tag');
+  assert(!result.includes('circle'), 'Should remove svg content');
+  assert(result.includes('Text'), 'Should preserve content');
+});
+// Test 22: Remove noscript tags
+test('Should remove noscript tags and content', () => {
+  const html = '<div>Content</div><noscript>JavaScript is disabled</noscript>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<noscript'), 'Should remove noscript tag');
+  assert(!result.includes('JavaScript is disabled'), 'Should remove noscript content');
+  assert(result.includes('Content'), 'Should preserve content');
+});
+// Test 23: Remove link tags
+test('Should remove link tags', () => {
+  const html = '<head><link rel="stylesheet" href="/style.css"><link rel="preload" as="script"></head>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('<link'), 'Should remove link tags');
+});
+// Test 24: Remove role attributes
+test('Should remove role attributes', () => {
+  const html = '<nav role="navigation">Menu</nav>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('role='), 'Should remove role attribute');
+  assert(result.includes('Menu'), 'Should preserve content');
+});
+// Test 25: Remove aria-* attributes
+test('Should remove aria-* attributes', () => {
+  const html = '<button aria-label="Close" aria-pressed="false">X</button>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('aria-'), 'Should remove aria attributes');
+  assert(result.includes('X'), 'Should preserve content');
+});
+// Test 26: Collapse whitespace
+test('Should collapse multiple whitespace into single space', () => {
+  const html = '<div>Line 1\n\n\n   Line 2\t\t\tLine 3</div>';
+  const result = prepareHtml(html, 'https://example.com');
+  assert(!result.includes('\n\n'), 'Should remove multiple newlines');
+  assert(!result.includes('   '), 'Should remove multiple spaces');
+  assert(result.includes('Line 1'), 'Should preserve content');
+});
+// Test 27: Comprehensive test with all removals
+test('Should handle HTML with all types of removals', () => {
+  const html = `
+    <div class="container" id="main" style="color: blue;" data-test="value" onclick="alert()">
+      <svg width="100"><circle/></svg>
+      <script>console.log("test");</script>
+      <style>.test { color: red; }</style>
+      <noscript>Enable JS</noscript>
+      <link rel="stylesheet" href="/style.css">
+      <div role="main" aria-label="content">
+        <a href="/page">Link</a>
+        <p>Text content</p>
+      </div>
+    </div>
+  `;
+  const result = prepareHtml(html, 'https://example.com/test/');
+  // Should remove all code attributes
+  assert(!result.includes('class='), 'Should remove class');
+  assert(!result.includes('id='), 'Should remove id');
+  assert(!result.includes('style='), 'Should remove style');
+  assert(!result.includes('data-'), 'Should remove data attributes');
+  assert(!result.includes('onclick='), 'Should remove onclick');
+  assert(!result.includes('role='), 'Should remove role');
+  assert(!result.includes('aria-'), 'Should remove aria');
+  // Should remove non-content elements
+  assert(!result.includes('<svg'), 'Should remove svg');
+  assert(!result.includes('<script'), 'Should remove script');
+  assert(!result.includes('<style'), 'Should remove style');
+  assert(!result.includes('<noscript'), 'Should remove noscript');
+  assert(!result.includes('<link'), 'Should remove link');
+  // Should preserve content and convert URLs
+  assert(result.includes('href="https://example.com/page"'), 'Should convert relative URL');
+  assert(result.includes('Text content'), 'Should preserve text');
+});
+console.log('\n==================================================');
+console.log(`✅ Tests Passed: ${testsPassed}`);
+console.log(`❌ Tests Failed: ${testsFailed}`);
+console.log('==================================================\n');
+process.exit(testsFailed > 0 ? 1 : 0);