mcpbrowser 0.2.29 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  { "name": "mcpbrowser",
2
- "version": "0.2.29",
2
+ "version": "0.2.30",
3
3
  "mcpName": "io.github.cherchyk/browser",
4
4
  "type": "module",
5
5
  "description": "MCP server for in-browser web page fetching using Chrome DevTools Protocol",
@@ -240,37 +240,79 @@ async function getBrowser() {
240
240
  }
241
241
 
242
242
  /**
243
- * Fetch a web page using Chrome browser, with support for authentication flows and tab reuse.
244
- * Reuses existing tabs per domain when possible. Handles authentication redirects by waiting
245
- * for user to complete login (up to 10 minutes). Processes HTML to remove unnecessary elements
246
- * and convert relative URLs to absolute.
247
- * @param {Object} params - Fetch parameters
248
- * @param {string} params.url - The URL to fetch
249
- * @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML (removes scripts, styles, etc.)
250
- * @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
243
+ * Extract base domain from hostname (e.g., "mail.google.com" → "google.com")
244
+ * @param {string} hostname - The hostname to parse
245
+ * @returns {string} The base domain
251
246
  */
252
- async function fetchPage({ url, removeUnnecessaryHTML = true }) {
253
- // Hardcoded smart defaults
254
- const waitUntil = "networkidle0";
255
- const navigationTimeout = 60000; // Initial navigation timeout
256
- const authCompletionTimeout = 600000; // 10 minutes for user to complete authentication
257
- const reuseLastKeptPage = true;
258
-
259
- if (!url) {
260
- throw new Error("url parameter is required");
247
/**
 * Extract the base domain from a hostname by keeping its last two
 * dot-separated labels (e.g., "mail.google.com" → "google.com").
 *
 * IPv4 literals are returned unchanged: reducing "192.168.1.1" to "1.1" would
 * make unrelated addresses compare as the same base domain. A single trailing
 * dot (fully-qualified form, "example.com.") is dropped before splitting.
 *
 * NOTE: this is a heuristic, not a public-suffix lookup. Multi-label suffixes
 * such as "co.uk" are not recognised, so "shop.example.co.uk" yields "co.uk".
 *
 * @param {string} hostname - The hostname to parse
 * @returns {string} The base domain, or the (normalized) hostname when it has
 *   fewer than two labels or is an IPv4 address
 */
function getBaseDomain(hostname) {
  const normalized = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;
  const labels = normalized.split('.');

  const isIpv4 = labels.length === 4 && labels.every((label) => /^\d{1,3}$/.test(label));
  if (isIpv4 || labels.length < 2) {
    return normalized;
  }

  return labels.slice(-2).join('.');
}
262
254
 
263
- const browser = await getBrowser();
264
- let page = null;
265
- let hostname;
255
/**
 * Detect if a URL contains authentication patterns.
 *
 * A URL is considered auth-related when either:
 *  - its path starts with a known auth segment on a segment boundary
 *    ("/login", "/login/...", but not "/loginhelp"), or
 *  - its hostname starts with a known auth subdomain label ("accounts.", "sso.", ...).
 *
 * Query strings and fragments are never inspected, so a value such as
 * "?next=/login#top" on an ordinary page does not count as an auth URL.
 *
 * @param {string} url - The URL to check
 * @returns {boolean} True if URL appears to be auth-related
 */
function isLikelyAuthUrl(url) {
  // Path-based patterns (strict: must be the first path segment)
  const pathPatterns = [
    '/login', '/signin', '/sign-in', '/auth', '/sso', '/oauth',
    '/authenticate', '/saml', '/openid'
  ];

  // Subdomain patterns (must be at the start of the hostname)
  const subdomainPatterns = [
    'login.', 'auth.', 'sso.', 'accounts.', 'id.', 'identity.',
    'signin.', 'authentication.', 'idp.'
  ];

  let pathname;
  let hostname = '';
  try {
    // Parse once; URL.pathname already excludes the query string and fragment.
    const parsed = new URL(url);
    pathname = parsed.pathname.toLowerCase();
    hostname = parsed.hostname.toLowerCase();
  } catch {
    // Not an absolute URL: treat the raw string as a path and strip any
    // query/fragment manually so the boundary checks below behave the same.
    pathname = url.toLowerCase().split(/[?#]/, 1)[0];
  }

  const hasAuthPath = pathPatterns.some(
    (pattern) => pathname === pattern || pathname.startsWith(`${pattern}/`)
  );
  const hasAuthSubdomain = subdomainPatterns.some((pattern) => hostname.startsWith(pattern));

  return hasAuthPath || hasAuthSubdomain;
}
305
+
306
+ /**
307
+ * Get or create a page for the given domain, reusing existing tabs when possible.
308
+ * @param {Browser} browser - The Puppeteer browser instance
309
+ * @param {string} hostname - The hostname to get/create a page for
310
+ * @param {boolean} reuseLastKeptPage - Whether to reuse existing tabs
311
+ * @returns {Promise<Page>} The page for this domain
312
+ */
313
+ async function getOrCreatePage(browser, hostname, reuseLastKeptPage = true) {
314
+ let page = null;
315
+
274
316
  // Check if we have an existing page for this domain
275
317
  if (reuseLastKeptPage && domainPages.has(hostname)) {
276
318
  const existingPage = domainPages.get(hostname);
@@ -311,85 +353,322 @@ async function fetchPage({ url, removeUnnecessaryHTML = true }) {
311
353
  domainPages.set(hostname, page);
312
354
  console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
313
355
  }
356
+
357
+ return page;
358
+ }
314
359
 
315
- let shouldKeepOpen = true;
316
- let wasSuccess = false;
360
/**
 * Navigate to URL with fallback strategy for slow pages.
 *
 * First navigates with the requested wait condition. If that attempt fails with
 * a timeout/navigation error, it retries once with the more lenient 'load'
 * condition. Any other error is rethrown unchanged. After a successful
 * navigation it pauses briefly so JS-driven redirects can take effect before
 * the caller inspects `page.url()`.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} url - The URL to navigate to
 * @param {string} waitUntil - Wait condition (networkidle0, load, etc.)
 * @param {number} timeout - Navigation timeout in ms
 * @param {number} [redirectSettleMs=2000] - Pause after navigation to let JS redirects happen
 * @returns {Promise<void>}
 */
async function navigateToUrl(page, url, waitUntil, timeout, redirectSettleMs = 2000) {
  console.error(`[MCPBrowser] Navigating to: ${url}`);

  // Handle slow pages: try the requested condition first, fall back to 'load'
  try {
    await page.goto(url, { waitUntil, timeout });
  } catch (error) {
    // Coerce defensively: a rejection is not guaranteed to be an Error with a
    // string message, and a TypeError here would mask the real failure.
    const message = String(error?.message ?? error);
    const isSlowNavigation =
      error?.name === 'TimeoutError' ||
      message.includes('timeout') ||
      message.includes('Navigation');

    if (!isSlowNavigation) {
      throw error;
    }

    console.error(`[MCPBrowser] Navigation slow, trying fallback load strategy...`);
    await page.goto(url, { waitUntil: 'load', timeout });
  }

  // Wait briefly for potential JS redirects
  await new Promise((resolve) => setTimeout(resolve, redirectSettleMs));
}
403
+
404
/**
 * Classify what happened between the requested URL and the URL the page ended on.
 *
 * Possible results:
 *  - `requested_auth`: the caller asked for an auth page and stayed on the same host
 *  - `none`: same host and no switch onto an auth path
 *  - `permanent`: different host that does not look like an auth page
 *  - `auth`: an authentication flow (cross-domain redirect or same-domain path change)
 *
 * @param {string} url - Original requested URL
 * @param {string} hostname - Original hostname
 * @param {string} currentUrl - Current page URL
 * @param {string} currentHostname - Current page hostname
 * @returns {Object} Object with redirect type and related info
 */
function detectRedirectType(url, hostname, currentUrl, currentHostname) {
  const crossDomain = currentHostname !== hostname;
  const askedForAuthPage = isLikelyAuthUrl(url);
  const landedOnAuthPage = isLikelyAuthUrl(currentUrl);
  const sameDomainAuthHop = !crossDomain && landedOnAuthPage && !askedForAuthPage;

  // Auth page requested directly and served from the same host: just return content
  if (askedForAuthPage && !crossDomain) {
    return { type: 'requested_auth', currentHostname };
  }

  // Same host, and we were not pushed onto an auth path
  if (!crossDomain && !sameDomainAuthHop) {
    return { type: 'none' };
  }

  const originalBase = getBaseDomain(hostname);
  const currentBase = getBaseDomain(currentHostname);

  // Different domain without auth patterns: treat as a permanent redirect
  if (!landedOnAuthPage) {
    return { type: 'permanent', currentHostname };
  }

  // Everything else is an authentication flow
  return {
    type: 'auth',
    flowType: sameDomainAuthHop ? 'same-domain path change' : 'cross-domain redirect',
    originalBase,
    currentBase,
    currentUrl,
    hostname,
    currentHostname
  };
}
448
+
449
/**
 * Poll the page for a short time to see whether authentication completes on its
 * own (e.g. because a valid session/cookies already exist).
 *
 * Success means the page is on the original hostname or on a host sharing the
 * original base domain, and its URL no longer looks like an auth URL.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for auto-auth
 * @returns {Promise<Object>} `{ success: true, hostname }` or `{ success: false }`
 */
async function waitForAutoAuth(page, hostname, originalBase, timeoutMs = 5000) {
  console.error(`[MCPBrowser] Checking for auto-authentication (${timeoutMs / 1000} sec)...`);

  const pause = () => new Promise((resolve) => setTimeout(resolve, 500));
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const urlNow = page.url();
      const hostNow = new URL(urlNow).hostname;
      const onOriginalSite = hostNow === hostname || getBaseDomain(hostNow) === originalBase;

      // Back on the original domain/base and no longer on an auth URL
      if (onOriginalSite && !isLikelyAuthUrl(urlNow)) {
        console.error(`[MCPBrowser] Auto-authentication successful! Now at: ${urlNow}`);
        return { success: true, hostname: hostNow };
      }
    } catch {
      // The page may be mid-navigation (URL not parseable yet); keep polling.
    }

    await pause();
  }

  return { success: false };
}
482
+
483
/**
 * Poll the page until the user finishes signing in manually, or the timeout expires.
 *
 * Authentication counts as complete once the page is on the original hostname
 * or on a host sharing the original base domain, and the URL no longer looks
 * like an auth URL. On timeout the result carries an error and a hint that
 * names the URL the tab was left on.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for manual auth
 * @returns {Promise<Object>} Object with success status, final hostname, and optional error
 */
async function waitForManualAuth(page, hostname, originalBase, timeoutMs = 600000) {
  console.error(`[MCPBrowser] Auto-authentication did not complete. Waiting for user...`);
  console.error(`[MCPBrowser] Will wait for return to ${hostname} or same base domain (${originalBase})`);

  const pause = () => new Promise((resolve) => setTimeout(resolve, 2000));
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const urlNow = page.url();
      const hostNow = new URL(urlNow).hostname;
      const onOriginalSite = hostNow === hostname || getBaseDomain(hostNow) === originalBase;

      // Done when back on the original domain OR same base domain, AND not on an auth page
      if (onOriginalSite && !isLikelyAuthUrl(urlNow)) {
        console.error(`[MCPBrowser] Authentication completed! Now at: ${urlNow}`);

        if (hostNow !== hostname) {
          console.error(`[MCPBrowser] Landed on different subdomain: ${hostNow}`);
        }

        return { success: true, hostname: hostNow };
      }
    } catch {
      // The page may be mid-navigation (URL not parseable yet); keep polling.
    }

    await pause();
  }

  // Timed out: report where the tab was left so the user can finish and retry
  const currentUrl = page.url();
  return {
    success: false,
    error: "Authentication timeout - user did not complete login",
    hint: `Authentication timeout. Tab is left open at ${currentUrl}. Complete authentication and retry the same URL.`
  };
}
528
+
529
/**
 * Wait for page to stabilize after authentication.
 *
 * Pauses for a fixed settle period, then waits (best effort) until the document
 * reports `readyState === 'complete'`. A failure of the readiness check never
 * rejects; it is logged so the caller can still try to read the page.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {number} [settleMs=3000] - Fixed pause before checking readiness
 * @param {number} [readyTimeoutMs=10000] - Maximum time to wait for the document to be complete
 * @returns {Promise<void>}
 */
async function waitForPageStability(page, settleMs = 3000, readyTimeoutMs = 10000) {
  console.error(`[MCPBrowser] Waiting for page to stabilize...`);
  await new Promise((resolve) => setTimeout(resolve, settleMs));

  try {
    await page.waitForFunction(() => document.readyState === 'complete', { timeout: readyTimeoutMs });
  } catch (error) {
    // Best effort: the page might already be usable. Log instead of silently
    // swallowing so unexpected failures (e.g. a detached page) stay visible.
    console.error(`[MCPBrowser] Page readiness check did not complete: ${error?.message ?? error}`);
  }
}
544
+
545
/**
 * Read the page's full HTML and run it through the HTML post-processing helpers.
 *
 * The markup is always passed through `enrichHtml` together with the page's
 * current URL; it is passed through `cleanHtml` first only when requested.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {boolean} removeUnnecessaryHTML - Whether to clean the HTML
 * @returns {Promise<string>} The processed HTML
 */
async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
  const rawHtml = await page.evaluate(() => document.documentElement?.outerHTML || "");

  // Optional cleaning step runs before enrichment
  const enrichInput = removeUnnecessaryHTML ? cleanHtml(rawHtml) : rawHtml;

  return enrichHtml(enrichInput, page.url());
}
564
+
565
+ /**
566
+ * Fetch a web page using Chrome browser, with support for authentication flows and tab reuse.
567
+ * Reuses existing tabs per domain when possible. Handles authentication redirects by waiting
568
+ * for user to complete login (up to 10 minutes). Processes HTML to remove unnecessary elements
569
+ * and convert relative URLs to absolute.
570
+ * @param {Object} params - Fetch parameters
571
+ * @param {string} params.url - The URL to fetch
572
+ * @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML (removes scripts, styles, etc.)
573
+ * @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
574
+ */
575
+ async function fetchPage({ url, removeUnnecessaryHTML = true }) {
576
+ // Hardcoded smart defaults
577
+ const waitUntil = "networkidle0";
578
+ const navigationTimeout = 60000;
579
+ const authCompletionTimeout = 600000;
580
+ const reuseLastKeptPage = true;
581
+
582
+ if (!url) {
583
+ throw new Error("url parameter is required");
584
+ }
585
+
586
+ // Parse hostname for domain-based tab reuse
587
+ let hostname;
588
+ try {
589
+ hostname = new URL(url).hostname;
590
+ } catch {
591
+ throw new Error(`Invalid URL: ${url}`);
592
+ }
593
+
594
+ const browser = await getBrowser();
595
+ let page = null;
596
+
597
+ try {
598
+ // Get or create page for this domain
599
+ page = await getOrCreatePage(browser, hostname, reuseLastKeptPage);
600
+
601
+ // Navigate to URL with fallback strategy
602
+ await navigateToUrl(page, url, waitUntil, navigationTimeout);
603
+
604
+ const currentUrl = page.url();
605
+ const currentHostname = new URL(currentUrl).hostname;
606
+ console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);
607
+
608
+ // Detect redirect type and handle accordingly
609
+ const redirectInfo = detectRedirectType(url, hostname, currentUrl, currentHostname);
610
+
611
+ if (redirectInfo.type === 'requested_auth') {
612
+ console.error(`[MCPBrowser] User requested auth page directly, returning content`);
613
+ // Update domain mapping if needed
614
+ if (redirectInfo.currentHostname !== hostname) {
615
+ domainPages.delete(hostname);
616
+ domainPages.set(redirectInfo.currentHostname, page);
617
+ hostname = redirectInfo.currentHostname;
359
618
  }
619
+ } else if (redirectInfo.type === 'permanent') {
620
+ console.error(`[MCPBrowser] Permanent redirect detected: ${hostname} → ${redirectInfo.currentHostname}`);
621
+ console.error(`[MCPBrowser] Accepting redirect and updating domain mapping`);
622
+ domainPages.delete(hostname);
623
+ domainPages.set(redirectInfo.currentHostname, page);
624
+ hostname = redirectInfo.currentHostname;
625
+ } else if (redirectInfo.type === 'auth') {
626
+ console.error(`[MCPBrowser] Authentication flow detected (${redirectInfo.flowType})`);
627
+ console.error(`[MCPBrowser] Current location: ${redirectInfo.currentUrl}`);
360
628
 
361
- // Wait for page to fully stabilize after auth redirect
362
- console.error(`[MCPBrowser] Waiting for page to stabilize after authentication...`);
363
- await new Promise(resolve => setTimeout(resolve, 3000)); // Give page time to settle
629
+ // Try auto-auth first
630
+ const autoAuthResult = await waitForAutoAuth(page, redirectInfo.hostname, redirectInfo.originalBase);
364
631
 
365
- // Ensure page is ready
366
- try {
367
- await page.waitForFunction(() => document.readyState === 'complete', { timeout: 10000 });
368
- } catch {
369
- // Ignore timeout - page might already be ready
632
+ if (autoAuthResult.success) {
633
+ // Update hostname if changed
634
+ if (autoAuthResult.hostname !== hostname) {
635
+ domainPages.delete(hostname);
636
+ domainPages.set(autoAuthResult.hostname, page);
637
+ hostname = autoAuthResult.hostname;
638
+ }
639
+ } else {
640
+ // Wait for manual auth
641
+ const manualAuthResult = await waitForManualAuth(page, redirectInfo.hostname, redirectInfo.originalBase, authCompletionTimeout);
642
+
643
+ if (!manualAuthResult.success) {
644
+ return {
645
+ success: false,
646
+ error: manualAuthResult.error,
647
+ pageKeptOpen: true,
648
+ hint: manualAuthResult.hint
649
+ };
650
+ }
651
+
652
+ // Update hostname if changed
653
+ if (manualAuthResult.hostname !== hostname) {
654
+ domainPages.delete(hostname);
655
+ domainPages.set(manualAuthResult.hostname, page);
656
+ hostname = manualAuthResult.hostname;
657
+ }
370
658
  }
659
+
660
+ // Wait for page stability after auth
661
+ await waitForPageStability(page);
371
662
  }
372
663
 
373
- // Extract HTML content
374
- const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
375
-
376
- // Process HTML based on removeUnnecessaryHTML parameter
377
- let processedHtml;
378
- if (removeUnnecessaryHTML) {
379
- const cleaned = cleanHtml(html);
380
- processedHtml = enrichHtml(cleaned, page.url());
381
- } else {
382
- processedHtml = enrichHtml(html, page.url());
383
- }
664
+ // Extract and process HTML
665
+ const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);
384
666
 
385
- const result = {
667
+ return {
386
668
  success: true,
387
669
  url: page.url(),
388
670
  html: processedHtml
389
671
  };
390
-
391
- wasSuccess = true;
392
- return result;
393
672
  } catch (err) {
394
673
  const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
395
674
  return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
@@ -587,10 +866,27 @@ async function main() {
587
866
  }
588
867
 
589
868
  // Export for testing
590
- export { fetchPage, getBrowser, prepareHtml, cleanHtml, enrichHtml };
869
+ export {
870
+ fetchPage,
871
+ getBrowser,
872
+ prepareHtml,
873
+ cleanHtml,
874
+ enrichHtml,
875
+ getOrCreatePage,
876
+ navigateToUrl,
877
+ detectRedirectType,
878
+ waitForAutoAuth,
879
+ waitForManualAuth,
880
+ waitForPageStability,
881
+ extractAndProcessHtml,
882
+ getBaseDomain,
883
+ isLikelyAuthUrl
884
+ };
591
885
 
592
- // Run the MCP server
593
- main().catch((err) => {
594
- console.error(err);
595
- process.exit(1);
596
- });
886
// Run the MCP server only if this is the main module (not imported for testing).
//
// The entry script path is converted with pathToFileURL() so the comparison
// works on POSIX as well as Windows: building the URL by hand as
// `file:///${process.argv[1]}` yields "file:////abs/path" for absolute POSIX
// paths, which never equals import.meta.url, so the server would never start.
// The path is passed through realpathSync() first because Node resolves
// symlinks for the main module by default, so a launch through a symlinked
// bin entry would otherwise not match either.
(async () => {
  const entryPath = process.argv[1];
  if (!entryPath) {
    return; // No entry script (e.g. `node -e` or the REPL): nothing to start.
  }

  const [{ pathToFileURL }, { realpathSync }] = await Promise.all([
    import('node:url'),
    import('node:fs'),
  ]);

  let resolvedEntry = entryPath;
  try {
    resolvedEntry = realpathSync(entryPath);
  } catch {
    // Entry path could not be resolved on disk; compare the raw path instead.
  }

  if (import.meta.url === pathToFileURL(resolvedEntry).href) {
    await main();
  }
})().catch((err) => {
  console.error(err);
  process.exit(1);
});
@@ -0,0 +1,96 @@
1
+ # MCPBrowser Tests
2
+
3
+ Comprehensive test suite for MCPBrowser redirect detection, authentication flow handling, and HTML processing.
4
+
5
+ ## Test Suites
6
+
7
+ ### 1. `redirect-detection.test.js`
8
+ Tests for redirect detection and URL analysis functions:
9
+ - **`getBaseDomain()`** - Extracts base domain from hostnames
10
+ - **`isLikelyAuthUrl()`** - Detects authentication URLs using patterns
11
+ - **`detectRedirectType()`** - Classifies redirects (permanent, auth flow, etc.)
12
+
13
+ **43 tests** covering:
14
+ - Gmail → mail.google.com permanent redirects
15
+ - Cross-domain SSO (Google, Microsoft, Okta)
16
+ - Same-domain auth path changes
17
+ - Requested auth page detection
18
+ - False positive prevention
19
+
20
+ ### 2. `auth-flow.test.js`
21
+ Tests for authentication flow handling:
22
+ - **`waitForAutoAuth()`** - Auto-authentication detection (5s timeout)
23
+ - **`waitForManualAuth()`** - Manual auth completion (10min timeout)
24
+
25
+ **14 tests** covering:
26
+ - Auto-authentication with valid cookies
27
+ - Cross-domain SSO flows
28
+ - Subdomain landing after auth
29
+ - Timeout handling
30
+ - Error resilience
31
+
32
+ ### 3. `prepare-html.test.js`
33
+ Tests for HTML processing:
34
+ - **`cleanHtml()`** - Removes scripts, styles, attributes
35
+ - **`enrichHtml()`** - Converts relative URLs to absolute
36
+ - **`prepareHtml()`** - Combined clean + enrich
37
+
38
+ **49 tests** for HTML sanitization and URL enrichment.
39
+
40
+ ## Running Tests
41
+
42
+ ### Run All Tests
43
+ ```bash
44
+ node tests/run-all.js
45
+ ```
46
+
47
+ ### Run Individual Test Suite
48
+ ```bash
49
+ node tests/redirect-detection.test.js
50
+ node tests/auth-flow.test.js
51
+ node tests/prepare-html.test.js
52
+ ```
53
+
54
+ ## Test Coverage
55
+
56
+ **Total: 106 tests**
57
+ - ✅ All redirect scenarios (permanent, auth, same-domain)
58
+ - ✅ Authentication flows (auto-auth, manual, SSO)
59
+ - ✅ HTML processing and sanitization
60
+ - ✅ Edge cases and error handling
61
+
62
+ ## Key Scenarios Tested
63
+
64
+ ### Redirect Detection
65
+ - `gmail.com` → `mail.google.com` (permanent redirect)
66
+ - `site.com` → `accounts.google.com` (SSO auth)
67
+ - `site.com/dashboard` → `site.com/login` (same-domain auth)
68
+ - `accounts.google.com` requested directly (no redirect)
69
+
70
+ ### Auth Flows
71
+ - Auto-auth with valid session cookies
72
+ - Manual auth with cross-domain SSO providers
73
+ - Landing on different subdomain after auth
74
+ - Timeout scenarios with user hints
75
+
76
+ ### HTML Processing
77
+ - Script/style removal
78
+ - Attribute cleaning (class, id, data-*, events)
79
+ - Relative → absolute URL conversion
80
+ - SVG and comment removal
81
+
82
+ ## Mock Objects
83
+
84
+ Tests use mock `Page` objects that simulate Puppeteer's page behavior:
85
+ - Configurable URL transitions
86
+ - Timing controls for async auth flows
87
+ - Error simulation for robustness testing
88
+
89
+ ## Fast Execution
90
+
91
+ All tests complete in **~15 seconds**:
92
+ - Pure function tests (redirect detection): instant
93
+ - Async tests (auth flows): ~10 seconds
94
+ - HTML processing: instant
95
+
96
+ Importing the module no longer auto-starts the MCP server, which fixes the issue that previously caused test runs to hang.