mcpbrowser 0.2.29 → 0.2.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/mcp-browser.js +385 -89
- package/tests/README.md +96 -0
- package/tests/auth-flow.test.js +279 -0
- package/tests/integration.test.js +171 -163
- package/tests/redirect-detection.test.js +341 -0
- package/tests/run-all.js +77 -0
package/package.json
CHANGED
package/src/mcp-browser.js
CHANGED
|
@@ -240,37 +240,79 @@ async function getBrowser() {
|
|
|
240
240
|
}
|
|
241
241
|
|
|
242
242
|
/**
|
|
243
|
-
*
|
|
244
|
-
*
|
|
245
|
-
*
|
|
246
|
-
* and convert relative URLs to absolute.
|
|
247
|
-
* @param {Object} params - Fetch parameters
|
|
248
|
-
* @param {string} params.url - The URL to fetch
|
|
249
|
-
* @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML (removes scripts, styles, etc.)
|
|
250
|
-
* @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
|
|
243
|
+
* Extract base domain from hostname (e.g., "mail.google.com" → "google.com")
|
|
244
|
+
* @param {string} hostname - The hostname to parse
|
|
245
|
+
* @returns {string} The base domain
|
|
251
246
|
*/
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const authCompletionTimeout = 600000; // 10 minutes for user to complete authentication
|
|
257
|
-
const reuseLastKeptPage = true;
|
|
258
|
-
|
|
259
|
-
if (!url) {
|
|
260
|
-
throw new Error("url parameter is required");
|
|
247
|
+
/**
 * Extract the base (registrable) domain from a hostname,
 * e.g. "mail.google.com" → "google.com".
 *
 * This is a heuristic, not a full Public Suffix List lookup:
 * - IPv4 literals are returned unchanged (they have no base domain).
 * - For two-letter country TLDs preceded by a common second-level label
 *   ("co", "com", "org", "net", "gov", "edu", "ac") three labels are kept,
 *   so "www.bbc.co.uk" → "bbc.co.uk" rather than "co.uk".
 * - Otherwise the last two labels are kept; single-label hostnames
 *   (e.g. "localhost") are returned unchanged.
 *
 * @param {string} hostname - The hostname to parse
 * @returns {string} The base domain
 */
function getBaseDomain(hostname) {
  // An IPv4 address is not a domain name; slicing it would yield nonsense like "0.1".
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)) {
    return hostname;
  }

  const labels = hostname.split('.');
  if (labels.length < 2) {
    return hostname;
  }

  // Common second-level labels that act as public suffixes under country TLDs.
  const compoundSecondLevels = new Set(['co', 'com', 'org', 'net', 'gov', 'edu', 'ac']);
  const topLevel = labels[labels.length - 1];
  const secondLevel = labels[labels.length - 2];
  const hasCompoundSuffix =
    labels.length >= 3 && topLevel.length === 2 && compoundSecondLevels.has(secondLevel);

  return labels.slice(hasCompoundSuffix ? -3 : -2).join('.');
}
|
|
262
254
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
255
|
+
/**
 * Detect whether a URL looks like an authentication page.
 *
 * A URL matches when either:
 * - its path starts with a known auth segment ("/login", "/oauth", ...) and that
 *   segment ends at a boundary (end of path, "/", "?" or "#"), or
 * - its hostname starts with a known auth subdomain ("login.", "accounts.", ...).
 *
 * Matching is case-insensitive. If the input cannot be parsed as an absolute URL
 * (e.g. a relative path such as "/login?next=/"), the whole lowercased string is
 * treated as the path and no hostname check is performed.
 *
 * @param {string} url - The URL to check
 * @returns {boolean} True if URL appears to be auth-related
 */
function isLikelyAuthUrl(url) {
  const lowerUrl = url.toLowerCase();

  // Path-based patterns (strict - must sit at the start of the path, on a boundary)
  const pathPatterns = [
    '/login', '/signin', '/sign-in', '/auth', '/sso', '/oauth',
    '/authenticate', '/saml', '/openid'
  ];

  // Subdomain patterns (must be the first label(s) of the hostname)
  const subdomainPatterns = [
    'login.', 'auth.', 'sso.', 'accounts.', 'id.', 'identity.',
    'signin.', 'authentication.', 'idp.'
  ];

  // Parse once; both the path and the hostname come from the same URL object.
  let pathname;
  let hostname;
  try {
    const parsed = new URL(url);
    pathname = parsed.pathname.toLowerCase();
    hostname = parsed.hostname.toLowerCase();
  } catch {
    // Not an absolute URL: fall back to matching against the raw string.
    pathname = lowerUrl;
    hostname = '';
  }

  // For parsed URLs `pathname` never contains "?" or "#", so the exact-match test
  // covers "/login?x" and "/login#x". The delimiter checks matter for the
  // unparsed fallback, where query and fragment are still part of the string.
  // Anchoring the "#" check to the start of the path (rather than searching the
  // whole URL) avoids flagging pages such as "/blog/login#comments".
  const boundaryChars = ['/', '?', '#'];
  const hasAuthPath = pathPatterns.some(
    (pattern) =>
      pathname === pattern ||
      boundaryChars.some((boundary) => pathname.startsWith(pattern + boundary))
  );

  const hasAuthSubdomain = subdomainPatterns.some((pattern) => hostname.startsWith(pattern));

  return hasAuthPath || hasAuthSubdomain;
}
|
|
305
|
+
|
|
306
|
+
/**
|
|
307
|
+
* Get or create a page for the given domain, reusing existing tabs when possible.
|
|
308
|
+
* @param {Browser} browser - The Puppeteer browser instance
|
|
309
|
+
* @param {string} hostname - The hostname to get/create a page for
|
|
310
|
+
* @param {boolean} reuseLastKeptPage - Whether to reuse existing tabs
|
|
311
|
+
* @returns {Promise<Page>} The page for this domain
|
|
312
|
+
*/
|
|
313
|
+
async function getOrCreatePage(browser, hostname, reuseLastKeptPage = true) {
|
|
314
|
+
let page = null;
|
|
315
|
+
|
|
274
316
|
// Check if we have an existing page for this domain
|
|
275
317
|
if (reuseLastKeptPage && domainPages.has(hostname)) {
|
|
276
318
|
const existingPage = domainPages.get(hostname);
|
|
@@ -311,85 +353,322 @@ async function fetchPage({ url, removeUnnecessaryHTML = true }) {
|
|
|
311
353
|
domainPages.set(hostname, page);
|
|
312
354
|
console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
|
|
313
355
|
}
|
|
356
|
+
|
|
357
|
+
return page;
|
|
358
|
+
}
|
|
314
359
|
|
|
315
|
-
|
|
316
|
-
|
|
360
|
+
/**
 * Navigate to a URL, retrying with a weaker wait condition when the first attempt stalls.
 *
 * The first attempt uses the caller's `waitUntil`. If it fails with an error whose
 * message mentions "timeout" or "Navigation", the navigation is retried once with
 * `waitUntil: 'load'`. After a successful navigation the function pauses for
 * `settleMs` so that script-driven redirects can fire before the caller reads
 * `page.url()`. Main-frame navigations seen during the call are tracked and the
 * final one is logged when it differs from the requested URL.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} url - The URL to navigate to
 * @param {string} waitUntil - Wait condition (networkidle0, load, etc.)
 * @param {number} timeout - Navigation timeout in ms
 * @param {number} [settleMs=2000] - How long to wait for JS redirects after navigation
 * @returns {Promise<void>}
 * @throws Rethrows any first-attempt error that is not timeout/navigation related,
 *   and any error raised by the fallback attempt.
 */
async function navigateToUrl(page, url, waitUntil, timeout, settleMs = 2000) {
  console.error(`[MCPBrowser] Navigating to: ${url}`);

  // Track main-frame navigations so JS-based redirects after load are visible in the log.
  let lastMainFrameUrl = null;
  const navigationHandler = (frame) => {
    if (frame === page.mainFrame()) {
      lastMainFrameUrl = frame.url();
    }
  };
  page.on('framenavigated', navigationHandler);

  try {
    // Handle slow pages: try the requested condition first, fall back to 'load'.
    try {
      await page.goto(url, { waitUntil, timeout });
    } catch (error) {
      // Guard against non-Error rejections that carry no `message`.
      const message = String(error?.message ?? error);
      if (!message.includes('timeout') && !message.includes('Navigation')) {
        throw error;
      }
      console.error(`[MCPBrowser] Navigation slow, trying fallback load strategy...`);
      await page.goto(url, { waitUntil: 'load', timeout });
    }

    // Wait briefly for potential JS redirects
    await new Promise((resolve) => setTimeout(resolve, settleMs));

    if (lastMainFrameUrl !== null && lastMainFrameUrl !== url) {
      console.error(`[MCPBrowser] Main frame settled at: ${lastMainFrameUrl}`);
    }
  } finally {
    // Always detach the listener, including when navigation throws.
    page.off('framenavigated', navigationHandler);
  }
}
|
|
403
|
+
|
|
404
|
+
/**
 * Classify where a navigation ended up relative to what was requested.
 *
 * Possible results:
 * - `{ type: 'requested_auth', currentHostname }` - an auth-looking URL was requested
 *   and the page stayed on the same hostname.
 * - `{ type: 'none' }` - same hostname and the page is not an auth page.
 * - `{ type: 'permanent', currentHostname }` - hostname changed and the landing URL
 *   does not look like an auth page.
 * - `{ type: 'auth', flowType, originalBase, currentBase, currentUrl, hostname,
 *   currentHostname }` - the landing URL looks like an auth page, either on the same
 *   hostname ('same-domain path change') or another one ('cross-domain redirect').
 *
 * @param {string} url - Original requested URL
 * @param {string} hostname - Original hostname
 * @param {string} currentUrl - Current page URL
 * @param {string} currentHostname - Current page hostname
 * @returns {Object} Object with redirect type and related info
 */
function detectRedirectType(url, hostname, currentUrl, currentHostname) {
  const stayedOnHost = currentHostname === hostname;
  const askedForAuthPage = isLikelyAuthUrl(url);
  const landedOnAuthPage = isLikelyAuthUrl(currentUrl);

  // The caller explicitly asked for an auth page and got it: just return the content.
  if (stayedOnHost && askedForAuthPage) {
    return { type: 'requested_auth', currentHostname };
  }

  // Same host and not bounced to an auth path: nothing to handle.
  if (stayedOnHost && !landedOnAuthPage) {
    return { type: 'none' };
  }

  const originalBase = getBaseDomain(hostname);
  const currentBase = getBaseDomain(currentHostname);

  // Host changed but the destination is an ordinary page: treat as a permanent redirect.
  if (!landedOnAuthPage) {
    return { type: 'permanent', currentHostname };
  }

  // Anything left is an authentication flow.
  return {
    type: 'auth',
    flowType: stayedOnHost ? 'same-domain path change' : 'cross-domain redirect',
    originalBase,
    currentBase,
    currentUrl,
    hostname,
    currentHostname
  };
}
|
|
448
|
+
|
|
449
|
+
/**
 * Poll briefly to see whether authentication completes on its own
 * (for example because a valid session cookie already exists).
 *
 * Every 500 ms the page URL is inspected. Authentication counts as complete when the
 * page is on the original hostname, or on a hostname sharing the original base domain,
 * and the URL no longer looks like an auth URL. Errors raised while inspecting the
 * page are ignored and polling continues until the deadline.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for auto-auth
 * @returns {Promise<Object>} `{ success: true, hostname }` with the final hostname,
 *   or `{ success: false }` if the deadline passed
 */
async function waitForAutoAuth(page, hostname, originalBase, timeoutMs = 5000) {
  console.error(`[MCPBrowser] Checking for auto-authentication (${timeoutMs / 1000} sec)...`);

  const pause = () => new Promise((resolve) => setTimeout(resolve, 500));
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const landedUrl = page.url();
      const landedHost = new URL(landedUrl).hostname;
      const landedBase = getBaseDomain(landedHost);
      const backOnSite = landedHost === hostname || landedBase === originalBase;

      // Back on the original site (or a sibling subdomain) and off the auth page
      if (backOnSite && !isLikelyAuthUrl(landedUrl)) {
        console.error(`[MCPBrowser] Auto-authentication successful! Now at: ${landedUrl}`);
        return { success: true, hostname: landedHost };
      }
    } catch {
      // The page may be mid-navigation; try again on the next tick.
    }

    await pause();
  }

  return { success: false };
}
|
|
482
|
+
|
|
483
|
+
/**
 * Poll until the user finishes logging in manually, or the timeout elapses.
 *
 * Every 2 seconds the page URL is inspected. Authentication counts as complete when
 * the page is on the original hostname, or on a hostname sharing the original base
 * domain, and the URL no longer looks like an auth URL. Errors raised while inspecting
 * the page are ignored and polling continues until the deadline.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for manual auth
 * @returns {Promise<Object>} `{ success: true, hostname }` on completion, otherwise
 *   `{ success: false, error, hint }` describing the timeout
 */
async function waitForManualAuth(page, hostname, originalBase, timeoutMs = 600000) {
  console.error(`[MCPBrowser] Auto-authentication did not complete. Waiting for user...`);
  console.error(`[MCPBrowser] Will wait for return to ${hostname} or same base domain (${originalBase})`);

  const pause = () => new Promise((resolve) => setTimeout(resolve, 2000));
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const landedUrl = page.url();
      const landedHost = new URL(landedUrl).hostname;
      const landedBase = getBaseDomain(landedHost);
      const backOnSite = landedHost === hostname || landedBase === originalBase;

      // Done once the user is back on the site (or a sibling subdomain) and off the auth page
      if (backOnSite && !isLikelyAuthUrl(landedUrl)) {
        console.error(`[MCPBrowser] Authentication completed! Now at: ${landedUrl}`);

        if (landedHost !== hostname) {
          console.error(`[MCPBrowser] Landed on different subdomain: ${landedHost}`);
        }

        return { success: true, hostname: landedHost };
      }
    } catch {
      // The page may be mid-navigation; try again on the next tick.
    }

    await pause();
  }

  // Deadline passed: report where the tab was left so the user can finish and retry.
  const currentUrl = page.url();
  return {
    success: false,
    error: "Authentication timeout - user did not complete login",
    hint: `Authentication timeout. Tab is left open at ${currentUrl}. Complete authentication and retry the same URL.`
  };
}
|
|
528
|
+
|
|
529
|
+
/**
 * Give the page time to settle after an authentication flow.
 *
 * Sleeps for a fixed 3 seconds, then waits up to 10 seconds for
 * `document.readyState` to become "complete". A failure of that second wait
 * (such as a timeout) is ignored.
 *
 * @param {Page} page - The Puppeteer page instance
 * @returns {Promise<void>}
 */
async function waitForPageStability(page) {
  console.error(`[MCPBrowser] Waiting for page to stabilize...`);

  const settleDelay = new Promise((done) => {
    setTimeout(done, 3000);
  });
  await settleDelay;

  const isDocumentComplete = () => document.readyState === 'complete';
  try {
    await page.waitForFunction(isDocumentComplete, { timeout: 10000 });
  } catch {
    // Best effort only: continue with whatever state the page is in.
  }
}
|
|
544
|
+
|
|
545
|
+
/**
 * Read the page's full HTML and post-process it.
 *
 * The document's outer HTML (or an empty string when there is no document element)
 * is optionally passed through `cleanHtml`, then always through `enrichHtml`
 * together with the page's current URL.
 *
 * @param {Page} page - The Puppeteer page instance
 * @param {boolean} removeUnnecessaryHTML - Whether to clean the HTML
 * @returns {Promise<string>} The processed HTML
 */
async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
  const rawHtml = await page.evaluate(() => document.documentElement?.outerHTML || "");
  const source = removeUnnecessaryHTML ? cleanHtml(rawHtml) : rawHtml;
  return enrichHtml(source, page.url());
}
|
|
564
|
+
|
|
565
|
+
/**
|
|
566
|
+
* Fetch a web page using Chrome browser, with support for authentication flows and tab reuse.
|
|
567
|
+
* Reuses existing tabs per domain when possible. Handles authentication redirects by waiting
|
|
568
|
+
* for user to complete login (up to 10 minutes). Processes HTML to remove unnecessary elements
|
|
569
|
+
* and convert relative URLs to absolute.
|
|
570
|
+
* @param {Object} params - Fetch parameters
|
|
571
|
+
* @param {string} params.url - The URL to fetch
|
|
572
|
+
* @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML (removes scripts, styles, etc.)
|
|
573
|
+
* @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
|
|
574
|
+
*/
|
|
575
|
+
async function fetchPage({ url, removeUnnecessaryHTML = true }) {
|
|
576
|
+
// Hardcoded smart defaults
|
|
577
|
+
const waitUntil = "networkidle0";
|
|
578
|
+
const navigationTimeout = 60000;
|
|
579
|
+
const authCompletionTimeout = 600000;
|
|
580
|
+
const reuseLastKeptPage = true;
|
|
581
|
+
|
|
582
|
+
if (!url) {
|
|
583
|
+
throw new Error("url parameter is required");
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Parse hostname for domain-based tab reuse
|
|
587
|
+
let hostname;
|
|
588
|
+
try {
|
|
589
|
+
hostname = new URL(url).hostname;
|
|
590
|
+
} catch {
|
|
591
|
+
throw new Error(`Invalid URL: ${url}`);
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
const browser = await getBrowser();
|
|
595
|
+
let page = null;
|
|
596
|
+
|
|
597
|
+
try {
|
|
598
|
+
// Get or create page for this domain
|
|
599
|
+
page = await getOrCreatePage(browser, hostname, reuseLastKeptPage);
|
|
600
|
+
|
|
601
|
+
// Navigate to URL with fallback strategy
|
|
602
|
+
await navigateToUrl(page, url, waitUntil, navigationTimeout);
|
|
603
|
+
|
|
604
|
+
const currentUrl = page.url();
|
|
605
|
+
const currentHostname = new URL(currentUrl).hostname;
|
|
606
|
+
console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);
|
|
607
|
+
|
|
608
|
+
// Detect redirect type and handle accordingly
|
|
609
|
+
const redirectInfo = detectRedirectType(url, hostname, currentUrl, currentHostname);
|
|
610
|
+
|
|
611
|
+
if (redirectInfo.type === 'requested_auth') {
|
|
612
|
+
console.error(`[MCPBrowser] User requested auth page directly, returning content`);
|
|
613
|
+
// Update domain mapping if needed
|
|
614
|
+
if (redirectInfo.currentHostname !== hostname) {
|
|
615
|
+
domainPages.delete(hostname);
|
|
616
|
+
domainPages.set(redirectInfo.currentHostname, page);
|
|
617
|
+
hostname = redirectInfo.currentHostname;
|
|
359
618
|
}
|
|
619
|
+
} else if (redirectInfo.type === 'permanent') {
|
|
620
|
+
console.error(`[MCPBrowser] Permanent redirect detected: ${hostname} → ${redirectInfo.currentHostname}`);
|
|
621
|
+
console.error(`[MCPBrowser] Accepting redirect and updating domain mapping`);
|
|
622
|
+
domainPages.delete(hostname);
|
|
623
|
+
domainPages.set(redirectInfo.currentHostname, page);
|
|
624
|
+
hostname = redirectInfo.currentHostname;
|
|
625
|
+
} else if (redirectInfo.type === 'auth') {
|
|
626
|
+
console.error(`[MCPBrowser] Authentication flow detected (${redirectInfo.flowType})`);
|
|
627
|
+
console.error(`[MCPBrowser] Current location: ${redirectInfo.currentUrl}`);
|
|
360
628
|
|
|
361
|
-
//
|
|
362
|
-
|
|
363
|
-
await new Promise(resolve => setTimeout(resolve, 3000)); // Give page time to settle
|
|
629
|
+
// Try auto-auth first
|
|
630
|
+
const autoAuthResult = await waitForAutoAuth(page, redirectInfo.hostname, redirectInfo.originalBase);
|
|
364
631
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
632
|
+
if (autoAuthResult.success) {
|
|
633
|
+
// Update hostname if changed
|
|
634
|
+
if (autoAuthResult.hostname !== hostname) {
|
|
635
|
+
domainPages.delete(hostname);
|
|
636
|
+
domainPages.set(autoAuthResult.hostname, page);
|
|
637
|
+
hostname = autoAuthResult.hostname;
|
|
638
|
+
}
|
|
639
|
+
} else {
|
|
640
|
+
// Wait for manual auth
|
|
641
|
+
const manualAuthResult = await waitForManualAuth(page, redirectInfo.hostname, redirectInfo.originalBase, authCompletionTimeout);
|
|
642
|
+
|
|
643
|
+
if (!manualAuthResult.success) {
|
|
644
|
+
return {
|
|
645
|
+
success: false,
|
|
646
|
+
error: manualAuthResult.error,
|
|
647
|
+
pageKeptOpen: true,
|
|
648
|
+
hint: manualAuthResult.hint
|
|
649
|
+
};
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// Update hostname if changed
|
|
653
|
+
if (manualAuthResult.hostname !== hostname) {
|
|
654
|
+
domainPages.delete(hostname);
|
|
655
|
+
domainPages.set(manualAuthResult.hostname, page);
|
|
656
|
+
hostname = manualAuthResult.hostname;
|
|
657
|
+
}
|
|
370
658
|
}
|
|
659
|
+
|
|
660
|
+
// Wait for page stability after auth
|
|
661
|
+
await waitForPageStability(page);
|
|
371
662
|
}
|
|
372
663
|
|
|
373
|
-
// Extract HTML
|
|
374
|
-
const
|
|
375
|
-
|
|
376
|
-
// Process HTML based on removeUnnecessaryHTML parameter
|
|
377
|
-
let processedHtml;
|
|
378
|
-
if (removeUnnecessaryHTML) {
|
|
379
|
-
const cleaned = cleanHtml(html);
|
|
380
|
-
processedHtml = enrichHtml(cleaned, page.url());
|
|
381
|
-
} else {
|
|
382
|
-
processedHtml = enrichHtml(html, page.url());
|
|
383
|
-
}
|
|
664
|
+
// Extract and process HTML
|
|
665
|
+
const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);
|
|
384
666
|
|
|
385
|
-
|
|
667
|
+
return {
|
|
386
668
|
success: true,
|
|
387
669
|
url: page.url(),
|
|
388
670
|
html: processedHtml
|
|
389
671
|
};
|
|
390
|
-
|
|
391
|
-
wasSuccess = true;
|
|
392
|
-
return result;
|
|
393
672
|
} catch (err) {
|
|
394
673
|
const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
|
|
395
674
|
return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
|
|
@@ -587,10 +866,27 @@ async function main() {
|
|
|
587
866
|
}
|
|
588
867
|
|
|
589
868
|
// Export for testing
|
|
590
|
-
export {
|
|
869
|
+
export {
|
|
870
|
+
fetchPage,
|
|
871
|
+
getBrowser,
|
|
872
|
+
prepareHtml,
|
|
873
|
+
cleanHtml,
|
|
874
|
+
enrichHtml,
|
|
875
|
+
getOrCreatePage,
|
|
876
|
+
navigateToUrl,
|
|
877
|
+
detectRedirectType,
|
|
878
|
+
waitForAutoAuth,
|
|
879
|
+
waitForManualAuth,
|
|
880
|
+
waitForPageStability,
|
|
881
|
+
extractAndProcessHtml,
|
|
882
|
+
getBaseDomain,
|
|
883
|
+
isLikelyAuthUrl
|
|
884
|
+
};
|
|
591
885
|
|
|
592
|
-
// Run the MCP server
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
886
|
+
// Run the MCP server only if this is the main module (not imported for testing)
|
|
887
|
+
// Start the server only when this file is the process entry point. The entry script
// path is converted with pathToFileURL instead of hand-building a "file:///" string:
// on POSIX the path already begins with "/", so string concatenation produced
// "file:////..." and never matched import.meta.url. pathToFileURL also applies the
// same percent-encoding as import.meta.url. The real path is compared as well
// because Node resolves symlinks (e.g. npm bin links) for the main module.
{
  const entryScript = process.argv[1];
  let isEntryPoint = false;

  if (entryScript) {
    const { pathToFileURL } = await import('node:url');
    const { realpathSync } = await import('node:fs');

    const candidates = [pathToFileURL(entryScript).href];
    try {
      candidates.push(pathToFileURL(realpathSync(entryScript)).href);
    } catch {
      // argv[1] does not name an existing file; rely on the unresolved path only.
    }
    isEntryPoint = candidates.includes(import.meta.url);
  }

  if (isEntryPoint) {
    main().catch((err) => {
      console.error(err);
      process.exit(1);
    });
  }
}
|
package/tests/README.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# MCPBrowser Tests
|
|
2
|
+
|
|
3
|
+
Comprehensive test suite for MCPBrowser redirect detection, authentication flow handling, and HTML processing.
|
|
4
|
+
|
|
5
|
+
## Test Suites
|
|
6
|
+
|
|
7
|
+
### 1. `redirect-detection.test.js`
|
|
8
|
+
Tests for redirect detection and URL analysis functions:
|
|
9
|
+
- **`getBaseDomain()`** - Extracts base domain from hostnames
|
|
10
|
+
- **`isLikelyAuthUrl()`** - Detects authentication URLs using patterns
|
|
11
|
+
- **`detectRedirectType()`** - Classifies redirects (permanent, auth flow, etc.)
|
|
12
|
+
|
|
13
|
+
**43 tests** covering:
|
|
14
|
+
- Gmail → mail.google.com permanent redirects
|
|
15
|
+
- Cross-domain SSO (Google, Microsoft, Okta)
|
|
16
|
+
- Same-domain auth path changes
|
|
17
|
+
- Requested auth page detection
|
|
18
|
+
- False positive prevention
|
|
19
|
+
|
|
20
|
+
### 2. `auth-flow.test.js`
|
|
21
|
+
Tests for authentication flow handling:
|
|
22
|
+
- **`waitForAutoAuth()`** - Auto-authentication detection (5s timeout)
|
|
23
|
+
- **`waitForManualAuth()`** - Manual auth completion (10min timeout)
|
|
24
|
+
|
|
25
|
+
**14 tests** covering:
|
|
26
|
+
- Auto-authentication with valid cookies
|
|
27
|
+
- Cross-domain SSO flows
|
|
28
|
+
- Subdomain landing after auth
|
|
29
|
+
- Timeout handling
|
|
30
|
+
- Error resilience
|
|
31
|
+
|
|
32
|
+
### 3. `prepare-html.test.js`
|
|
33
|
+
Tests for HTML processing:
|
|
34
|
+
- **`cleanHtml()`** - Removes scripts, styles, attributes
|
|
35
|
+
- **`enrichHtml()`** - Converts relative URLs to absolute
|
|
36
|
+
- **`prepareHtml()`** - Combined clean + enrich
|
|
37
|
+
|
|
38
|
+
**49 tests** for HTML sanitization and URL enrichment.
|
|
39
|
+
|
|
40
|
+
## Running Tests
|
|
41
|
+
|
|
42
|
+
### Run All Tests
|
|
43
|
+
```bash
|
|
44
|
+
node tests/run-all.js
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Run Individual Test Suite
|
|
48
|
+
```bash
|
|
49
|
+
node tests/redirect-detection.test.js
|
|
50
|
+
node tests/auth-flow.test.js
|
|
51
|
+
node tests/prepare-html.test.js
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Test Coverage
|
|
55
|
+
|
|
56
|
+
**Total: 106 tests**
|
|
57
|
+
- ✅ All redirect scenarios (permanent, auth, same-domain)
|
|
58
|
+
- ✅ Authentication flows (auto-auth, manual, SSO)
|
|
59
|
+
- ✅ HTML processing and sanitization
|
|
60
|
+
- ✅ Edge cases and error handling
|
|
61
|
+
|
|
62
|
+
## Key Scenarios Tested
|
|
63
|
+
|
|
64
|
+
### Redirect Detection
|
|
65
|
+
- `gmail.com` → `mail.google.com` (permanent redirect)
|
|
66
|
+
- `site.com` → `accounts.google.com` (SSO auth)
|
|
67
|
+
- `site.com/dashboard` → `site.com/login` (same-domain auth)
|
|
68
|
+
- `accounts.google.com` requested directly (no redirect)
|
|
69
|
+
|
|
70
|
+
### Auth Flows
|
|
71
|
+
- Auto-auth with valid session cookies
|
|
72
|
+
- Manual auth with cross-domain SSO providers
|
|
73
|
+
- Landing on different subdomain after auth
|
|
74
|
+
- Timeout scenarios with user hints
|
|
75
|
+
|
|
76
|
+
### HTML Processing
|
|
77
|
+
- Script/style removal
|
|
78
|
+
- Attribute cleaning (class, id, data-*, events)
|
|
79
|
+
- Relative → absolute URL conversion
|
|
80
|
+
- SVG and comment removal
|
|
81
|
+
|
|
82
|
+
## Mock Objects
|
|
83
|
+
|
|
84
|
+
Tests use mock `Page` objects that simulate Puppeteer's page behavior:
|
|
85
|
+
- Configurable URL transitions
|
|
86
|
+
- Timing controls for async auth flows
|
|
87
|
+
- Error simulation for robustness testing
|
|
88
|
+
|
|
89
|
+
## Fast Execution
|
|
90
|
+
|
|
91
|
+
All tests complete in **~15 seconds**:
|
|
92
|
+
- Pure function tests (redirect detection): instant
|
|
93
|
+
- Async tests (auth flows): ~10 seconds
|
|
94
|
+
- HTML processing: instant
|
|
95
|
+
|
|
96
|
+
Importing the module no longer auto-starts the MCP server; that auto-start previously caused test runs to hang.
|