@govtechsg/oobee 0.10.91 → 0.10.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/AGENTS.md +303 -0
  2. package/README.md +22 -0
  3. package/dist/cli.js +3 -0
  4. package/dist/combine.js +15 -3
  5. package/dist/constants/cliFunctions.js +7 -0
  6. package/dist/constants/common.js +149 -80
  7. package/dist/constants/constants.js +1 -0
  8. package/dist/crawlers/commonCrawlerFunc.js +136 -15
  9. package/dist/crawlers/crawlDomain.js +55 -58
  10. package/dist/crawlers/crawlIntelligentSitemap.js +21 -11
  11. package/dist/crawlers/crawlRateController.js +47 -0
  12. package/dist/crawlers/crawlSitemap.js +51 -62
  13. package/dist/crawlers/runCustom.js +8 -2
  14. package/dist/generateOobeeClientScanner.js +32 -1
  15. package/dist/mergeAxeResults/itemsStore.js +32 -3
  16. package/dist/mergeAxeResults/sentryTelemetry.js +3 -0
  17. package/dist/mergeAxeResults.js +120 -92
  18. package/dist/npmIndex.js +1 -0
  19. package/dist/utils.js +23 -28
  20. package/oobee-client-scanner.js +35 -4
  21. package/package.json +3 -3
  22. package/src/cli.ts +4 -0
  23. package/src/combine.ts +16 -1
  24. package/src/constants/cliFunctions.ts +7 -0
  25. package/src/constants/common.ts +162 -90
  26. package/src/constants/constants.ts +1 -0
  27. package/src/crawlers/commonCrawlerFunc.ts +148 -14
  28. package/src/crawlers/crawlDomain.ts +64 -66
  29. package/src/crawlers/crawlIntelligentSitemap.ts +23 -11
  30. package/src/crawlers/crawlRateController.ts +63 -0
  31. package/src/crawlers/crawlSitemap.ts +57 -70
  32. package/src/crawlers/runCustom.ts +10 -1
  33. package/src/generateOobeeClientScanner.ts +32 -1
  34. package/src/index.ts +1 -0
  35. package/src/mergeAxeResults/itemsStore.ts +37 -3
  36. package/src/mergeAxeResults/sentryTelemetry.ts +3 -0
  37. package/src/mergeAxeResults.ts +139 -99
  38. package/src/npmIndex.ts +1 -0
  39. package/src/utils.ts +25 -33
  40. /package/{bf04540e-0894-4d00-98ec-c1be74c6f199.txt → 7339fae5-e8ed-4b50-af13-317847620dbf.txt} +0 -0
@@ -377,9 +377,21 @@ const checkUrlConnectivityWithBrowser = async (
377
377
  } = rawDevice;
378
378
 
379
379
  const launchOptions = getPlaywrightLaunchOptions(browserToRun);
380
+
381
+ const { Authorization, ...nonAuthHeaders } = extraHTTPHeaders || {};
382
+ let httpCredentials = undefined;
383
+ if (Authorization?.startsWith('Basic ')) {
384
+ const decoded = Buffer.from(Authorization.slice(6), 'base64').toString();
385
+ const colonIdx = decoded.indexOf(':');
386
+ if (colonIdx > 0) {
387
+ httpCredentials = { username: decoded.slice(0, colonIdx), password: decoded.slice(colonIdx + 1) };
388
+ }
389
+ }
390
+
380
391
  const contextOptions: Record<string, unknown> = {
381
392
  ...restDevice,
382
- ...(extraHTTPHeaders && { extraHTTPHeaders }),
393
+ ...(Object.keys(nonAuthHeaders).length > 0 && { extraHTTPHeaders: nonAuthHeaders }),
394
+ ...(httpCredentials && { httpCredentials }),
383
395
  ignoreHTTPSErrors: true,
384
396
  ...(process.env.OOBEE_DISABLE_BROWSER_DOWNLOAD && { acceptDownloads: false }),
385
397
  };
@@ -421,6 +433,24 @@ const checkUrlConnectivityWithBrowser = async (
421
433
  }
422
434
 
423
435
  try {
436
+ // Install context-wide route interception only when an Authorization header
437
+ // is present that could not be converted to httpCredentials (e.g. a Bearer
438
+ // token), so typical public scans avoid its performance cost during the checkUrl phase.
439
+ if (Authorization && !httpCredentials) {
440
+ const entryOrigin = new URL(url).origin;
441
+ await browserContext.route('**/*', async (route: any, request: any) => {
442
+ try {
443
+ if (new URL(request.url()).origin === entryOrigin) {
444
+ await route.continue({ headers: { ...request.headers(), Authorization } });
445
+ } else {
446
+ await route.continue();
447
+ }
448
+ } catch {
449
+ await route.continue();
450
+ }
451
+ });
452
+ }
453
+
424
454
  const page = await browserContext.newPage();
425
455
 
426
456
  // Block native Chrome download UI
@@ -431,16 +461,6 @@ const checkUrlConnectivityWithBrowser = async (
431
461
  consoleLogger.info(`Unable to set download deny: ${(e as Error).message}`);
432
462
  }
433
463
 
434
- // OPTIMIZATION: Block heavy visual resources (Images/Fonts/CSS)
435
- // This allows the "Connectivity Check" to pass as soon as HTML is ready
436
- await page.route('**/*', (route) => {
437
- const type = route.request().resourceType();
438
- if (['image', 'media', 'font', 'stylesheet'].includes(type)) {
439
- return route.abort();
440
- }
441
- return route.continue();
442
- });
443
-
444
464
  // STEP 2: Navigate (follows server-side redirects)
445
465
  page.once('download', () => {
446
466
  res.status = constants.urlCheckStatuses.notASupportedDocument.code;
@@ -888,6 +908,7 @@ const getRobotsTxtViaPlaywright = async (
888
908
  browserContext = await constants.launcher.launchPersistentContext(robotsDataDir, {
889
909
  ...getPlaywrightLaunchOptions(browser),
890
910
  ...(extraHTTPHeaders && { extraHTTPHeaders }),
911
+ ...(process.env.OOBEE_USER_AGENT && { userAgent: process.env.OOBEE_USER_AGENT }),
891
912
  });
892
913
  register(browserContext);
893
914
  } else {
@@ -895,9 +916,10 @@ const getRobotsTxtViaPlaywright = async (
895
916
  const launchOptions = getPlaywrightLaunchOptions(browser);
896
917
  browserInstance = await constants.launcher.launch(launchOptions);
897
918
  register(browserInstance as unknown as { close: () => Promise<void> });
898
-
919
+
899
920
  browserContext = await browserInstance.newContext({
900
921
  ...(extraHTTPHeaders && { extraHTTPHeaders }),
922
+ ...(process.env.OOBEE_USER_AGENT && { userAgent: process.env.OOBEE_USER_AGENT }),
901
923
  });
902
924
  }
903
925
 
@@ -975,7 +997,7 @@ export const isDisallowedInRobotsTxt = (url: string): boolean => {
975
997
 
976
998
  export const getLinksFromSitemap = async (
977
999
  sitemapUrl: string,
978
- maxLinksCount: number,
1000
+ _maxLinksCount: number,
979
1001
  browser: string,
980
1002
  userDataDirectory: string,
981
1003
  userUrlInput: string,
@@ -985,9 +1007,8 @@ export const getLinksFromSitemap = async (
985
1007
  userUrl: string = userUrlInput,
986
1008
  ) => {
987
1009
  const scannedSitemaps = new Set<string>();
988
- const urls: Record<string, Request> = {}; // dictionary of requests to urls to be scanned
989
-
990
- const isLimitReached = () => Object.keys(urls).length >= maxLinksCount;
1010
+ const sitemapLinkCounts: Record<string, number> = {};
1011
+ const allUrls = new Set<string>(); // all discovered URLs (lightweight strings)
991
1012
 
992
1013
  const addToUrlList = (url: string) => {
993
1014
  if (!url) return;
@@ -995,17 +1016,7 @@ export const getLinksFromSitemap = async (
995
1016
  if (!isFilePath(userUrl) && !isFollowStrategy(url, userUrl, strategy)) return;
996
1017
 
997
1018
  url = convertPathToLocalFile(url);
998
-
999
- let request;
1000
- try {
1001
- request = new Request({ url });
1002
- } catch (e) {
1003
- console.log('Error creating request', e);
1004
- }
1005
- if (isUrlPdf(url)) {
1006
- request.skipNavigation = true;
1007
- }
1008
- urls[url] = request;
1019
+ allUrls.add(url);
1009
1020
  };
1010
1021
 
1011
1022
  const calculateCloseness = (sitemapUrl: string) => {
@@ -1058,16 +1069,15 @@ export const getLinksFromSitemap = async (
1058
1069
  });
1059
1070
  }
1060
1071
 
1061
- // Add the sorted URLs to the main URL list
1062
- for (const { url } of urlList.slice(0, maxLinksCount)) {
1072
+ // Add all URLs to the discovered list (no limit is applied here; the crawler enforces maxRequestsPerCrawl)
1073
+ for (const { url } of urlList) {
1063
1074
  addToUrlList(url);
1064
1075
  }
1065
1076
  };
1066
1077
 
1067
1078
  const processNonStandardSitemap = (data: string) => {
1068
1079
  const urlsFromData = crawlee
1069
- .extractUrls({ string: data, urlRegExp: new RegExp('^(http|https):/{2}.+$', 'gmi') })
1070
- .slice(0, maxLinksCount);
1080
+ .extractUrls({ string: data, urlRegExp: new RegExp('^(http|https):/{2}.+$', 'gmi') });
1071
1081
  urlsFromData.forEach(url => {
1072
1082
  addToUrlList(url);
1073
1083
  });
@@ -1118,6 +1128,7 @@ export const getLinksFromSitemap = async (
1118
1128
  // Bug in Chrome which causes browser pool crash when userDataDirectory is set in non-headless mode
1119
1129
  ...(process.env.CRAWLEE_HEADLESS === '1' && { userDataDir: userDataDirectory }),
1120
1130
  ...(extraHTTPHeaders && { extraHTTPHeaders }),
1131
+ ...(process.env.OOBEE_USER_AGENT && { userAgent: process.env.OOBEE_USER_AGENT }),
1121
1132
  },
1122
1133
  );
1123
1134
 
@@ -1127,9 +1138,10 @@ export const getLinksFromSitemap = async (
1127
1138
  const launchOptions = getPlaywrightLaunchOptions(browser);
1128
1139
  browserInstance = await constants.launcher.launch(launchOptions);
1129
1140
  register(browserInstance as unknown as { close: () => Promise<void> });
1130
-
1141
+
1131
1142
  browserContext = await browserInstance.newContext({
1132
1143
  ...(extraHTTPHeaders && { extraHTTPHeaders }),
1144
+ ...(process.env.OOBEE_USER_AGENT && { userAgent: process.env.OOBEE_USER_AGENT }),
1133
1145
  });
1134
1146
  }
1135
1147
 
@@ -1202,14 +1214,13 @@ export const getLinksFromSitemap = async (
1202
1214
  sitemapType = constants.xmlSitemapTypes.unknown;
1203
1215
  }
1204
1216
 
1217
+ const countBefore = allUrls.size;
1218
+
1205
1219
  switch (sitemapType) {
1206
1220
  case constants.xmlSitemapTypes.xmlIndex:
1207
1221
  consoleLogger.info(`This is a XML format sitemap index.`);
1208
1222
  for (const childSitemapUrl of $('loc')) {
1209
1223
  const childSitemapUrlText = $(childSitemapUrl).text();
1210
- if (isLimitReached()) {
1211
- break;
1212
- }
1213
1224
  if (childSitemapUrlText.endsWith('.xml') || childSitemapUrlText.endsWith('.txt')) {
1214
1225
  await fetchUrls(childSitemapUrlText, extraHTTPHeaders); // Recursive call for nested sitemaps
1215
1226
  } else {
@@ -1233,6 +1244,11 @@ export const getLinksFromSitemap = async (
1233
1244
  consoleLogger.info(`This is an unrecognised XML sitemap format.`);
1234
1245
  processNonStandardSitemap(data);
1235
1246
  }
1247
+
1248
+ const linksFromThisSitemap = allUrls.size - countBefore;
1249
+ if (linksFromThisSitemap > 0) {
1250
+ sitemapLinkCounts[url] = (sitemapLinkCounts[url] || 0) + linksFromThisSitemap;
1251
+ }
1236
1252
  };
1237
1253
 
1238
1254
  try {
@@ -1241,7 +1257,41 @@ export const getLinksFromSitemap = async (
1241
1257
  consoleLogger.error(e);
1242
1258
  }
1243
1259
 
1244
- const requestList = Object.values(urls);
1260
+ // Build Request objects for all discovered URLs; the crawler itself enforces
1261
+ // maxRequestsPerCrawl by counting only successfully scanned pages.
1262
+ const requestList: Request[] = [];
1263
+ for (const url of allUrls) {
1264
+ try {
1265
+ const request = new Request({ url });
1266
+ if (isUrlPdf(url)) {
1267
+ request.skipNavigation = true;
1268
+ }
1269
+ requestList.push(request);
1270
+ } catch (e) {
1271
+ consoleLogger.info(`Error creating request for ${url}: ${e}`);
1272
+ }
1273
+ }
1274
+
1275
+ const totalLinksDiscovered = allUrls.size;
1276
+ const fetchedSitemaps = Object.entries(sitemapLinkCounts).map(([url, fetchedLinks]) => ({
1277
+ url,
1278
+ fetchedLinks,
1279
+ }));
1280
+
1281
+ const prev = constants.sitemapFetchedLinks;
1282
+ constants.sitemapFetchedLinks = {
1283
+ totalLinksFetchedFromSitemaps: (prev?.totalLinksFetchedFromSitemaps ?? 0) + totalLinksDiscovered,
1284
+ fetchedSitemaps: [...(prev?.fetchedSitemaps ?? []), ...fetchedSitemaps],
1285
+ };
1286
+
1287
+ if (totalLinksDiscovered > 0) {
1288
+ const breakdown = fetchedSitemaps
1289
+ .map(({ url, fetchedLinks }) => `${url} (${fetchedLinks})`)
1290
+ .join(', ');
1291
+ consoleLogger.info(
1292
+ `There are a total of ${totalLinksDiscovered} links found across ${breakdown}.`,
1293
+ );
1294
+ }
1245
1295
 
1246
1296
  return requestList;
1247
1297
  };
@@ -1406,6 +1456,36 @@ export const getEdgeData = (randomToken: string) => {
1406
1456
  * @param {*} destDir destination directory
1407
1457
  * @returns boolean indicating whether the operation was successful
1408
1458
  */
1459
/**
 * Copies a file synchronously, retrying when the copy fails with a transient
 * EBUSY error (the source is locked by another process).
 *
 * Between attempts the thread sleeps with exponential backoff: 100ms after the
 * first failure, doubling each time and capped at 1s. With the default of 3
 * attempts the waits are 100ms and 200ms.
 *
 * @param src path of the file to copy
 * @param dest path to copy the file to
 * @param maxRetries maximum number of copy attempts (default 3); values below 1 make no attempt
 * @returns true if the file was copied, false if every attempt failed or a
 * non-EBUSY error occurred (the cause is logged before returning)
 */
const copyFileWithRetry = (src: string, dest: string, maxRetries: number = 3): boolean => {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      fs.copyFileSync(src, dest);
      if (attempt > 1) {
        consoleLogger.info(`File copy succeeded on attempt ${attempt}: ${dest}`);
      }
      return true;
    } catch (err: unknown) {
      // Node file-system errors carry a string `code`; anything else is treated as non-transient.
      const code =
        typeof err === 'object' && err !== null && 'code' in err
          ? (err as { code?: unknown }).code
          : undefined;

      if (code === 'EBUSY' && attempt < maxRetries) {
        const delayMs = Math.min(100 * 2 ** (attempt - 1), 1000);
        consoleLogger.warn(
          `File copy attempt ${attempt}/${maxRetries} failed with EBUSY. Retrying after ${delayMs}ms: ${dest}`,
        );
        // Synchronous sleep that blocks the thread without spinning the CPU:
        // waiting on a private shared buffer nobody notifies simply times out.
        Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, delayMs);
        continue;
      }

      // Non-transient error or attempts exhausted: record the cause so the
      // caller's generic failure message can be diagnosed.
      const reason = err instanceof Error ? err.message : String(err);
      consoleLogger.error(
        `File copy failed (${typeof code === 'string' ? code : 'unknown error'}) for ${dest}: ${reason}`,
      );
      return false;
    }
  }
  return false;
};
1488
+
1409
1489
  const cloneChromeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, destDir: string) => {
1410
1490
  let profileCookiesDir;
1411
1491
  // Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
@@ -1445,23 +1525,9 @@ const cloneChromeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, d
1445
1525
 
1446
1526
  // Prevents duplicate cookies file if the cookies already exist
1447
1527
  if (!fs.existsSync(path.join(destProfileDir, 'Cookies'))) {
1448
- try {
1449
- fs.copyFileSync(dir, path.join(destProfileDir, 'Cookies'));
1450
- } catch (err) {
1451
- consoleLogger.error(err);
1452
- if (err.code === 'EBUSY') {
1453
- console.log(
1454
- `Unable to copy the file for ${profileName} because it is currently in use.`,
1455
- );
1456
- console.log(
1457
- 'Please close any applications that might be using this file and try again.',
1458
- );
1459
- } else {
1460
- console.log(
1461
- `An unexpected error occurred for ${profileName} while copying the file: ${err.message}`,
1462
- );
1463
- }
1464
- // printMessage([err], messageOptions);
1528
+ const destCookiesPath = path.join(destProfileDir, 'Cookies');
1529
+ if (!copyFileWithRetry(dir, destCookiesPath)) {
1530
+ consoleLogger.error(`Failed to copy Chrome profile cookies for ${profileName} after retries.`);
1465
1531
  success = false;
1466
1532
  }
1467
1533
  }
@@ -1475,12 +1541,6 @@ const cloneChromeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, d
1475
1541
  return false;
1476
1542
  };
1477
1543
 
1478
- /**
1479
- * Clone the Chrome profile cookie files to the destination directory
1480
- * @param {*} options glob options object
1481
- * @param {*} destDir destination directory
1482
- * @returns boolean indicating whether the operation was successful
1483
- */
1484
1544
  const cloneEdgeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, destDir: string) => {
1485
1545
  let profileCookiesDir;
1486
1546
  // Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
@@ -1521,21 +1581,9 @@ const cloneEdgeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, des
1521
1581
 
1522
1582
  // Prevents duplicate cookies file if the cookies already exist
1523
1583
  if (!fs.existsSync(path.join(destProfileDir, 'Cookies'))) {
1524
- try {
1525
- fs.copyFileSync(dir, path.join(destProfileDir, 'Cookies'));
1526
- } catch (err) {
1527
- consoleLogger.error(err);
1528
- if (err.code === 'EBUSY') {
1529
- console.log(
1530
- `Unable to copy the file for ${profileName} because it is currently in use.`,
1531
- );
1532
- console.log(
1533
- 'Please close any applications that might be using this file and try again.',
1534
- );
1535
- } else {
1536
- console.log(`An unexpected error occurred while copying the file: ${err.message}`);
1537
- }
1538
- // printMessage([err], messageOptions);
1584
+ const destCookiesPath = path.join(destProfileDir, 'Cookies');
1585
+ if (!copyFileWithRetry(dir, destCookiesPath)) {
1586
+ consoleLogger.error(`Failed to copy Edge profile cookies for ${profileName} after retries.`);
1539
1587
  success = false;
1540
1588
  }
1541
1589
  }
@@ -1566,19 +1614,9 @@ const cloneLocalStateFile = (options: GlobOptionsWithFileTypesFalse, destDir: st
1566
1614
 
1567
1615
  localState.forEach(dir => {
1568
1616
  const profileName = dir.match(profileNamesRegex)[1];
1569
- try {
1570
- fs.copyFileSync(dir, path.join(destDir, 'Local State'));
1571
- } catch (err) {
1572
- consoleLogger.error(err);
1573
- if (err.code === 'EBUSY') {
1574
- console.log(`Unable to copy the file because it is currently in use.`);
1575
- console.log('Please close any applications that might be using this file and try again.');
1576
- } else {
1577
- console.log(
1578
- `An unexpected error occurred for ${profileName} while copying the file: ${err.message}`,
1579
- );
1580
- }
1581
- printMessage([err], messageOptions);
1617
+ const destPath = path.join(destDir, 'Local State');
1618
+ if (!copyFileWithRetry(dir, destPath)) {
1619
+ consoleLogger.error(`Failed to copy Local State file for ${profileName} after retries.`);
1582
1620
  success = false;
1583
1621
  }
1584
1622
  });
@@ -1629,6 +1667,17 @@ export const cloneChromeProfiles = (randomToken: string): string => {
1629
1667
  }
1630
1668
 
1631
1669
  consoleLogger.error('Failed to clone Chrome profiles. You may be logged out of your accounts.');
1670
+
1671
+ // Fall back to a clean profile directory to avoid launch failures from partial clones.
1672
+ try {
1673
+ fs.rmSync(destDir, { recursive: true, force: true });
1674
+ fs.mkdirSync(destDir, { recursive: true });
1675
+ consoleLogger.warn('Using an empty cloned Chrome profile directory due to clone failure.');
1676
+ } catch (cleanupError) {
1677
+ consoleLogger.error(
1678
+ `Unable to reset cloned Chrome profile directory ${destDir}: ${cleanupError}`,
1679
+ );
1680
+ }
1632
1681
  }
1633
1682
  // For future reference, return a null instead to halt the scan
1634
1683
  return destDir;
@@ -1697,6 +1746,15 @@ export const cloneEdgeProfiles = (randomToken: string): string => {
1697
1746
  }
1698
1747
 
1699
1748
  consoleLogger.error('Failed to clone Edge profiles. You may be logged out of your accounts.');
1749
+
1750
+ // Fall back to a clean profile directory to avoid launch failures from partial clones.
1751
+ try {
1752
+ fs.rmSync(destDir, { recursive: true, force: true });
1753
+ fs.mkdirSync(destDir, { recursive: true });
1754
+ consoleLogger.warn('Using an empty cloned Edge profile directory due to clone failure.');
1755
+ } catch (cleanupError) {
1756
+ consoleLogger.error(`Unable to reset cloned Edge profile directory ${destDir}: ${cleanupError}`);
1757
+ }
1700
1758
  }
1701
1759
 
1702
1760
  // For future reference, return a null instead to halt the scan
@@ -1725,7 +1783,14 @@ export const deleteClonedChromeProfiles = (randomToken?: string): void => {
1725
1783
  }
1726
1784
  let destDir: string[];
1727
1785
  if (randomToken) {
1728
- destDir = [`${baseDir}/oobee-${randomToken}`];
1786
+ // Also match _pool* directories created by browser pool re-launches
1787
+ destDir = globSync(`oobee-${randomToken}*`, {
1788
+ cwd: baseDir,
1789
+ absolute: true,
1790
+ });
1791
+ if (destDir.length === 0) {
1792
+ destDir = [`${baseDir}/oobee-${randomToken}`];
1793
+ }
1729
1794
  } else {
1730
1795
  // Find all the oobee directories in the Chrome data directory
1731
1796
  destDir = globSync('**/oobee*', {
@@ -1766,9 +1831,16 @@ export const deleteClonedEdgeProfiles = (randomToken?: string): void => {
1766
1831
  }
1767
1832
  let destDir: string[];
1768
1833
  if (randomToken) {
1769
- destDir = [`${baseDir}/oobee-${randomToken}`];
1834
+ // Also match _pool* directories created by browser pool re-launches
1835
+ destDir = globSync(`oobee-${randomToken}*`, {
1836
+ cwd: baseDir,
1837
+ absolute: true,
1838
+ });
1839
+ if (destDir.length === 0) {
1840
+ destDir = [`${baseDir}/oobee-${randomToken}`];
1841
+ }
1770
1842
  } else {
1771
- // Find all the oobee directories in the Chrome data directory
1843
+ // Find all the oobee directories in the Edge data directory
1772
1844
  destDir = globSync('**/oobee*', {
1773
1845
  cwd: baseDir,
1774
1846
  absolute: true,
@@ -946,6 +946,7 @@ export default {
946
946
  a11yRuleShortDescriptionMap,
947
947
  disabilityBadgesMap,
948
948
  robotsTxtUrls: null,
949
+ sitemapFetchedLinks: null as { totalLinksFetchedFromSitemaps: number; fetchedSitemaps: { url: string; fetchedLinks: number }[] } | null,
949
950
  userDataDirectory: null, // This will be set later in the code
950
951
  randomToken: null, // This will be set later in the code
951
952
  // Track all active Crawlee / Playwright resources for cleanup
@@ -874,6 +874,13 @@ export const runAxeScript = async ({
874
874
  const browserContext: BrowserContext = page.context();
875
875
  const requestUrl = page.url();
876
876
 
877
+ let pageTitle: string | null = null;
878
+ try {
879
+ pageTitle = await page.evaluate(() => document.title);
880
+ } catch {
881
+ // Page may already be in a bad state; title will remain null
882
+ }
883
+
877
884
  try {
878
885
  // Checking for DOM mutations before proceeding to scan
879
886
  await page.evaluate(() => {
@@ -1012,7 +1019,42 @@ export const runAxeScript = async ({
1012
1019
  .run(selectors, {
1013
1020
  resultTypes: defaultResultTypes,
1014
1021
  })
1015
- .then(results => {
1022
+ .then(async results => {
1023
+ // Re-verify aria-hidden-focus violations against the live DOM to
1024
+ // handle race conditions with JS that sets tabindex="-1" after
1025
+ // aria-hidden (common in carousel/slider libraries like slick)
1026
+ const ariaHiddenViolation = results.violations.find(
1027
+ v => v.id === 'aria-hidden-focus',
1028
+ );
1029
+ if (ariaHiddenViolation) {
1030
+ await new Promise(resolve => setTimeout(resolve, 0));
1031
+ ariaHiddenViolation.nodes = ariaHiddenViolation.nodes.filter(node => {
1032
+ const selector = node.target && node.target[0];
1033
+ if (typeof selector !== 'string') return true;
1034
+ try {
1035
+ const el = document.querySelector(selector);
1036
+ if (!el) return true;
1037
+ const focusables = el.querySelectorAll(
1038
+ 'a[href], area[href], button:not([disabled]), input:not([disabled]):not([type="hidden"]), select:not([disabled]), textarea:not([disabled]), [tabindex]',
1039
+ );
1040
+ if (focusables.length === 0) return false;
1041
+ return Array.from(focusables).some(child => {
1042
+ const tabindex = child.getAttribute('tabindex');
1043
+ if (tabindex === null) return true;
1044
+ const parsed = parseInt(tabindex, 10);
1045
+ return isNaN(parsed) || parsed >= 0;
1046
+ });
1047
+ } catch {
1048
+ return true;
1049
+ }
1050
+ });
1051
+ if (ariaHiddenViolation.nodes.length === 0) {
1052
+ results.violations = results.violations.filter(
1053
+ v => v.id !== 'aria-hidden-focus',
1054
+ );
1055
+ }
1056
+ }
1057
+
1016
1058
  if (disableOobee) {
1017
1059
  return results;
1018
1060
  }
@@ -1086,19 +1128,6 @@ export const runAxeScript = async ({
1086
1128
  results.incomplete = await takeScreenshotForHTMLElements(results.incomplete, page, randomToken);
1087
1129
  }
1088
1130
 
1089
- let pageTitle = null;
1090
- try {
1091
- pageTitle = await page.evaluate(() => document.title);
1092
- } catch (e) {
1093
- consoleLogger.info(`Error while getting page title: ${e}`);
1094
- if (page.isClosed()) {
1095
- consoleLogger.info(`Page was closed for ${requestUrl}, creating new page`);
1096
- page = await browserContext.newPage();
1097
- await page.goto(requestUrl, { waitUntil: 'domcontentloaded' });
1098
- pageTitle = await page.evaluate(() => document.title);
1099
- }
1100
- }
1101
-
1102
1131
  return filterAxeResults(results, pageTitle, customFlowDetails);
1103
1132
  };
1104
1133
 
@@ -1124,12 +1153,117 @@ export const preNavigationHooks = (extraHTTPHeaders: Record<string, string>) =>
1124
1153
  ];
1125
1154
  };
1126
1155
 
1156
/**
 * Splits extraHTTPHeaders into auth and non-auth parts.
 * Auth headers (Authorization) must only be sent to same-origin requests to avoid CORS preflight failures.
 * Non-auth headers are safe to set globally on the browser context.
 *
 * The Authorization header is matched case-insensitively (HTTP field names are
 * case-insensitive), so a header supplied as `authorization` is not mistaken
 * for a non-auth header. If several case variants are present, the exact-case
 * `Authorization` key wins, otherwise the first variant encountered is used.
 *
 * @param extraHTTPHeaders headers supplied for the scan; may be omitted
 * @returns an object with:
 *  - `authHeader`: the Authorization header value, or null when absent or empty
 *  - `nonAuthHeaders`: all remaining headers, or null when there are none
 *  - `httpCredentials`: `{ username, password }` decoded from a `Basic ` value,
 *    or null when the value is not Basic or has no non-empty username before the first colon
 */
export const splitAuthHeaders = (extraHTTPHeaders?: Record<string, string>) => {
  let exactCaseAuth: string | undefined;
  let otherCaseAuth: string | undefined;
  const remaining: Record<string, string> = {};

  for (const [name, value] of Object.entries(extraHTTPHeaders ?? {})) {
    if (name === 'Authorization') {
      exactCaseAuth = value;
    } else if (name.toLowerCase() === 'authorization') {
      if (otherCaseAuth === undefined) otherCaseAuth = value;
    } else {
      remaining[name] = value;
    }
  }

  const authorization = exactCaseAuth ?? otherCaseAuth;

  let httpCredentials: { username: string; password: string } | null = null;
  if (authorization?.startsWith('Basic ')) {
    const decoded = Buffer.from(authorization.slice(6), 'base64').toString();
    // Split on the first colon only: the password itself may contain colons.
    const colonIdx = decoded.indexOf(':');
    if (colonIdx > 0) {
      httpCredentials = {
        username: decoded.slice(0, colonIdx),
        password: decoded.slice(colonIdx + 1),
      };
    }
  }

  return {
    authHeader: authorization || null,
    nonAuthHeaders: Object.keys(remaining).length > 0 ? remaining : null,
    httpCredentials,
  };
};
1175
+
1176
/**
 * Adds a route handler to a BrowserContext that sends the Authorization header
 * only to same-origin requests, preventing CORS preflight failures on cross-origin CDN resources.
 *
 * Does nothing when `authHeader` is null or empty.
 *
 * @param context browser context on which the `**\/*` route is registered
 * @param entryUrl URL whose origin defines "same-origin"; must be parseable by `new URL`
 * @param authHeader full Authorization header value to attach, or null to skip
 */
export const addAuthRouteHandler = async (
  context: BrowserContext,
  entryUrl: string,
  authHeader: string | null
) => {
  if (!authHeader) return;

  const entryOrigin = new URL(entryUrl).origin;
  await context.route('**/*', async (route, request) => {
    try {
      if (new URL(request.url()).origin === entryOrigin) {
        // request.headers() reports lower-cased names, so the lower-case key
        // replaces any existing authorization value instead of adding a second entry.
        await route.continue({ headers: { ...request.headers(), authorization: authHeader } });
      } else {
        await route.continue();
      }
    } catch {
      // Best-effort fallback: let the request through unmodified. If that also
      // fails there is nothing left to do, so avoid an unhandled rejection
      // escaping the handler.
      await route.continue().catch(() => {});
    }
  });
};
1200
+
1127
1201
  // Array holding a single async hook. The hook ignores its crawling context and
  // calls guiInfoLog with guiInfoStatusTypes.COMPLETED and an empty payload.
  // NOTE(review): presumably passed to the crawler as its post-navigation hooks — confirm at the call site.
  export const postNavigationHooks = [
1128
1202
  async (_crawlingContext: CrawlingContext) => {
1129
1203
  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
1130
1204
  },
1131
1205
  ];
1132
1206
 
1207
/**
 * Builds a pre-launch hook that assigns a user data directory to every browser launch.
 *
 * The first launch uses `userDataDirectory` itself. Each later launch gets its
 * own `${userDataDirectory}_pool<N>` directory (N = launch number) so that
 * lingering file handles from a retired browser don't cause Chrome exit code 21
 * on Windows. For those later launches the base directory is cloned into the
 * new one on a best-effort basis (every copy error is ignored) so authenticated
 * sessions are preserved across browser pool retirements.
 *
 * @param userDataDirectory base profile directory used by the first launch
 * @returns async hook `(pageId, launchContext)` that prepares the directory,
 * removes stale lock files and sets `launchContext.userDataDir`
 */
export const getPreLaunchHook = (userDataDirectory: string) => {
  let launchCount = 0;

  return async (_pageId: string, launchContext: any) => {
    const { default: fsp } = await import('fs/promises');
    launchCount += 1;

    const isRelaunch = launchCount > 1;
    const effectiveDir = isRelaunch
      ? `${userDataDirectory}_pool${launchCount}`
      : userDataDirectory;

    await fsp.mkdir(effectiveDir, { recursive: true });

    if (isRelaunch) {
      // Lock artefacts are never cloned, at any depth of the tree.
      const isLockEntry = (entry: string) =>
        entry.startsWith('Singleton') || entry === 'lockfile' || entry === 'LOCK';

      // Mirrors `from` into `to`; anything that cannot be read or copied is skipped silently.
      const cloneTree = async (from: string, to: string): Promise<void> => {
        const info = await fsp.stat(from).catch(() => null);
        if (!info) return;

        if (!info.isDirectory()) {
          await fsp.copyFile(from, to).catch(() => {});
          return;
        }

        await fsp.mkdir(to, { recursive: true }).catch(() => {});
        const children = await fsp.readdir(from).catch(() => []);
        const pending = children
          .filter(child => !isLockEntry(child))
          .map(child => cloneTree(path.join(from, child), path.join(to, child)).catch(() => {}));
        await Promise.all(pending);
      };

      // Silent fallback: an empty profile is used if the clone fails.
      await cloneTree(userDataDirectory, effectiveDir).catch(() => {});
    }

    // Clean any stale lock files that may block browser launches on Windows
    const staleLockPaths = [
      ['SingletonLock'],
      ['SingletonSocket'],
      ['SingletonCookie'],
      ['lockfile'],
      ['Default', 'LOCK'],
      ['Default', 'Network', 'LOCK'],
    ].map(segments => path.join(effectiveDir, ...segments));
    await Promise.all(
      staleLockPaths.map(lockPath => fsp.rm(lockPath, { force: true }).catch(() => {})),
    );

    // eslint-disable-next-line no-param-reassign
    launchContext.userDataDir = effectiveDir;
  };
};
1266
+
1133
1267
  export const failedRequestHandler = async ({ request }: { request: Request }) => {
1134
1268
  guiInfoLog(guiInfoStatusTypes.ERROR, { numScanned: 0, urlScanned: request.url });
1135
1269
  log.error(`Failed Request - ${request.url}: ${request.errorMessages}`);