@govtechsg/oobee 0.10.28 → 0.10.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Dockerfile CHANGED
@@ -1,6 +1,6 @@
  # Use Microsoft Playwright image as base image
  # Node version is v22
- FROM mcr.microsoft.com/playwright:v1.49.1-jammy
+ FROM mcr.microsoft.com/playwright:v1.50.0-noble

  # Installation of packages for oobee and runner
  RUN apt-get update && apt-get install -y zip git
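
The base image moves from Playwright v1.49.1 on Ubuntu Jammy to v1.50.0 on Noble; note that package.json (below) pins the npm playwright package at 1.50.1, one patch ahead of the image, and Playwright generally expects the browser image and the npm package to match. A minimal drift-check sketch — illustrative only, not part of oobee; the paths and regex are assumptions:

```ts
// Hypothetical CI check (not in oobee): warn when the Dockerfile's
// Playwright image tag and package.json's playwright pin drift apart.
import { readFileSync } from 'fs';

const dockerfile = readFileSync('Dockerfile', 'utf8');
const pkg = JSON.parse(readFileSync('package.json', 'utf8'));

// "FROM mcr.microsoft.com/playwright:v1.50.0-noble" -> "1.50.0"
const imageVersion = dockerfile.match(/playwright:v(\d+\.\d+\.\d+)/)?.[1];
const npmVersion: string = pkg.dependencies.playwright; // e.g. "1.50.1"

if (imageVersion !== npmVersion) {
  console.warn(`Playwright drift: image v${imageVersion} vs npm ${npmVersion}`);
}
```
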
package/INSTALLATION.md CHANGED
@@ -6,7 +6,7 @@ Oobee (CLI) is provided as a portable distribution which minimises installation

  Oobee is a customisable, automated accessibility testing tool that allows software development teams to find and fix accessibility problems to improve persons with disabilities (PWDs) access to digital services.

- Oobee (CLI) allows software engineers to run Oobee as part of their software development environment as the command line, as well as [integrate it into their CI/CD pipleline](https://github.com/GovTechSG/oobee/blob/master/INTEGRATION.md).
+ Oobee (CLI) allows software engineers to run Oobee as part of their software development environment as the command line, as well as [integrate it into their CI/CD pipleline](INTEGRATION.md).

  ## System Requirements

package/exclusions.txt CHANGED
@@ -1,2 +1,3 @@
  \.*login.singpass.gov.sg\.*
- \.*auth.singpass.gov.sg\.*
+ \.*auth.singpass.gov.sg\.*
+ \.*form.gov.sg\.*
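
form.gov.sg joins the default exclusion list. Each line appears to be applied as a regular expression against candidate URLs (so `\.*` means "zero or more literal dots", not "anything"). A sketch of how a matcher like the isSkippedUrl seen in the crawler hunks below might apply them — the body here is an assumption, not oobee's actual implementation:

```ts
// Sketch only: mirrors the isSkippedUrl(url, blacklistedPatterns) call
// sites in this diff; oobee's real matcher may differ.
const patterns = ['\\.*login.singpass.gov.sg\\.*', '\\.*form.gov.sg\\.*'];

const isSkippedUrl = (url: string, blacklistedPatterns: string[]): boolean =>
  blacklistedPatterns.some(pattern => new RegExp(pattern).test(url));

console.log(isSkippedUrl('https://form.gov.sg/abc123', patterns)); // true
```
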
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@govtechsg/oobee",
    "main": "dist/npmIndex.js",
-   "version": "0.10.28",
+   "version": "0.10.33",
    "type": "module",
    "author": "Government Technology Agency <info@tech.gov.sg>",
    "dependencies": {
@@ -23,7 +23,7 @@
    "mime-types": "^2.1.35",
    "minimatch": "^9.0.3",
    "pdfjs-dist": "github:veraPDF/pdfjs-dist#v4.4.168-taggedPdf-0.1.20",
-   "playwright": "1.49.1",
+   "playwright": "1.50.1",
    "prettier": "^3.1.0",
    "print-message": "^3.0.1",
    "safe-regex": "^2.1.1",
package/src/combine.ts CHANGED
@@ -97,6 +97,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
      isEnableWcagAaa: envDetails.ruleset,
      isSlowScanMode: envDetails.specifiedMaxConcurrency,
      isAdhereRobots: envDetails.followRobots,
+     deviceChosen: deviceToScan,
    };

    const viewportSettings: ViewportSettingsClass = new ViewportSettingsClass(
@@ -209,6 +210,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
      ...urlsCrawledObj.error,
      ...urlsCrawledObj.invalid,
      ...urlsCrawledObj.forbidden,
+     ...urlsCrawledObj.userExcluded,
    ];
    const basicFormHTMLSnippet = await generateArtifacts(
      randomToken,
@@ -239,6 +241,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
        pagesNotScanned.length,
        metadata,
      );
+   } else {
+     printMessage([`No pages were scanned.`], alertMessageOptions);
    }
  } else {
    printMessage([`No pages were scanned.`], alertMessageOptions);
@@ -269,7 +269,7 @@ export const cliOptions: { [key: string]: Options } = {
    default: 'default',
    coerce: option => {
      const validChoices = Object.values(RuleFlags);
-     const userChoices: string[] = option.split(',');
+     const userChoices: string[] = String(option).split(',');
      const invalidUserChoices = userChoices.filter(
        choice => !validChoices.includes(choice as RuleFlags),
      );
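
Wrapping the option in String() guards the coerce callback against yargs handing over a non-string: a purely numeric flag value is parsed as a number, which has no .split method. A standalone repro of the failure mode (illustrative, not oobee code):

```ts
// yargs parses a purely numeric flag value (e.g. `--some-flag 123`)
// into the number 123 by default.
const option: unknown = 123;

// (option as any).split(',') would throw: option.split is not a function
const userChoices: string[] = String(option).split(','); // ['123'] — safe
console.log(userChoices);
```
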
@@ -1819,13 +1819,72 @@ export const urlWithoutAuth = (url: string): string => {
  };

  export const waitForPageLoaded = async (page, timeout = 10000) => {
+   const OBSERVER_TIMEOUT = timeout; // Ensure observer timeout does not exceed the main timeout
+
    return Promise.race([
-     page.waitForLoadState('load'),
-     page.waitForLoadState('networkidle'),
-     new Promise(resolve => setTimeout(resolve, timeout)),
+     page.waitForLoadState('load'), // Ensure page load completes
+     page.waitForLoadState('networkidle'), // Wait for network requests to settle
+     new Promise(resolve => setTimeout(resolve, timeout)), // Hard timeout as a fallback
+     page.evaluate((OBSERVER_TIMEOUT) => {
+       return new Promise((resolve) => {
+         // Skip mutation check for PDFs
+         if (document.contentType === 'application/pdf') {
+           resolve('Skipping DOM mutation check for PDF.');
+           return;
+         }
+
+         let timeout;
+         let mutationCount = 0;
+         const MAX_MUTATIONS = 250; // Limit max mutations
+         const mutationHash = {};
+
+         const observer = new MutationObserver(mutationsList => {
+           clearTimeout(timeout);
+
+           mutationCount++;
+           if (mutationCount > MAX_MUTATIONS) {
+             observer.disconnect();
+             resolve('Too many mutations detected, exiting.');
+             return;
+           }
+
+           mutationsList.forEach(mutation => {
+             if (mutation.target instanceof Element) {
+               Array.from(mutation.target.attributes).forEach(attr => {
+                 const mutationKey = `${mutation.target.nodeName}-${attr.name}`;
+
+                 if (mutationKey) {
+                   mutationHash[mutationKey] = (mutationHash[mutationKey] || 0) + 1;
+
+                   if (mutationHash[mutationKey] >= 10) {
+                     observer.disconnect();
+                     resolve(`Repeated mutation detected for ${mutationKey}, exiting.`);
+                   }
+                 }
+               });
+             }
+           });
+
+           // If no mutations occur for 1 second, resolve
+           timeout = setTimeout(() => {
+             observer.disconnect();
+             resolve('DOM stabilized after mutations.');
+           }, 1000);
+         });
+
+         // Final timeout to avoid infinite waiting
+         timeout = setTimeout(() => {
+           observer.disconnect();
+           resolve('Observer timeout reached, exiting.');
+         }, OBSERVER_TIMEOUT);
+
+         observer.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
+       });
+     }, OBSERVER_TIMEOUT), // Pass OBSERVER_TIMEOUT dynamically to the browser context
    ]);
  };

+
  function isValidHttpUrl(urlString) {
    const pattern = /^(http|https):\/\/[^ "]+$/;
    return pattern.test(urlString);
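
waitForPageLoaded now races the two Playwright load states and the hard timeout against an in-page MutationObserver: it resolves once the DOM has been quiet for one second, gives up after 250 total mutations, and bails out early if the same node/attribute pair mutates ten times (so a looping ticker or animation no longer stalls the scan for the full timeout). Call sites elsewhere in this diff are unchanged:

```ts
// Usage as seen in the crawler hunks below; the single timeout argument
// caps both the outer race and the in-page observer (OBSERVER_TIMEOUT).
await waitForPageLoaded(page, 10000);
```
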
@@ -186,7 +186,7 @@ export class UrlsCrawled {
    error: { url: string }[] = [];
    exceededRequests: string[] = [];
    forbidden: string[] = [];
-   userExcluded: string[] = [];
+   userExcluded: { url: string; actualUrl: string; pageTitle: string }[] = [];
    everything: string[] = [];

    constructor(urlsCrawled?: Partial<UrlsCrawled>) {
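
userExcluded entries are promoted from bare URL strings to the same shape as scanned entries, so combine.ts can spread them into pagesNotScanned and the report can render them. The migration at each push site, condensed from the crawler hunks below:

```ts
// Before (0.10.28): only the raw URL was kept.
// urlsCrawled.userExcluded.push(request.url);

// After (0.10.33): same shape as `scanned` entries.
urlsCrawled.userExcluded.push({
  url: request.url,
  pageTitle: request.url, // no title is available for a skipped page
  actualUrl, // post-redirect URL
});
```
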
@@ -1,7 +1,7 @@
  import crawlee, { CrawlingContext, PlaywrightGotoOptions } from 'crawlee';
  import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
- import { xPathToCss } from '../xPathToCss.js';
  import { BrowserContext, Page } from 'playwright';
+ import { xPathToCss } from '../xPathToCss.js';
  import {
    axeScript,
    guiInfoStatusTypes,
@@ -357,24 +357,28 @@ export const runAxeScript = async ({
          return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
        },
      },
-     {
-       ...customAxeConfig.checks[2],
-       evaluate: (_node: HTMLElement) => {
-         if (gradingReadabilityFlag === '') {
-           return true; // Pass if no readability issues
-         }
-         // Dynamically update the grading messages
-         const gradingCheck = customAxeConfig.checks.find(
-           check => check.id === 'oobee-grading-text-contents',
-         );
-         if (gradingCheck) {
-           gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag
-           }.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
-         }
-
-         // Fail if readability issues are detected
-       },
-     },
+     ...(enableWcagAaa
+       ? [
+           {
+             ...customAxeConfig.checks[2],
+             evaluate: (_node: HTMLElement) => {
+               if (gradingReadabilityFlag === '') {
+                 return true; // Pass if no readability issues
+               }
+               // Dynamically update the grading messages
+               const gradingCheck = customAxeConfig.checks.find(
+                 check => check.id === 'oobee-grading-text-contents',
+               );
+               if (gradingCheck) {
+                 gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag
+                 }.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
+               }
+
+               // Fail if readability issues are detected
+             },
+           },
+         ]
+       : []),
    ],
    rules: customAxeConfig.rules
      .filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
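
The readability check is now registered only when the AAA ruleset is active, via the conditional-spread idiom: spread a ternary that yields either a one-element array or an empty one. A standalone illustration (check names other than oobee-grading-text-contents are placeholders):

```ts
// Generic illustration of `...(cond ? [x] : [])` as used above.
const enableWcagAaa = false; // assumption: driven by the chosen ruleset
const checks = [
  'base-check', // placeholder
  ...(enableWcagAaa ? ['oobee-grading-text-contents'] : []),
];
console.log(checks); // the AAA-only check is absent when the flag is off
```
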
@@ -416,9 +420,12 @@ export const runAxeScript = async ({
    const escapedCssSelectors =
      oobeeAccessibleLabelFlaggedCssSelectors.map(escapeCSSSelector);

-   function framesCheck(cssSelector: string): { doc: Document; remainingSelector: string } {
+   function framesCheck(cssSelector: string): {
+     doc: Document;
+     remainingSelector: string;
+   } {
      let doc = document; // Start with the main document
-     let remainingSelector = ""; // To store the last part of the selector
+     let remainingSelector = ''; // To store the last part of the selector
      let targetIframe = null;

      // Split the selector into parts at "> html"
@@ -429,18 +436,18 @@ export const runAxeScript = async ({

        // Add back '> html' to the current part
        if (i > 0) {
-         iframeSelector = "html > " + iframeSelector;
+         iframeSelector = `html > ${iframeSelector}`;
        }

        let frameset = null;
        // Find the iframe using the current document context
-       if (doc.querySelector("frameset")) {
-         frameset = doc.querySelector("frameset");
+       if (doc.querySelector('frameset')) {
+         frameset = doc.querySelector('frameset');
        }

        if (frameset) {
          doc = frameset;
-         iframeSelector = iframeSelector.split("body >")[1].trim();
+         iframeSelector = iframeSelector.split('body >')[1].trim();
        }
        targetIframe = doc.querySelector(iframeSelector);

@@ -448,7 +455,9 @@ export const runAxeScript = async ({
          // Update the document to the iframe's contentDocument
          doc = targetIframe.contentDocument;
        } else {
-         console.warn(`Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`);
+         console.warn(
+           `Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`,
+         );
          return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
        }
      }
@@ -457,19 +466,18 @@ export const runAxeScript = async ({
      remainingSelector = diffParts[diffParts.length - 1].trim();

      // Remove any leading '>' combinators from remainingSelector
-     remainingSelector = "html" + remainingSelector;
+     remainingSelector = `html${remainingSelector}`;

      return { doc, remainingSelector };
    }

-
    function findElementByCssSelector(cssSelector: string): string | null {
      let doc = document;

      // Check if the selector includes 'frame' or 'iframe' and update doc and selector

      if (/\s*>\s*html\s*/.test(cssSelector)) {
-       let inFrames = framesCheck(cssSelector)
+       const inFrames = framesCheck(cssSelector);
        doc = inFrames.doc;
        cssSelector = inFrames.remainingSelector;
      }
@@ -515,24 +523,26 @@ export const runAxeScript = async ({
      description: 'Ensures clickable elements have an accessible label.',
      help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
      helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
-     nodes: escapedCssSelectors.map(cssSelector => ({
-       html: findElementByCssSelector(cssSelector),
-       target: [cssSelector],
-       impact: 'serious' as ImpactValue,
-       failureSummary:
-         'Fix any of the following:\n The clickable element does not have an accessible label.',
-       any: [
-         {
-           id: 'oobee-accessible-label',
-           data: null,
-           relatedNodes: [],
-           impact: 'serious',
-           message: 'The clickable element does not have an accessible label.',
-         },
-       ],
-       all: [],
-       none: [],
-     })).filter(item => item.html)
+     nodes: escapedCssSelectors
+       .map(cssSelector => ({
+         html: findElementByCssSelector(cssSelector),
+         target: [cssSelector],
+         impact: 'serious' as ImpactValue,
+         failureSummary:
+           'Fix any of the following:\n The clickable element does not have an accessible label.',
+         any: [
+           {
+             id: 'oobee-accessible-label',
+             data: null,
+             relatedNodes: [],
+             impact: 'serious',
+             message: 'The clickable element does not have an accessible label.',
+           },
+         ],
+         all: [],
+         none: [],
+       }))
+       .filter(item => item.html),
    };

    results.violations = [...results.violations, oobeeAccessibleLabelViolations];
@@ -40,8 +40,7 @@ import {
  import { silentLogger, guiInfoLog } from '../logs.js';
  import { ViewportSettingsClass } from '../combine.js';

- const isBlacklisted = (url: string) => {
-   const blacklistedPatterns = getBlackListedPatterns(null);
+ const isBlacklisted = (url: string, blacklistedPatterns: string[]) => {
    if (!blacklistedPatterns) {
      return false;
    }
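
isBlacklisted no longer re-derives the exclusion list on every call through getBlackListedPatterns(null); callers pass the patterns they already hold, so the list is parsed once per crawl instead of once per URL. The shape of the refactor, sketched standalone (the matcher body is an assumption):

```ts
// Standalone sketch: load once at crawl setup, thread the list through.
const isBlacklisted = (url: string, blacklistedPatterns: string[] | null): boolean => {
  if (!blacklistedPatterns) return false; // mirrors the guard kept above
  return blacklistedPatterns.some(p => new RegExp(p).test(url));
};

const blacklistedPatterns = ['\\.*form.gov.sg\\.*']; // stand-in for the loaded list
console.log(isBlacklisted('https://form.gov.sg/x', blacklistedPatterns)); // true
```
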
@@ -122,18 +121,10 @@ const crawlDomain = async ({
    const isScanPdfs = ['all', 'pdf-only'].includes(fileTypes);
    const { maxConcurrency } = constants;
    const { playwrightDeviceDetailsObject } = viewportSettings;
-   const isBlacklistedUrl = isBlacklisted(url);
+   const isBlacklistedUrl = isBlacklisted(url, blacklistedPatterns);

    const httpsAgent = new https.Agent({ rejectUnauthorized: false });

-   if (isBlacklistedUrl) {
-     guiInfoLog(guiInfoStatusTypes.SKIPPED, {
-       numScanned: urlsCrawled.scanned.length,
-       urlScanned: url,
-     });
-     return;
-   }
-
    // Boolean to omit axe scan for basic auth URL
    let isBasicAuth = false;
    let authHeader = '';
@@ -315,7 +306,7 @@ const crawlDomain = async ({

    const isExcluded = (newPageUrl: string): boolean => {
      const isAlreadyScanned: boolean = urlsCrawled.scanned.some(item => item.url === newPageUrl);
-     const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl);
+     const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl, blacklistedPatterns);
      const isNotFollowStrategy: boolean = !isFollowStrategy(newPageUrl, initialPageUrl, strategy);
      return isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy;
    };
@@ -609,13 +600,13 @@ const crawlDomain = async ({
      }

      await waitForPageLoaded(page, 10000);
-     let actualUrl = request.url;
+     let actualUrl = page.url() || request.loadedUrl || request.url;

      if (page.url() !== 'about:blank') {
        actualUrl = page.url();
      }

-     if (isBlacklisted(actualUrl) || (isUrlPdf(actualUrl) && !isScanPdfs)) {
+     if (!isFollowStrategy(url, actualUrl, strategy) && (isBlacklisted(actualUrl, blacklistedPatterns) || (isUrlPdf(actualUrl) && !isScanPdfs))) {
        guiInfoLog(guiInfoStatusTypes.SKIPPED, {
          numScanned: urlsCrawled.scanned.length,
          urlScanned: actualUrl,
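
All three crawlers now resolve the post-redirect URL the same way: prefer what the live page reports, then Crawlee's request.loadedUrl, then the URL as enqueued. One caveat worth noting: page.url() returns the truthy string 'about:blank' for an unnavigated page, so on such pages actualUrl now stays 'about:blank' where it previously fell back to request.url.

```ts
// Precedence, as repeated across crawlDomain, crawlLocalFile and
// crawlSitemap in this diff: the first truthy value wins.
const actualUrl = page.url() || request.loadedUrl || request.url;
```
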
@@ -684,8 +675,13 @@ const crawlDomain = async ({
        return;
      }

-     if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
-       urlsCrawled.userExcluded.push(request.url);
+     if (!isFollowStrategy(url, actualUrl, strategy) && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+       urlsCrawled.userExcluded.push({
+         url: request.url,
+         pageTitle: request.url,
+         actualUrl: actualUrl,
+       });
+
        await enqueueProcess(page, enqueueLinks, browserContext);
        return;
      }
@@ -710,18 +706,18 @@ const crawlDomain = async ({

      if (isScanHtml) {
        // For deduplication, if the URL is redirected, we want to store the original URL and the redirected URL (actualUrl)
-       const isRedirected = !areLinksEqual(request.loadedUrl, request.url);
+       const isRedirected = !areLinksEqual(actualUrl, request.url);

        // check if redirected link is following strategy (same-domain/same-hostname)
        const isLoadedUrlFollowStrategy = isFollowStrategy(
-         request.loadedUrl,
+         actualUrl,
          request.url,
          strategy,
        );
        if (isRedirected && !isLoadedUrlFollowStrategy) {
          urlsCrawled.notScannedRedirects.push({
            fromUrl: request.url,
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });
          return;
        }
@@ -730,13 +726,13 @@ const crawlDomain = async ({

        if (isRedirected) {
          const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
-           item => (item.actualUrl || item.url) === request.loadedUrl,
+           item => (item.actualUrl || item.url) === actualUrl,
          );

          if (isLoadedUrlInCrawledUrls) {
            urlsCrawled.notScannedRedirects.push({
              fromUrl: request.url,
-             toUrl: request.loadedUrl, // i.e. actualUrl
+             toUrl: actualUrl, // i.e. actualUrl
            });
            return;
          }
@@ -751,16 +747,16 @@ const crawlDomain = async ({
          urlsCrawled.scanned.push({
            url: urlWithoutAuth(request.url),
            pageTitle: results.pageTitle,
-           actualUrl: request.loadedUrl, // i.e. actualUrl
+           actualUrl: actualUrl, // i.e. actualUrl
          });

          urlsCrawled.scannedRedirects.push({
            fromUrl: urlWithoutAuth(request.url),
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });

          results.url = request.url;
-         results.actualUrl = request.loadedUrl;
+         results.actualUrl = actualUrl;
          await dataset.pushData(results);
        }
      } else {
@@ -153,6 +153,8 @@ const crawlLocalFile = async (
      await page.goto(request.url);
      const results = await runAxeScript({ includeScreenshots, page, randomToken });

+     const actualUrl = page.url() || request.loadedUrl || request.url;
+
      guiInfoLog(guiInfoStatusTypes.SCANNED, {
        numScanned: urlsCrawled.scanned.length,
        urlScanned: request.url,
@@ -161,16 +163,16 @@ const crawlLocalFile = async (
      urlsCrawled.scanned.push({
        url: request.url,
        pageTitle: results.pageTitle,
-       actualUrl: request.loadedUrl, // i.e. actualUrl
+       actualUrl: actualUrl, // i.e. actualUrl
      });

      urlsCrawled.scannedRedirects.push({
        fromUrl: request.url,
-       toUrl: request.loadedUrl, // i.e. actualUrl
+       toUrl: actualUrl, // i.e. actualUrl
      });

      results.url = request.url;
-     // results.actualUrl = request.loadedUrl;
+     results.actualUrl = actualUrl;

      await dataset.pushData(results);
    } else {
@@ -18,7 +18,7 @@ import {
    waitForPageLoaded,
    isFilePath,
  } from '../constants/common.js';
- import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
+ import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
  import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
  import { guiInfoLog } from '../logs.js';

@@ -161,21 +161,67 @@ const crawlSitemap = async (
      ],
    },
    requestList,
+   postNavigationHooks: [
+     async ({ page, request }) => {
+       try {
+         // Wait for a quiet period in the DOM, but with safeguards
+         await page.evaluate(() => {
+           return new Promise((resolve) => {
+             let timeout;
+             let mutationCount = 0;
+             const MAX_MUTATIONS = 250; // Prevent infinite mutations
+             const OBSERVER_TIMEOUT = 5000; // Hard timeout to exit
+
+             const observer = new MutationObserver(() => {
+               clearTimeout(timeout);
+
+               mutationCount++;
+               if (mutationCount > MAX_MUTATIONS) {
+                 observer.disconnect();
+                 resolve('Too many mutations detected, exiting.');
+                 return;
+               }
+
+               timeout = setTimeout(() => {
+                 observer.disconnect();
+                 resolve('DOM stabilized after mutations.');
+               }, 1000);
+             });
+
+             timeout = setTimeout(() => {
+               observer.disconnect();
+               resolve('Observer timeout reached, exiting.');
+             }, OBSERVER_TIMEOUT); // Ensure the observer stops after X seconds
+
+             observer.observe(document.documentElement, { childList: true, subtree: true });
+
+           });
+         });
+       } catch (err) {
+         // Handle page navigation errors gracefully
+         if (err.message.includes('was destroyed')) {
+           return; // Page navigated or closed, no need to handle
+         }
+         throw err; // Rethrow unknown errors
+       }
+     },
+   ],
+
    preNavigationHooks: isBasicAuth
      ? [
-         async ({ page }) => {
-           await page.setExtraHTTPHeaders({
-             Authorization: authHeader,
-             ...extraHTTPHeaders,
-           });
-         },
-       ]
+         async ({ page }) => {
+           await page.setExtraHTTPHeaders({
+             Authorization: authHeader,
+             ...extraHTTPHeaders,
+           });
+         },
+       ]
      : [
-         async () => {
-           preNavigationHooks(extraHTTPHeaders);
-           // insert other code here
-         },
-       ],
+         async () => {
+           preNavigationHooks(extraHTTPHeaders);
+           // insert other code here
+         },
+       ],
    requestHandlerTimeoutSecs: 90,
    requestHandler: async ({ page, request, response, sendRequest }) => {
      await waitForPageLoaded(page, 10000);
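
crawlSitemap additionally installs a post-navigation hook running a trimmed version of the same quiet-DOM observer (no attribute tracking, fixed 5s ceiling) right after navigation, before the request handler; "was destroyed" errors from pages that navigate away mid-evaluate are swallowed. The hook shape in Crawlee, reduced to a sketch:

```ts
// Minimal PlaywrightCrawler sketch; the real hook above embeds the
// MutationObserver logic instead of this placeholder wait.
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
  postNavigationHooks: [
    async ({ page }) => {
      // Runs after each navigation, before requestHandler.
      await page.waitForLoadState('domcontentloaded');
    },
  ],
  requestHandler: async ({ page, request }) => {
    console.log(request.url, await page.title());
  },
});
```
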
@@ -191,7 +237,7 @@ const crawlSitemap = async (
        request.url = currentUrl.href;
      }

-     const actualUrl = request.loadedUrl || request.url;
+     const actualUrl = page.url() || request.loadedUrl || request.url;

      if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
        crawler.autoscaledPool.abort();
@@ -223,8 +269,17 @@ const crawlSitemap = async (
      const contentType = response.headers()['content-type'];
      const status = response.status();

-     if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
-       urlsCrawled.userExcluded.push(request.url);
+     if (blacklistedPatterns && !isFollowStrategy(actualUrl, request.url, "same-hostname") && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+       urlsCrawled.userExcluded.push({
+         url: request.url,
+         pageTitle: request.url,
+         actualUrl: actualUrl,
+       });
+
+       guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+         numScanned: urlsCrawled.scanned.length,
+         urlScanned: request.url,
+       });
        return;
      }

@@ -255,16 +310,16 @@ const crawlSitemap = async (
        urlScanned: request.url,
      });

-     const isRedirected = !areLinksEqual(request.loadedUrl, request.url);
+     const isRedirected = !areLinksEqual(page.url(), request.url);
      if (isRedirected) {
        const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
-         item => (item.actualUrl || item.url.href) === request.loadedUrl,
+         item => (item.actualUrl || item.url.href) === page,
        );

        if (isLoadedUrlInCrawledUrls) {
          urlsCrawled.notScannedRedirects.push({
            fromUrl: request.url,
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });
          return;
        }
@@ -272,16 +327,16 @@ const crawlSitemap = async (
        urlsCrawled.scanned.push({
          url: urlWithoutAuth(request.url),
          pageTitle: results.pageTitle,
-         actualUrl: request.loadedUrl, // i.e. actualUrl
+         actualUrl: actualUrl, // i.e. actualUrl
        });

        urlsCrawled.scannedRedirects.push({
          fromUrl: urlWithoutAuth(request.url),
-         toUrl: request.loadedUrl, // i.e. actualUrl
+         toUrl: actualUrl,
        });

        results.url = request.url;
-       results.actualUrl = request.loadedUrl;
+       results.actualUrl = actualUrl;
      } else {
        urlsCrawled.scanned.push({
          url: urlWithoutAuth(request.url),
@@ -152,7 +152,12 @@ export const processPage = async (page, processPageParams) => {
        window.confirm('Page has been excluded, would you still like to proceed with the scan?'),
      );
      if (!continueScan) {
-       urlsCrawled.userExcluded.push(pageUrl);
+       urlsCrawled.userExcluded.push({
+         url: pageUrl,
+         pageTitle: pageUrl,
+         actualUrl: pageUrl,
+       });
+
        return;
      }
    }
@@ -396,7 +401,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
    // eslint-disable-next-line no-underscore-dangle
    const pageId = page._guid;

-   page.on('dialog', () => {});
+   page.on('dialog', () => { });

    const pageClosePromise = new Promise(resolve => {
      page.on('close', () => {
@@ -68,7 +68,7 @@ export const customAxeConfig: Spec = {
    selector: 'html',
    enabled: true,
    any: ['oobee-grading-text-contents'],
-   tags: ['wcag2a', 'wcag315'],
+   tags: ['wcag2aaa', 'wcag315'],
    metadata: {
      description:
        'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
@@ -219,8 +219,46 @@ const writeCsv = async (allIssues, storagePath) => {
      includeEmptyRows: true,
    };

+   // Create the parse stream (it’s asynchronous)
    const parser = new AsyncParser(opts);
-   parser.parse(allIssues).pipe(csvOutput);
+   const parseStream = parser.parse(allIssues);
+
+   // Pipe JSON2CSV output into the file, but don't end automatically
+   parseStream.pipe(csvOutput, { end: false });
+
+   // Once JSON2CSV is done writing all normal rows, append any "pagesNotScanned"
+   parseStream.on('end', () => {
+     if (allIssues.pagesNotScanned && allIssues.pagesNotScanned.length > 0) {
+       csvOutput.write('\n');
+       allIssues.pagesNotScanned.forEach(page => {
+         const skippedPage = {
+           customFlowLabel: allIssues.customFlowLabel || '',
+           deviceChosen: allIssues.deviceChosen || '',
+           scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
+           severity: 'error',
+           issueId: 'error-pages-skipped',
+           issueDescription: 'Page was skipped during the scan',
+           wcagConformance: '',
+           url: page.url || '',
+           pageTitle: '',
+           context: '',
+           howToFix: '',
+           axeImpact: '',
+           xpath: '',
+           learnMore: '',
+         };
+         csvOutput.write(`${Object.values(skippedPage).join(',')}\n`);
+       });
+     }
+
+     // Now close the CSV file
+     csvOutput.end();
+   });
+
+   parseStream.on('error', err => {
+     console.error('Error parsing CSV:', err);
+     csvOutput.end();
+   });
  };

  const compileHtmlWithEJS = async (
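
writeCsv previously let the JSON2CSV stream close the file when it finished; it now pipes with { end: false }, appends one synthetic "error-pages-skipped" row per unscanned page after the 'end' event, and only then calls csvOutput.end(). (The appended rows are joined with bare commas, so a field containing a comma would need quoting; the fields written here are plain strings and URLs.) The stream idiom in isolation:

```ts
// Standalone Node sketch of pipe(..., { end: false }): keep the target
// writable open so trailer rows can be appended after the source ends.
import { createReadStream, createWriteStream } from 'fs';

const source = createReadStream('rows.csv');
const out = createWriteStream('report.csv');

source.pipe(out, { end: false }); // do not auto-close `out`
source.on('end', () => {
  out.write('trailer,row\n'); // appended after all piped rows
  out.end(); // close explicitly
});
```
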
@@ -234,7 +272,7 @@ const compileHtmlWithEJS = async (
    filename: path.join(dirname, './static/ejs/report.ejs'),
  });

- const html = template({...allIssues, storagePath: JSON.stringify(storagePath)});
+ const html = template({ ...allIssues, storagePath: JSON.stringify(storagePath) });
  await fs.writeFile(htmlFilePath, html);

  let htmlContent = await fs.readFile(htmlFilePath, { encoding: 'utf8' });