@govtechsg/oobee 0.10.28 → 0.10.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Dockerfile CHANGED
@@ -1,6 +1,6 @@
  # Use Microsoft Playwright image as base image
  # Node version is v22
- FROM mcr.microsoft.com/playwright:v1.49.1-jammy
+ FROM mcr.microsoft.com/playwright:v1.50.0-noble

  # Installation of packages for oobee and runner
  RUN apt-get update && apt-get install -y zip git
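
The base image moves from Playwright v1.49.1 on Ubuntu Jammy to v1.50.0 on Noble; note that package.json (below) pins the npm playwright package at 1.50.1, one patch ahead of the image, and Playwright generally expects the browser image and the npm package to match. A minimal drift-check sketch — illustrative only, not part of oobee; the paths and regex are assumptions:

```ts
// Hypothetical CI check (not in oobee): warn when the Dockerfile's
// Playwright image tag and package.json's playwright pin drift apart.
import { readFileSync } from 'fs';

const dockerfile = readFileSync('Dockerfile', 'utf8');
const pkg = JSON.parse(readFileSync('package.json', 'utf8'));

// "FROM mcr.microsoft.com/playwright:v1.50.0-noble" -> "1.50.0"
const imageVersion = dockerfile.match(/playwright:v(\d+\.\d+\.\d+)/)?.[1];
const npmVersion: string = pkg.dependencies.playwright; // e.g. "1.50.1"

if (imageVersion !== npmVersion) {
  console.warn(`Playwright drift: image v${imageVersion} vs npm ${npmVersion}`);
}
```
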
package/INSTALLATION.md CHANGED
@@ -6,7 +6,7 @@ Oobee (CLI) is provided as a portable distribution which minimises installation

  Oobee is a customisable, automated accessibility testing tool that allows software development teams to find and fix accessibility problems to improve persons with disabilities (PWDs) access to digital services.

- Oobee (CLI) allows software engineers to run Oobee as part of their software development environment as the command line, as well as [integrate it into their CI/CD pipleline](https://github.com/GovTechSG/oobee/blob/master/INTEGRATION.md).
+ Oobee (CLI) allows software engineers to run Oobee as part of their software development environment as the command line, as well as [integrate it into their CI/CD pipleline](INTEGRATION.md).

  ## System Requirements

package/exclusions.txt CHANGED
@@ -1,2 +1,3 @@
  \.*login.singpass.gov.sg\.*
- \.*auth.singpass.gov.sg\.*
+ \.*auth.singpass.gov.sg\.*
+ \.*form.gov.sg\.*
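
form.gov.sg joins the default exclusion list. Each line appears to be applied as a regular expression against candidate URLs (so `\.*` means "zero or more literal dots", not "anything"). A sketch of how a matcher like the isSkippedUrl seen in the crawler hunks below might apply them — the body here is an assumption, not oobee's actual implementation:

```ts
// Sketch only: mirrors the isSkippedUrl(url, blacklistedPatterns) call
// sites in this diff; oobee's real matcher may differ.
const patterns = ['\\.*login.singpass.gov.sg\\.*', '\\.*form.gov.sg\\.*'];

const isSkippedUrl = (url: string, blacklistedPatterns: string[]): boolean =>
  blacklistedPatterns.some(pattern => new RegExp(pattern).test(url));

console.log(isSkippedUrl('https://form.gov.sg/abc123', patterns)); // true
```
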
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@govtechsg/oobee",
    "main": "dist/npmIndex.js",
-   "version": "0.10.28",
+   "version": "0.10.33",
    "type": "module",
    "author": "Government Technology Agency <info@tech.gov.sg>",
    "dependencies": {
@@ -23,7 +23,7 @@
    "mime-types": "^2.1.35",
    "minimatch": "^9.0.3",
    "pdfjs-dist": "github:veraPDF/pdfjs-dist#v4.4.168-taggedPdf-0.1.20",
-   "playwright": "1.49.1",
+   "playwright": "1.50.1",
    "prettier": "^3.1.0",
    "print-message": "^3.0.1",
    "safe-regex": "^2.1.1",
package/src/combine.ts CHANGED
@@ -97,6 +97,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
      isEnableWcagAaa: envDetails.ruleset,
      isSlowScanMode: envDetails.specifiedMaxConcurrency,
      isAdhereRobots: envDetails.followRobots,
+     deviceChosen: deviceToScan,
    };

    const viewportSettings: ViewportSettingsClass = new ViewportSettingsClass(
@@ -209,6 +210,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
      ...urlsCrawledObj.error,
      ...urlsCrawledObj.invalid,
      ...urlsCrawledObj.forbidden,
+     ...urlsCrawledObj.userExcluded,
    ];
    const basicFormHTMLSnippet = await generateArtifacts(
      randomToken,
@@ -239,6 +241,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
        pagesNotScanned.length,
        metadata,
      );
+   } else {
+     printMessage([`No pages were scanned.`], alertMessageOptions);
    }
  } else {
    printMessage([`No pages were scanned.`], alertMessageOptions);
@@ -269,7 +269,7 @@ export const cliOptions: { [key: string]: Options } = {
    default: 'default',
    coerce: option => {
      const validChoices = Object.values(RuleFlags);
-     const userChoices: string[] = option.split(',');
+     const userChoices: string[] = String(option).split(',');
      const invalidUserChoices = userChoices.filter(
        choice => !validChoices.includes(choice as RuleFlags),
      );
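
Wrapping the option in String() guards the coerce callback against yargs handing over a non-string: a purely numeric flag value is parsed as a number, which has no .split method. A standalone repro of the failure mode (illustrative, not oobee code):

```ts
// yargs parses a purely numeric flag value (e.g. `--some-flag 123`)
// into the number 123 by default.
const option: unknown = 123;

// (option as any).split(',') would throw: option.split is not a function
const userChoices: string[] = String(option).split(','); // ['123'] — safe
console.log(userChoices);
```
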
@@ -1819,13 +1819,72 @@ export const urlWithoutAuth = (url: string): string => {
  };

  export const waitForPageLoaded = async (page, timeout = 10000) => {
+   const OBSERVER_TIMEOUT = timeout; // Ensure observer timeout does not exceed the main timeout
+
    return Promise.race([
-     page.waitForLoadState('load'),
-     page.waitForLoadState('networkidle'),
-     new Promise(resolve => setTimeout(resolve, timeout)),
+     page.waitForLoadState('load'), // Ensure page load completes
+     page.waitForLoadState('networkidle'), // Wait for network requests to settle
+     new Promise(resolve => setTimeout(resolve, timeout)), // Hard timeout as a fallback
+     page.evaluate((OBSERVER_TIMEOUT) => {
+       return new Promise((resolve) => {
+         // Skip mutation check for PDFs
+         if (document.contentType === 'application/pdf') {
+           resolve('Skipping DOM mutation check for PDF.');
+           return;
+         }
+
+         let timeout;
+         let mutationCount = 0;
+         const MAX_MUTATIONS = 250; // Limit max mutations
+         const mutationHash = {};
+
+         const observer = new MutationObserver(mutationsList => {
+           clearTimeout(timeout);
+
+           mutationCount++;
+           if (mutationCount > MAX_MUTATIONS) {
+             observer.disconnect();
+             resolve('Too many mutations detected, exiting.');
+             return;
+           }
+
+           mutationsList.forEach(mutation => {
+             if (mutation.target instanceof Element) {
+               Array.from(mutation.target.attributes).forEach(attr => {
+                 const mutationKey = `${mutation.target.nodeName}-${attr.name}`;
+
+                 if (mutationKey) {
+                   mutationHash[mutationKey] = (mutationHash[mutationKey] || 0) + 1;
+
+                   if (mutationHash[mutationKey] >= 10) {
+                     observer.disconnect();
+                     resolve(`Repeated mutation detected for ${mutationKey}, exiting.`);
+                   }
+                 }
+               });
+             }
+           });
+
+           // If no mutations occur for 1 second, resolve
+           timeout = setTimeout(() => {
+             observer.disconnect();
+             resolve('DOM stabilized after mutations.');
+           }, 1000);
+         });
+
+         // Final timeout to avoid infinite waiting
+         timeout = setTimeout(() => {
+           observer.disconnect();
+           resolve('Observer timeout reached, exiting.');
+         }, OBSERVER_TIMEOUT);
+
+         observer.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
+       });
+     }, OBSERVER_TIMEOUT), // Pass OBSERVER_TIMEOUT dynamically to the browser context
    ]);
  };

+
  function isValidHttpUrl(urlString) {
    const pattern = /^(http|https):\/\/[^ "]+$/;
    return pattern.test(urlString);
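
waitForPageLoaded now races the two Playwright load states and the hard timeout against an in-page MutationObserver: it resolves once the DOM has been quiet for one second, gives up after 250 total mutations, and bails out early if the same node/attribute pair mutates ten times (so a looping ticker or animation no longer stalls the scan for the full timeout). Call sites elsewhere in this diff are unchanged:

```ts
// Usage as seen in the crawler hunks below; the single timeout argument
// caps both the outer race and the in-page observer (OBSERVER_TIMEOUT).
await waitForPageLoaded(page, 10000);
```
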
@@ -186,7 +186,7 @@ export class UrlsCrawled {
    error: { url: string }[] = [];
    exceededRequests: string[] = [];
    forbidden: string[] = [];
-   userExcluded: string[] = [];
+   userExcluded: { url: string; actualUrl: string; pageTitle: string }[] = [];
    everything: string[] = [];

    constructor(urlsCrawled?: Partial<UrlsCrawled>) {
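
userExcluded entries are promoted from bare URL strings to the same shape as scanned entries, so combine.ts can spread them into pagesNotScanned and the report can render them. The migration at each push site, condensed from the crawler hunks below:

```ts
// Before (0.10.28): only the raw URL was kept.
// urlsCrawled.userExcluded.push(request.url);

// After (0.10.33): same shape as `scanned` entries.
urlsCrawled.userExcluded.push({
  url: request.url,
  pageTitle: request.url, // no title is available for a skipped page
  actualUrl, // post-redirect URL
});
```
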
@@ -1,7 +1,7 @@
  import crawlee, { CrawlingContext, PlaywrightGotoOptions } from 'crawlee';
  import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
- import { xPathToCss } from '../xPathToCss.js';
  import { BrowserContext, Page } from 'playwright';
+ import { xPathToCss } from '../xPathToCss.js';
  import {
    axeScript,
    guiInfoStatusTypes,
@@ -357,24 +357,28 @@ export const runAxeScript = async ({
          return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
        },
      },
-     {
-       ...customAxeConfig.checks[2],
-       evaluate: (_node: HTMLElement) => {
-         if (gradingReadabilityFlag === '') {
-           return true; // Pass if no readability issues
-         }
-         // Dynamically update the grading messages
-         const gradingCheck = customAxeConfig.checks.find(
-           check => check.id === 'oobee-grading-text-contents',
-         );
-         if (gradingCheck) {
-           gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag
-           }.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
-         }
-
-         // Fail if readability issues are detected
-       },
-     },
+     ...(enableWcagAaa
+       ? [
+           {
+             ...customAxeConfig.checks[2],
+             evaluate: (_node: HTMLElement) => {
+               if (gradingReadabilityFlag === '') {
+                 return true; // Pass if no readability issues
+               }
+               // Dynamically update the grading messages
+               const gradingCheck = customAxeConfig.checks.find(
+                 check => check.id === 'oobee-grading-text-contents',
+               );
+               if (gradingCheck) {
+                 gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag
+                 }.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
+               }
+
+               // Fail if readability issues are detected
+             },
+           },
+         ]
+       : []),
    ],
    rules: customAxeConfig.rules
      .filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
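
The readability check is now registered only when the AAA ruleset is active, via the conditional-spread idiom: spread a ternary that yields either a one-element array or an empty one. A standalone illustration (check names other than oobee-grading-text-contents are placeholders):

```ts
// Generic illustration of `...(cond ? [x] : [])` as used above.
const enableWcagAaa = false; // assumption: driven by the chosen ruleset
const checks = [
  'base-check', // placeholder
  ...(enableWcagAaa ? ['oobee-grading-text-contents'] : []),
];
console.log(checks); // the AAA-only check is absent when the flag is off
```
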
@@ -416,9 +420,12 @@ export const runAxeScript = async ({
    const escapedCssSelectors =
      oobeeAccessibleLabelFlaggedCssSelectors.map(escapeCSSSelector);

-   function framesCheck(cssSelector: string): { doc: Document; remainingSelector: string } {
+   function framesCheck(cssSelector: string): {
+     doc: Document;
+     remainingSelector: string;
+   } {
      let doc = document; // Start with the main document
-     let remainingSelector = ""; // To store the last part of the selector
+     let remainingSelector = ''; // To store the last part of the selector
      let targetIframe = null;

      // Split the selector into parts at "> html"
@@ -429,18 +436,18 @@ export const runAxeScript = async ({

        // Add back '> html' to the current part
        if (i > 0) {
-         iframeSelector = "html > " + iframeSelector;
+         iframeSelector = `html > ${iframeSelector}`;
        }

        let frameset = null;
        // Find the iframe using the current document context
-       if (doc.querySelector("frameset")) {
-         frameset = doc.querySelector("frameset");
+       if (doc.querySelector('frameset')) {
+         frameset = doc.querySelector('frameset');
        }

        if (frameset) {
          doc = frameset;
-         iframeSelector = iframeSelector.split("body >")[1].trim();
+         iframeSelector = iframeSelector.split('body >')[1].trim();
        }
        targetIframe = doc.querySelector(iframeSelector);

@@ -448,7 +455,9 @@ export const runAxeScript = async ({
          // Update the document to the iframe's contentDocument
          doc = targetIframe.contentDocument;
        } else {
-         console.warn(`Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`);
+         console.warn(
+           `Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`,
+         );
          return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
        }
      }
@@ -457,19 +466,18 @@ export const runAxeScript = async ({
      remainingSelector = diffParts[diffParts.length - 1].trim();

      // Remove any leading '>' combinators from remainingSelector
-     remainingSelector = "html" + remainingSelector;
+     remainingSelector = `html${remainingSelector}`;

      return { doc, remainingSelector };
    }

-
    function findElementByCssSelector(cssSelector: string): string | null {
      let doc = document;

      // Check if the selector includes 'frame' or 'iframe' and update doc and selector

      if (/\s*>\s*html\s*/.test(cssSelector)) {
-       let inFrames = framesCheck(cssSelector)
+       const inFrames = framesCheck(cssSelector);
        doc = inFrames.doc;
        cssSelector = inFrames.remainingSelector;
      }
@@ -515,24 +523,26 @@ export const runAxeScript = async ({
      description: 'Ensures clickable elements have an accessible label.',
      help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
      helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
-     nodes: escapedCssSelectors.map(cssSelector => ({
-       html: findElementByCssSelector(cssSelector),
-       target: [cssSelector],
-       impact: 'serious' as ImpactValue,
-       failureSummary:
-         'Fix any of the following:\n The clickable element does not have an accessible label.',
-       any: [
-         {
-           id: 'oobee-accessible-label',
-           data: null,
-           relatedNodes: [],
-           impact: 'serious',
-           message: 'The clickable element does not have an accessible label.',
-         },
-       ],
-       all: [],
-       none: [],
-     })).filter(item => item.html)
+     nodes: escapedCssSelectors
+       .map(cssSelector => ({
+         html: findElementByCssSelector(cssSelector),
+         target: [cssSelector],
+         impact: 'serious' as ImpactValue,
+         failureSummary:
+           'Fix any of the following:\n The clickable element does not have an accessible label.',
+         any: [
+           {
+             id: 'oobee-accessible-label',
+             data: null,
+             relatedNodes: [],
+             impact: 'serious',
+             message: 'The clickable element does not have an accessible label.',
+           },
+         ],
+         all: [],
+         none: [],
+       }))
+       .filter(item => item.html),
    };

    results.violations = [...results.violations, oobeeAccessibleLabelViolations];
@@ -40,8 +40,7 @@ import {
  import { silentLogger, guiInfoLog } from '../logs.js';
  import { ViewportSettingsClass } from '../combine.js';

- const isBlacklisted = (url: string) => {
-   const blacklistedPatterns = getBlackListedPatterns(null);
+ const isBlacklisted = (url: string, blacklistedPatterns: string[]) => {
    if (!blacklistedPatterns) {
      return false;
    }
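
isBlacklisted no longer re-derives the exclusion list on every call through getBlackListedPatterns(null); callers pass the patterns they already hold, so the list is parsed once per crawl instead of once per URL. The shape of the refactor, sketched standalone (the matcher body is an assumption):

```ts
// Standalone sketch: load once at crawl setup, thread the list through.
const isBlacklisted = (url: string, blacklistedPatterns: string[] | null): boolean => {
  if (!blacklistedPatterns) return false; // mirrors the guard kept above
  return blacklistedPatterns.some(p => new RegExp(p).test(url));
};

const blacklistedPatterns = ['\\.*form.gov.sg\\.*']; // stand-in for the loaded list
console.log(isBlacklisted('https://form.gov.sg/x', blacklistedPatterns)); // true
```
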
@@ -122,18 +121,10 @@ const crawlDomain = async ({
    const isScanPdfs = ['all', 'pdf-only'].includes(fileTypes);
    const { maxConcurrency } = constants;
    const { playwrightDeviceDetailsObject } = viewportSettings;
-   const isBlacklistedUrl = isBlacklisted(url);
+   const isBlacklistedUrl = isBlacklisted(url, blacklistedPatterns);

    const httpsAgent = new https.Agent({ rejectUnauthorized: false });

-   if (isBlacklistedUrl) {
-     guiInfoLog(guiInfoStatusTypes.SKIPPED, {
-       numScanned: urlsCrawled.scanned.length,
-       urlScanned: url,
-     });
-     return;
-   }
-
    // Boolean to omit axe scan for basic auth URL
    let isBasicAuth = false;
    let authHeader = '';
@@ -315,7 +306,7 @@ const crawlDomain = async ({

    const isExcluded = (newPageUrl: string): boolean => {
      const isAlreadyScanned: boolean = urlsCrawled.scanned.some(item => item.url === newPageUrl);
-     const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl);
+     const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl, blacklistedPatterns);
      const isNotFollowStrategy: boolean = !isFollowStrategy(newPageUrl, initialPageUrl, strategy);
      return isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy;
    };
@@ -609,13 +600,13 @@ const crawlDomain = async ({
      }

      await waitForPageLoaded(page, 10000);
-     let actualUrl = request.url;
+     let actualUrl = page.url() || request.loadedUrl || request.url;

      if (page.url() !== 'about:blank') {
        actualUrl = page.url();
      }

-     if (isBlacklisted(actualUrl) || (isUrlPdf(actualUrl) && !isScanPdfs)) {
+     if (!isFollowStrategy(url, actualUrl, strategy) && (isBlacklisted(actualUrl, blacklistedPatterns) || (isUrlPdf(actualUrl) && !isScanPdfs))) {
        guiInfoLog(guiInfoStatusTypes.SKIPPED, {
          numScanned: urlsCrawled.scanned.length,
          urlScanned: actualUrl,
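
All three crawlers now resolve the post-redirect URL the same way: prefer what the live page reports, then Crawlee's request.loadedUrl, then the URL as enqueued. One caveat worth noting: page.url() returns the truthy string 'about:blank' for an unnavigated page, so on such pages actualUrl now stays 'about:blank' where it previously fell back to request.url.

```ts
// Precedence, as repeated across crawlDomain, crawlLocalFile and
// crawlSitemap in this diff: the first truthy value wins.
const actualUrl = page.url() || request.loadedUrl || request.url;
```
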
@@ -684,8 +675,13 @@ const crawlDomain = async ({
        return;
      }

-     if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
-       urlsCrawled.userExcluded.push(request.url);
+     if (!isFollowStrategy(url, actualUrl, strategy) && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+       urlsCrawled.userExcluded.push({
+         url: request.url,
+         pageTitle: request.url,
+         actualUrl: actualUrl,
+       });
+
        await enqueueProcess(page, enqueueLinks, browserContext);
        return;
      }
@@ -710,18 +706,18 @@ const crawlDomain = async ({

      if (isScanHtml) {
        // For deduplication, if the URL is redirected, we want to store the original URL and the redirected URL (actualUrl)
-       const isRedirected = !areLinksEqual(request.loadedUrl, request.url);
+       const isRedirected = !areLinksEqual(actualUrl, request.url);

        // check if redirected link is following strategy (same-domain/same-hostname)
        const isLoadedUrlFollowStrategy = isFollowStrategy(
-         request.loadedUrl,
+         actualUrl,
          request.url,
          strategy,
        );
        if (isRedirected && !isLoadedUrlFollowStrategy) {
          urlsCrawled.notScannedRedirects.push({
            fromUrl: request.url,
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });
          return;
        }
@@ -730,13 +726,13 @@ const crawlDomain = async ({

        if (isRedirected) {
          const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
-           item => (item.actualUrl || item.url) === request.loadedUrl,
+           item => (item.actualUrl || item.url) === actualUrl,
          );

          if (isLoadedUrlInCrawledUrls) {
            urlsCrawled.notScannedRedirects.push({
              fromUrl: request.url,
-             toUrl: request.loadedUrl, // i.e. actualUrl
+             toUrl: actualUrl, // i.e. actualUrl
            });
            return;
          }
@@ -751,16 +747,16 @@ const crawlDomain = async ({
          urlsCrawled.scanned.push({
            url: urlWithoutAuth(request.url),
            pageTitle: results.pageTitle,
-           actualUrl: request.loadedUrl, // i.e. actualUrl
+           actualUrl: actualUrl, // i.e. actualUrl
          });

          urlsCrawled.scannedRedirects.push({
            fromUrl: urlWithoutAuth(request.url),
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });

          results.url = request.url;
-         results.actualUrl = request.loadedUrl;
+         results.actualUrl = actualUrl;
          await dataset.pushData(results);
        }
      } else {
@@ -153,6 +153,8 @@ const crawlLocalFile = async (
      await page.goto(request.url);
      const results = await runAxeScript({ includeScreenshots, page, randomToken });

+     const actualUrl = page.url() || request.loadedUrl || request.url;
+
      guiInfoLog(guiInfoStatusTypes.SCANNED, {
        numScanned: urlsCrawled.scanned.length,
        urlScanned: request.url,
@@ -161,16 +163,16 @@ const crawlLocalFile = async (
      urlsCrawled.scanned.push({
        url: request.url,
        pageTitle: results.pageTitle,
-       actualUrl: request.loadedUrl, // i.e. actualUrl
+       actualUrl: actualUrl, // i.e. actualUrl
      });

      urlsCrawled.scannedRedirects.push({
        fromUrl: request.url,
-       toUrl: request.loadedUrl, // i.e. actualUrl
+       toUrl: actualUrl, // i.e. actualUrl
      });

      results.url = request.url;
-     // results.actualUrl = request.loadedUrl;
+     results.actualUrl = actualUrl;

      await dataset.pushData(results);
    } else {
@@ -18,7 +18,7 @@ import {
    waitForPageLoaded,
    isFilePath,
  } from '../constants/common.js';
- import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
+ import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
  import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
  import { guiInfoLog } from '../logs.js';

@@ -161,21 +161,67 @@ const crawlSitemap = async (
      ],
    },
    requestList,
+   postNavigationHooks: [
+     async ({ page, request }) => {
+       try {
+         // Wait for a quiet period in the DOM, but with safeguards
+         await page.evaluate(() => {
+           return new Promise((resolve) => {
+             let timeout;
+             let mutationCount = 0;
+             const MAX_MUTATIONS = 250; // Prevent infinite mutations
+             const OBSERVER_TIMEOUT = 5000; // Hard timeout to exit
+
+             const observer = new MutationObserver(() => {
+               clearTimeout(timeout);
+
+               mutationCount++;
+               if (mutationCount > MAX_MUTATIONS) {
+                 observer.disconnect();
+                 resolve('Too many mutations detected, exiting.');
+                 return;
+               }
+
+               timeout = setTimeout(() => {
+                 observer.disconnect();
+                 resolve('DOM stabilized after mutations.');
+               }, 1000);
+             });
+
+             timeout = setTimeout(() => {
+               observer.disconnect();
+               resolve('Observer timeout reached, exiting.');
+             }, OBSERVER_TIMEOUT); // Ensure the observer stops after X seconds
+
+             observer.observe(document.documentElement, { childList: true, subtree: true });
+
+           });
+         });
+       } catch (err) {
+         // Handle page navigation errors gracefully
+         if (err.message.includes('was destroyed')) {
+           return; // Page navigated or closed, no need to handle
+         }
+         throw err; // Rethrow unknown errors
+       }
+     },
+   ],
+
    preNavigationHooks: isBasicAuth
      ? [
-         async ({ page }) => {
-           await page.setExtraHTTPHeaders({
-             Authorization: authHeader,
-             ...extraHTTPHeaders,
-           });
-         },
-       ]
+         async ({ page }) => {
+           await page.setExtraHTTPHeaders({
+             Authorization: authHeader,
+             ...extraHTTPHeaders,
+           });
+         },
+       ]
      : [
-         async () => {
-           preNavigationHooks(extraHTTPHeaders);
-           // insert other code here
-         },
-       ],
+         async () => {
+           preNavigationHooks(extraHTTPHeaders);
+           // insert other code here
+         },
+       ],
    requestHandlerTimeoutSecs: 90,
    requestHandler: async ({ page, request, response, sendRequest }) => {
      await waitForPageLoaded(page, 10000);
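
crawlSitemap additionally installs a post-navigation hook running a trimmed version of the same quiet-DOM observer (no attribute tracking, fixed 5s ceiling) right after navigation, before the request handler; "was destroyed" errors from pages that navigate away mid-evaluate are swallowed. The hook shape in Crawlee, reduced to a sketch:

```ts
// Minimal PlaywrightCrawler sketch; the real hook above embeds the
// MutationObserver logic instead of this placeholder wait.
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
  postNavigationHooks: [
    async ({ page }) => {
      // Runs after each navigation, before requestHandler.
      await page.waitForLoadState('domcontentloaded');
    },
  ],
  requestHandler: async ({ page, request }) => {
    console.log(request.url, await page.title());
  },
});
```
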
@@ -191,7 +237,7 @@ const crawlSitemap = async (
        request.url = currentUrl.href;
      }

-     const actualUrl = request.loadedUrl || request.url;
+     const actualUrl = page.url() || request.loadedUrl || request.url;

      if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
        crawler.autoscaledPool.abort();
@@ -223,8 +269,17 @@ const crawlSitemap = async (
      const contentType = response.headers()['content-type'];
      const status = response.status();

-     if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
-       urlsCrawled.userExcluded.push(request.url);
+     if (blacklistedPatterns && !isFollowStrategy(actualUrl, request.url, "same-hostname") && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+       urlsCrawled.userExcluded.push({
+         url: request.url,
+         pageTitle: request.url,
+         actualUrl: actualUrl,
+       });
+
+       guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+         numScanned: urlsCrawled.scanned.length,
+         urlScanned: request.url,
+       });
        return;
      }

@@ -255,16 +310,16 @@ const crawlSitemap = async (
        urlScanned: request.url,
      });

-     const isRedirected = !areLinksEqual(request.loadedUrl, request.url);
+     const isRedirected = !areLinksEqual(page.url(), request.url);
      if (isRedirected) {
        const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
-         item => (item.actualUrl || item.url.href) === request.loadedUrl,
+         item => (item.actualUrl || item.url.href) === page,
        );

        if (isLoadedUrlInCrawledUrls) {
          urlsCrawled.notScannedRedirects.push({
            fromUrl: request.url,
-           toUrl: request.loadedUrl, // i.e. actualUrl
+           toUrl: actualUrl, // i.e. actualUrl
          });
          return;
        }
@@ -272,16 +327,16 @@ const crawlSitemap = async (
        urlsCrawled.scanned.push({
          url: urlWithoutAuth(request.url),
          pageTitle: results.pageTitle,
-         actualUrl: request.loadedUrl, // i.e. actualUrl
+         actualUrl: actualUrl, // i.e. actualUrl
        });

        urlsCrawled.scannedRedirects.push({
          fromUrl: urlWithoutAuth(request.url),
-         toUrl: request.loadedUrl, // i.e. actualUrl
+         toUrl: actualUrl,
        });

        results.url = request.url;
-       results.actualUrl = request.loadedUrl;
+       results.actualUrl = actualUrl;
      } else {
        urlsCrawled.scanned.push({
          url: urlWithoutAuth(request.url),
@@ -152,7 +152,12 @@ export const processPage = async (page, processPageParams) => {
        window.confirm('Page has been excluded, would you still like to proceed with the scan?'),
      );
      if (!continueScan) {
-       urlsCrawled.userExcluded.push(pageUrl);
+       urlsCrawled.userExcluded.push({
+         url: pageUrl,
+         pageTitle: pageUrl,
+         actualUrl: pageUrl,
+       });
+
        return;
      }
    }
@@ -396,7 +401,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
    // eslint-disable-next-line no-underscore-dangle
    const pageId = page._guid;

-   page.on('dialog', () => {});
+   page.on('dialog', () => { });

    const pageClosePromise = new Promise(resolve => {
      page.on('close', () => {
@@ -68,7 +68,7 @@ export const customAxeConfig: Spec = {
    selector: 'html',
    enabled: true,
    any: ['oobee-grading-text-contents'],
-   tags: ['wcag2a', 'wcag315'],
+   tags: ['wcag2aaa', 'wcag315'],
    metadata: {
      description:
        'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
@@ -219,8 +219,46 @@ const writeCsv = async (allIssues, storagePath) => {
      includeEmptyRows: true,
    };

+   // Create the parse stream (it’s asynchronous)
    const parser = new AsyncParser(opts);
-   parser.parse(allIssues).pipe(csvOutput);
+   const parseStream = parser.parse(allIssues);
+
+   // Pipe JSON2CSV output into the file, but don't end automatically
+   parseStream.pipe(csvOutput, { end: false });
+
+   // Once JSON2CSV is done writing all normal rows, append any "pagesNotScanned"
+   parseStream.on('end', () => {
+     if (allIssues.pagesNotScanned && allIssues.pagesNotScanned.length > 0) {
+       csvOutput.write('\n');
+       allIssues.pagesNotScanned.forEach(page => {
+         const skippedPage = {
+           customFlowLabel: allIssues.customFlowLabel || '',
+           deviceChosen: allIssues.deviceChosen || '',
+           scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
+           severity: 'error',
+           issueId: 'error-pages-skipped',
+           issueDescription: 'Page was skipped during the scan',
+           wcagConformance: '',
+           url: page.url || '',
+           pageTitle: '',
+           context: '',
+           howToFix: '',
+           axeImpact: '',
+           xpath: '',
+           learnMore: '',
+         };
+         csvOutput.write(`${Object.values(skippedPage).join(',')}\n`);
+       });
+     }
+
+     // Now close the CSV file
+     csvOutput.end();
+   });
+
+   parseStream.on('error', err => {
+     console.error('Error parsing CSV:', err);
+     csvOutput.end();
+   });
  };

  const compileHtmlWithEJS = async (
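
writeCsv previously let the JSON2CSV stream close the file when it finished; it now pipes with { end: false }, appends one synthetic "error-pages-skipped" row per unscanned page after the 'end' event, and only then calls csvOutput.end(). (The appended rows are joined with bare commas, so a field containing a comma would need quoting; the fields written here are plain strings and URLs.) The stream idiom in isolation:

```ts
// Standalone Node sketch of pipe(..., { end: false }): keep the target
// writable open so trailer rows can be appended after the source ends.
import { createReadStream, createWriteStream } from 'fs';

const source = createReadStream('rows.csv');
const out = createWriteStream('report.csv');

source.pipe(out, { end: false }); // do not auto-close `out`
source.on('end', () => {
  out.write('trailer,row\n'); // appended after all piped rows
  out.end(); // close explicitly
});
```
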
@@ -234,7 +272,7 @@ const compileHtmlWithEJS = async (
    filename: path.join(dirname, './static/ejs/report.ejs'),
  });

- const html = template({...allIssues, storagePath: JSON.stringify(storagePath)});
+ const html = template({ ...allIssues, storagePath: JSON.stringify(storagePath) });
  await fs.writeFile(htmlFilePath, html);

  let htmlContent = await fs.readFile(htmlFilePath, { encoding: 'utf8' });