npm - @govtechsg/oobee - Versions diffs - 0.10.36 → 0.10.42 - Mend

@govtechsg/oobee 0.10.36 → 0.10.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/.github/workflows/docker-test.yml +1 -1
package/DETAILS.md +3 -3
package/INTEGRATION.md +142 -53
package/README.md +17 -0
package/REPORTS.md +362 -0
package/exclusions.txt +4 -1
package/package.json +2 -2
package/src/constants/cliFunctions.ts +0 -7
package/src/constants/common.ts +39 -1
package/src/constants/constants.ts +9 -8
package/src/crawlers/commonCrawlerFunc.ts +95 -220
package/src/crawlers/crawlDomain.ts +10 -23
package/src/crawlers/crawlLocalFile.ts +2 -0
package/src/crawlers/crawlSitemap.ts +6 -4
package/src/crawlers/custom/escapeCssSelector.ts +10 -0
package/src/crawlers/custom/evaluateAltText.ts +13 -0
package/src/crawlers/custom/extractAndGradeText.ts +0 -2
package/src/crawlers/custom/extractText.ts +28 -0
package/src/crawlers/custom/findElementByCssSelector.ts +46 -0
package/src/crawlers/custom/flagUnlabelledClickableElements.ts +982 -842
package/src/crawlers/custom/framesCheck.ts +51 -0
package/src/crawlers/custom/getAxeConfiguration.ts +126 -0
package/src/crawlers/custom/gradeReadability.ts +30 -0
package/src/crawlers/custom/xPathToCss.ts +178 -0
package/src/crawlers/pdfScanFunc.ts +67 -26
package/src/mergeAxeResults.ts +535 -132
package/src/npmIndex.ts +130 -62
package/src/screenshotFunc/htmlScreenshotFunc.ts +1 -1
package/src/screenshotFunc/pdfScreenshotFunc.ts +34 -1
package/src/static/ejs/partials/components/ruleOffcanvas.ejs +1 -1
package/src/static/ejs/partials/components/scanAbout.ejs +1 -1
package/src/static/ejs/partials/footer.ejs +3 -3
package/src/static/ejs/partials/scripts/reportSearch.ejs +112 -74
package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +2 -2
package/src/static/ejs/partials/summaryMain.ejs +3 -3
package/src/static/ejs/report.ejs +3 -3
package/src/utils.ts +289 -13
package/src/xPathToCssCypress.ts +178 -0
package/src/crawlers/customAxeFunctions.ts +0 -82

package/src/crawlers/custom/framesCheck.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Walks a frame-crossing CSS selector down to the document that holds the target element.
+ *
+ * The selector is split at every "> html" boundary. Each part before the last one selects a
+ * frame/iframe inside the current document; that frame's `contentDocument` becomes the
+ * context for the next part. The last part is returned, prefixed with `html`, so the caller
+ * can run it against the returned document.
+ *
+ * @param cssSelector - Selector that may cross frames, e.g. `#ad > html > body > a`.
+ * @returns The innermost reachable document and the selector to evaluate inside it. If a
+ *   frame is not found or its document is inaccessible, the document reached so far is
+ *   returned together with the original, unmodified selector. A selector without any
+ *   "> html" boundary is returned unchanged with the main document.
+ */
+export function framesCheck(cssSelector: string): {
+  doc: Document;
+  remainingSelector: string;
+} {
+  let doc: Document = document; // Start with the main document
+  // Split the selector into parts at "> html"; the separator itself is consumed
+  const diffParts = cssSelector.split(/\s*>\s*html\s*/);
+  if (diffParts.length === 1) {
+    // No frame boundary: nothing was stripped, so there is no "html" to add back
+    return { doc, remainingSelector: cssSelector };
+  }
+  for (let i = 0; i < diffParts.length - 1; i++) {
+    let iframeSelector = diffParts[i].trim();
+    if (i > 0) {
+      // Restore the "html" consumed by the split. Parts after the first begin with their
+      // own combinator ("> body > iframe") or pseudo-class, so plain concatenation is
+      // required; inserting another ">" would produce an invalid selector.
+      iframeSelector = `html${iframeSelector}`;
+    }
+    // Frames of a <frameset> page are looked up inside the frameset element itself
+    let searchRoot: ParentNode = doc;
+    const frameset = doc.querySelector('frameset');
+    if (frameset) {
+      searchRoot = frameset;
+      // Keep only what follows "body >" when present; otherwise use the selector as is
+      const afterBody = iframeSelector.split('body >')[1];
+      if (afterBody !== undefined) {
+        iframeSelector = afterBody.trim();
+      }
+    }
+    const targetIframe = searchRoot.querySelector<HTMLIFrameElement | HTMLFrameElement>(
+      iframeSelector,
+    );
+    const frameDocument = targetIframe ? targetIframe.contentDocument : null;
+    if (!frameDocument) {
+      console.warn(
+        `Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`,
+      );
+      return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
+    }
+    // Descend into the frame's document for the next part
+    doc = frameDocument;
+  }
+  // The last part is the remaining CSS selector; put back the "html" removed by the split
+  const remainingSelector = `html${diffParts[diffParts.length - 1].trim()}`;
+  return { doc, remainingSelector };
+}

package/src/crawlers/custom/getAxeConfiguration.ts ADDED Viewed

@@ -0,0 +1,126 @@
+import { ImpactValue } from "axe-core";
+import { evaluateAltText } from "./evaluateAltText.js";
+/**
+ * Builds the configuration object (branding, custom checks and rules) for axe-core.
+ *
+ * @param enableWcagAaa - When true, registers the readability check and appends the
+ *   `color-contrast-enhanced`, `identical-links-same-purpose` and
+ *   `meta-refresh-no-exceptions` rules.
+ * @param gradingReadabilityFlag - Readability score as a string; an empty string means no
+ *   readability issue was detected. It is interpolated into the "incomplete" message.
+ * @param disableOobee - When true, every rule whose id starts with `oobee` is removed.
+ * @returns An object with `branding`, `checks` and `rules` properties.
+ */
+export function getAxeConfiguration({
+  enableWcagAaa = false,
+  gradingReadabilityFlag = '',
+  disableOobee = false,
+}: {
+  enableWcagAaa?: boolean;
+  gradingReadabilityFlag?: string;
+  disableOobee?: boolean;
+}) {
+  return {
+    branding: {
+      application: 'oobee',
+    },
+    checks: [
+      {
+        id: 'oobee-confusing-alt-text',
+        metadata: {
+          impact: 'serious' as ImpactValue,
+          messages: {
+            pass: 'The image alt text is probably useful.',
+            fail: "The image alt text set as 'img', 'image', 'picture', 'photo', or 'graphic' is confusing or not useful.",
+          },
+        },
+        evaluate: evaluateAltText,
+      },
+      {
+        id: 'oobee-accessible-label',
+        metadata: {
+          impact: 'serious' as ImpactValue,
+          messages: {
+            pass: 'The clickable element has an accessible label.',
+            fail: 'The clickable element does not have an accessible label.',
+          },
+        },
+        evaluate: (node: HTMLElement) => {
+          // Fails any element carrying a non-empty data-flagged attribute
+          return !node.dataset.flagged;
+        },
+      },
+      // The readability check is only registered for WCAG AAA scans
+      ...(enableWcagAaa
+        ? [
+            {
+              id: 'oobee-grading-text-contents',
+              metadata: {
+                impact: 'moderate' as ImpactValue,
+                messages: {
+                  pass: 'The text content is easy to understand.',
+                  fail: 'The text content is potentially difficult to understand.',
+                  incomplete: `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`,
+                },
+              },
+              evaluate: (_node: HTMLElement): boolean | undefined => {
+                if (gradingReadabilityFlag === '') {
+                  return true; // Pass if no readability issues
+                }
+                // Readability issues were detected: return undefined (not false) so that
+                // axe-core reports the check as "incomplete" and shows the message above.
+                return undefined;
+              },
+            },
+          ]
+        : []),
+    ],
+    rules: [
+      { id: 'target-size', enabled: true },
+      {
+        id: 'oobee-confusing-alt-text',
+        selector: 'img[alt]',
+        enabled: true,
+        any: ['oobee-confusing-alt-text'],
+        tags: ['wcag2a', 'wcag111'],
+        metadata: {
+          description: 'Ensures image alt text is clear and useful.',
+          help: 'Image alt text must not be vague or unhelpful.',
+          helpUrl: 'https://www.deque.com/blog/great-alt-text-introduction/',
+        },
+      },
+      {
+        id: 'oobee-accessible-label',
+        // selector: '*', // to be set with the checker function output xpaths converted to css selectors
+        enabled: true,
+        any: ['oobee-accessible-label'],
+        tags: ['wcag2a', 'wcag211', 'wcag412'],
+        metadata: {
+          description: 'Ensures clickable elements have an accessible label.',
+          help: 'Clickable elements must have accessible labels.',
+          helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
+        },
+      },
+      {
+        id: 'oobee-grading-text-contents',
+        selector: 'html',
+        enabled: true,
+        any: ['oobee-grading-text-contents'],
+        tags: ['wcag2aaa', 'wcag315'],
+        metadata: {
+          description:
+            'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
+          help: 'Text content should be clear and plain to ensure that it is easily understood.',
+          helpUrl: 'https://www.wcag.com/uncategorized/3-1-5-reading-level/',
+        },
+      },
+    ]
+      // Drop the custom oobee rules when they are disabled; built-in rules are kept
+      .filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
+      // AAA-only built-in rules are appended after filtering, so they are never dropped
+      .concat(
+        enableWcagAaa
+          ? [
+              {
+                id: 'color-contrast-enhanced',
+                enabled: true,
+              },
+              {
+                id: 'identical-links-same-purpose',
+                enabled: true,
+              },
+              {
+                id: 'meta-refresh-no-exceptions',
+                enabled: true,
+              },
+            ]
+          : [],
+      ),
+  };
+}

package/src/crawlers/custom/gradeReadability.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import textReadability from 'text-readability';
+/**
+ * Grades the readability of the given sentences with the Flesch Reading Ease formula.
+ *
+ * @param sentences - Sentences to be graded together as one text.
+ * @returns The score as a string when the text has at least 20 words and the score is
+ *   neither 0 nor above 50; otherwise an empty string. An empty string is also returned
+ *   when grading throws (the error is logged).
+ */
+export function gradeReadability(sentences: string[]): string {
+  try {
+    // Nothing to grade without sentences
+    if (sentences.length === 0) {
+      return '';
+    }
+    const text = sentences.join(' ').trim();
+    const totalWords = text.split(/\s+/).length;
+    // Texts shorter than 20 words are not graded at all
+    if (totalWords < 20) {
+      return '';
+    }
+    const score = textReadability.fleschReadingEase(text);
+    // A score of exactly 0 is treated like "not graded"; above 50 is a pass
+    if (score === 0 || score > 50) {
+      return '';
+    }
+    return score.toString();
+  } catch (error) {
+    console.error('Error extracting and grading text:', error);
+    return ''; // Grading problems never propagate to the caller
+  }
+}

package/src/crawlers/custom/xPathToCss.ts ADDED Viewed

@@ -0,0 +1,178 @@
+/**
+ * Converts a (restricted) XPath expression into an equivalent CSS selector.
+ *
+ * Supported steps are `/tag`, `//tag`, `following-sibling::tag`, `id("x")`, one attribute
+ * predicate per step (`[@attr="v"]` or `[contains(@attr, "v")]`) and a positional predicate
+ * (`[n]` or `[last()]`). Alternatives separated by `|` become a comma-separated list.
+ *
+ * @param expr - The XPath expression to convert.
+ * @returns The CSS selector. Special characters inside id values are backslash-escaped.
+ * @throws Error if `expr` is empty, if a predicate uses something other than an attribute
+ *   (e.g. `text()`), or if nothing in the expression could be converted. Expressions that
+ *   merely look unsupported are logged with `console.error` and converted on a best-effort
+ *   basis.
+ */
+export function xPathToCss(expr: string): string {
+  // True when the expression is non-trivial and contains none of the unsupported constructs
+  const isValidXPath = (candidate: string): boolean =>
+    typeof candidate !== 'undefined' &&
+    candidate.replace(/[\s-_=]/g, '') !== '' &&
+    candidate.length ===
+      candidate.replace(
+        /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
+        '',
+      ).length;
+  // Builds the step-matching regex from a Python-style template: the %(name)s placeholders
+  // are substituted and the ?P<name> markers removed, leaving plain numbered groups. The
+  // group indexes used further down depend on this exact structure.
+  const getValidationRegex = (): RegExp => {
+    let regex =
+      '(?P<node>' +
+      '(' +
+      '^id\\(["\\\']?(?P<idvalue>%(value)s)["\\\']?\\)' + // special case! `id(idValue)`
+      '|' +
+      '(?P<nav>//?(?:following-sibling::)?)(?P<tag>%(tag)s)' + //  `//div`
+      '(\\[(' +
+      '(?P<matched>(?P<mattr>@?%(attribute)s=["\\\'](?P<mvalue>%(value)s))["\\\']' + // `[@id="well"]` supported and `[text()="yes"]` is not
+      '|' +
+      '(?P<contained>contains\\((?P<cattr>@?%(attribute)s,\\s*["\\\'](?P<cvalue>%(value)s)["\\\']\\))' + // `[contains(@id, "bleh")]` supported and `[contains(text(), "some")]` is not
+      ')\\])?' +
+      '(\\[\\s*(?P<nth>\\d+|last\\(\\s*\\))\\s*\\])?' +
+      ')' +
+      ')';
+    const subRegexes: Record<string, string> = {
+      tag: '([a-zA-Z][a-zA-Z0-9:-]*|\\*)',
+      attribute: '[.a-zA-Z_:][-\\w:.]*(\\(\\))?)',
+      value: '\\s*[\\w/:][-/\\w\\s,:;.]*',
+    };
+    Object.entries(subRegexes).forEach(([key, pattern]) => {
+      regex = regex.replace(new RegExp(`%\\(${key}\\)s`, 'gi'), pattern);
+    });
+    regex = regex.replace(
+      /\?P<node>|\?P<idvalue>|\?P<nav>|\?P<tag>|\?P<matched>|\?P<mattr>|\?P<mvalue>|\?P<contained>|\?P<cattr>|\?P<cvalue>|\?P<nth>/gi,
+      '',
+    );
+    return new RegExp(regex, 'gi');
+  };
+  // Rewrites the common `contains(concat(" ", @class, " "), " name ")` idiom to `@class="name"`
+  const preParseXpath = (raw: string): string =>
+    raw.replace(
+      /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi,
+      '@class="$1"',
+    );
+  // Backslash-escapes characters that are not allowed unescaped in a CSS id selector
+  const escapeCssId = (id: string): string =>
+    id.replace(/[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g, '\\$&');
+  // Turns an id value into `#id`. Escaping happens here, on the id alone, so that a
+  // pseudo-class appended afterwards (e.g. `:nth-of-type(2)`) is never escaped by mistake.
+  const toIdSelector = (rawId: string): string =>
+    rawId
+      .trim()
+      .split(/\s+/)
+      .map(part => `#${escapeCssId(part)}`)
+      .join('');
+  // Turns a class attribute value into `.a.b`, ignoring surrounding/repeated whitespace
+  const toClassSelector = (rawClasses: string): string =>
+    `.${rawClasses.trim().split(/\s+/).join('.')}`;
+
+  if (!expr) {
+    throw new Error('Missing XPath expression');
+  }
+  const parsedExpr = preParseXpath(expr);
+  if (!isValidXPath(parsedExpr)) {
+    console.error(`Invalid or unsupported XPath: ${parsedExpr}`);
+    // do not throw error so that this function proceeds to convert xpath that it does not support
+    // for example, //*[@id="google_ads_iframe_/4654/dweb/imu1/homepage/landingpage/na_0"]/html/body/div[1]/a
+    // whose id contains slashes that must be escaped in the resulting id selector
+  }
+  const prog = getValidationRegex();
+  const cssSelectors: string[] = [];
+  for (const xPath of parsedExpr.split('|')) {
+    // An empty alternative ends the conversion
+    if (!xPath) {
+      break;
+    }
+    const css: string[] = [];
+    let position = 0;
+    let nodes: RegExpExecArray | null;
+    // `prog` is global: every exec() call yields the next step of the current alternative
+    while ((nodes = prog.exec(xPath)) !== null) {
+      const match = {
+        node: nodes[5],
+        idvalue: nodes[12] || nodes[3],
+        nav: nodes[4],
+        tag: nodes[5],
+        matched: nodes[7],
+        mattr: nodes[10] || nodes[14],
+        mvalue: nodes[12] || nodes[16],
+        contained: nodes[13],
+        cattr: nodes[14],
+        cvalue: nodes[16],
+        nth: nodes[18],
+      };
+      // The first step never gets a combinator
+      let nav = '';
+      if (position !== 0 && match.nav) {
+        if (match.nav.includes('following-sibling::')) {
+          nav = ' + ';
+        } else {
+          nav = match.nav === '//' ? ' ' : ' > ';
+        }
+      }
+      const tag = match.tag === '*' ? '' : match.tag || '';
+      let attr = '';
+      if (match.contained) {
+        if (match.cattr.indexOf('@') === 0) {
+          attr = `[${match.cattr.replace(/^@/, '')}*="${match.cvalue}"]`;
+        } else {
+          throw new Error(`Invalid or unsupported XPath attribute: ${match.cattr}`);
+        }
+      } else if (match.matched) {
+        switch (match.mattr) {
+          case '@id':
+            attr = toIdSelector(match.mvalue);
+            break;
+          case '@class':
+            attr = toClassSelector(match.mvalue);
+            break;
+          case 'text()':
+          case '.':
+            throw new Error(`Invalid or unsupported XPath attribute: ${match.mattr}`);
+          default: {
+            if (match.mattr.indexOf('@') !== 0) {
+              throw new Error(`Invalid or unsupported XPath attribute: ${match.mattr}`);
+            }
+            // Values containing spaces are trimmed; the value is quoted exactly once
+            const value = match.mvalue.includes(' ') ? match.mvalue.trim() : match.mvalue;
+            attr = `[${match.mattr.replace('@', '')}="${value}"]`;
+            break;
+          }
+        }
+      } else if (match.idvalue) {
+        attr = toIdSelector(match.idvalue);
+      }
+      let nth = '';
+      if (match.nth) {
+        if (match.nth.indexOf('last') === -1) {
+          const index = parseInt(match.nth, 10);
+          if (isNaN(index)) {
+            throw new Error(`Invalid or unsupported XPath attribute: ${match.nth}`);
+          }
+          nth = index !== 1 ? `:nth-of-type(${match.nth})` : ':first-of-type';
+        } else {
+          nth = ':last-of-type';
+        }
+      }
+      css.push(nav + tag + attr + nth);
+      position++;
+    }
+    const result = css.join('');
+    if (result === '') {
+      throw new Error('Invalid or unsupported XPath');
+    }
+    cssSelectors.push(result);
+  }
+  return cssSelectors.join(', ');
+}

package/src/crawlers/pdfScanFunc.ts CHANGED Viewed

@@ -256,30 +256,63 @@ export const handlePdfDownload = (
   pdfDownloads.push(
     new Promise<void>(async resolve => {
-      const bufs = [];
-      let pdfResponse: ReadStream;
+      let bufs: Buffer[] = [];
+      let buf: Buffer;
       if (isFilePath(url)) {
-        // Read the file from the file system
+        // Read from local file system
         const filePath = new URL(url).pathname;
-        pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
+        const pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
+        const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
+          flags: 'a',
+        });
+        pdfResponse.on('data', (chunk: Buffer) => {
+          downloadFile.write(chunk, 'binary');
+          bufs.push(Buffer.from(chunk));
+        });
+        pdfResponse.on('end', () => {
+          downloadFile.end();
+          buf = Buffer.concat(bufs);
+          if (isPDF(buf)) {
+            guiInfoLog(guiInfoStatusTypes.SCANNED, {
+              numScanned: urlsCrawled.scanned.length,
+              urlScanned: request.url,
+            });
+            urlsCrawled.scanned.push({
+              url: request.url,
+              pageTitle,
+              actualUrl: url,
+            });
+          } else {
+            guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+              numScanned: urlsCrawled.scanned.length,
+              urlScanned: request.url,
+            });
+            urlsCrawled.invalid.push({
+              url: request.url,
+              pageTitle: url,
+              actualUrl: url,
+            });
+          }
+          resolve();
+        });
       } else {
-        // Send HTTP/HTTPS request
-        pdfResponse = await sendRequest({ responseType: 'buffer', isStream: true });
-        pdfResponse.setEncoding('binary');
-      }
-      const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
-        flags: 'a',
-      });
+        // Download from remote URL
+        const response = await sendRequest({ responseType: 'buffer' });
+        buf = Buffer.isBuffer(response) ? response : response.body;
-      pdfResponse.on('data', (chunk: Buffer) => {
-        downloadFile.write(chunk, 'binary');
-        bufs.push(Buffer.from(chunk));
-      });
+        const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
+          flags: 'a',
+        });
-      pdfResponse.on('end', () => {
+        downloadFile.write(buf, 'binary');
         downloadFile.end();
-        const buf = Buffer.concat(bufs);
         if (isPDF(buf)) {
           guiInfoLog(guiInfoStatusTypes.SCANNED, {
             numScanned: urlsCrawled.scanned.length,
@@ -298,11 +331,12 @@ export const handlePdfDownload = (
           urlsCrawled.invalid.push({
             url: request.url,
             pageTitle: url,
-            actualUrl: url, // i.e. actualUrl
+            actualUrl: url,
           });
         }
         resolve();
-      });
+      }
     }),
   );
@@ -374,14 +408,21 @@ export const mapPdfScanResults = async (
       const { itemDetails, validationResult } = jobs[jobIdx];
       const { name: fileName } = itemDetails;
-      const uuid = fileName
-        .split(os.platform() === 'win32' ? '\\' : '/')
-        .pop()
-        .split('.')[0];
-      const url = uuidToUrlMapping[uuid];
-      const pageTitle = decodeURI(url).split('/').pop();
-      const filePath = `${randomToken}/${uuid}.pdf`;
+      const rawFileName = fileName.split(os.platform() === 'win32' ? '\\' : '/').pop();
+      const fileNameWithoutExt = rawFileName.replace(/\.pdf$/i, '');
+      const url =
+        uuidToUrlMapping[rawFileName] || // exact match like 'Some-filename.pdf'
+        uuidToUrlMapping[fileNameWithoutExt] || // uuid-based key like 'a9f7ebbd-5a90...'
+        `file://${fileName}`; // fallback
+      const filePath = `${randomToken}/${rawFileName}`;
+      const pageTitle = decodeURI(url).split('/').pop();
+      translated.url = url;
+      translated.pageTitle = pageTitle;
       translated.url = url;
       translated.pageTitle = pageTitle;
       translated.filePath = filePath;