@govtechsg/oobee 0.10.20 → 0.10.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.github/workflows/docker-test.yml +1 -1
  2. package/DETAILS.md +40 -25
  3. package/Dockerfile +41 -47
  4. package/LICENSE-3RD-PARTY-REPORT.txt +448 -0
  5. package/LICENSE-3RD-PARTY.txt +19913 -0
  6. package/README.md +26 -0
  7. package/__mocks__/mock-report.html +1503 -1360
  8. package/package.json +9 -5
  9. package/scripts/decodeUnzipParse.js +29 -0
  10. package/scripts/install_oobee_dependencies.command +2 -2
  11. package/scripts/install_oobee_dependencies.ps1 +3 -3
  12. package/src/cli.ts +9 -7
  13. package/src/combine.ts +13 -5
  14. package/src/constants/cliFunctions.ts +38 -1
  15. package/src/constants/common.ts +31 -5
  16. package/src/constants/constants.ts +28 -26
  17. package/src/constants/questions.ts +4 -1
  18. package/src/crawlers/commonCrawlerFunc.ts +114 -152
  19. package/src/crawlers/crawlDomain.ts +25 -32
  20. package/src/crawlers/crawlIntelligentSitemap.ts +7 -1
  21. package/src/crawlers/crawlLocalFile.ts +1 -1
  22. package/src/crawlers/crawlSitemap.ts +1 -1
  23. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +546 -472
  24. package/src/crawlers/customAxeFunctions.ts +1 -1
  25. package/src/index.ts +2 -2
  26. package/src/mergeAxeResults.ts +590 -214
  27. package/src/screenshotFunc/pdfScreenshotFunc.ts +3 -3
  28. package/src/static/ejs/partials/components/scanAbout.ejs +65 -0
  29. package/src/static/ejs/partials/components/wcagCompliance.ejs +10 -29
  30. package/src/static/ejs/partials/footer.ejs +10 -13
  31. package/src/static/ejs/partials/scripts/categorySummary.ejs +2 -2
  32. package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +3 -0
  33. package/src/static/ejs/partials/scripts/reportSearch.ejs +1 -0
  34. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +54 -52
  35. package/src/static/ejs/partials/scripts/scanAboutScript.ejs +38 -0
  36. package/src/static/ejs/partials/styles/styles.ejs +26 -1
  37. package/src/static/ejs/partials/summaryMain.ejs +15 -42
  38. package/src/static/ejs/report.ejs +22 -12
  39. package/src/utils.ts +10 -2
  40. package/src/xPathToCss.ts +186 -0
  41. package/a11y-scan-results.zip +0 -0
  42. package/src/types/xpath-to-css.d.ts +0 -3
@@ -1,14 +1,14 @@
1
1
  import crawlee, { CrawlingContext, PlaywrightGotoOptions } from 'crawlee';
2
2
  import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
3
- import xPathToCss from 'xpath-to-css';
4
- import { Page } from 'playwright';
3
+ import { xPathToCss } from '../xPathToCss.js';
4
+ import { BrowserContext, Page } from 'playwright';
5
5
  import {
6
6
  axeScript,
7
7
  guiInfoStatusTypes,
8
8
  RuleFlags,
9
9
  saflyIconSelector,
10
10
  } from '../constants/constants.js';
11
- import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
11
+ import { guiInfoLog, silentLogger } from '../logs.js';
12
12
  import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotFunc.js';
13
13
  import { isFilePath } from '../constants/common.js';
14
14
  import { customAxeConfig } from './customAxeFunctions.js';
@@ -208,63 +208,70 @@ export const runAxeScript = async ({
208
208
  selectors?: string[];
209
209
  ruleset?: RuleFlags[];
210
210
  }) => {
211
- // Checking for DOM mutations before proceeding to scan
212
- await page.evaluate(() => {
213
- return new Promise(resolve => {
214
- let timeout: NodeJS.Timeout;
215
- let mutationCount = 0;
216
- const MAX_MUTATIONS = 100;
217
- const MAX_SAME_MUTATION_LIMIT = 10;
218
- const mutationHash = {};
219
-
220
- const observer = new MutationObserver(mutationsList => {
221
- clearTimeout(timeout);
222
-
223
- mutationCount += 1;
211
+ const browserContext: BrowserContext = page.context();
212
+ const requestUrl = page.url();
213
+
214
+ try {
215
+ // Checking for DOM mutations before proceeding to scan
216
+ await page.evaluate(() => {
217
+ return new Promise(resolve => {
218
+ let timeout: NodeJS.Timeout;
219
+ let mutationCount = 0;
220
+ const MAX_MUTATIONS = 250;
221
+ const MAX_SAME_MUTATION_LIMIT = 10;
222
+ const mutationHash = {};
223
+
224
+ const observer = new MutationObserver(mutationsList => {
225
+ clearTimeout(timeout);
226
+
227
+ mutationCount += 1;
228
+
229
+ if (mutationCount > MAX_MUTATIONS) {
230
+ observer.disconnect();
231
+ resolve('Too many mutations detected');
232
+ }
224
233
 
225
- if (mutationCount > MAX_MUTATIONS) {
226
- observer.disconnect();
227
- resolve('Too many mutations detected');
228
- }
234
+ // To handle scenario where DOM elements are constantly changing and unable to exit
235
+ mutationsList.forEach(mutation => {
236
+ let mutationKey: string;
229
237
 
230
- // To handle scenario where DOM elements are constantly changing and unable to exit
231
- mutationsList.forEach(mutation => {
232
- let mutationKey: string;
238
+ if (mutation.target instanceof Element) {
239
+ Array.from(mutation.target.attributes).forEach(attr => {
240
+ mutationKey = `${mutation.target.nodeName}-${attr.name}`;
233
241
 
234
- if (mutation.target instanceof Element) {
235
- Array.from(mutation.target.attributes).forEach(attr => {
236
- mutationKey = `${mutation.target.nodeName}-${attr.name}`;
242
+ if (mutationKey) {
243
+ if (!mutationHash[mutationKey]) {
244
+ mutationHash[mutationKey] = 1;
245
+ } else {
246
+ mutationHash[mutationKey] += 1;
247
+ }
237
248
 
238
- if (mutationKey) {
239
- if (!mutationHash[mutationKey]) {
240
- mutationHash[mutationKey] = 1;
241
- } else {
242
- mutationHash[mutationKey] += 1;
249
+ if (mutationHash[mutationKey] >= MAX_SAME_MUTATION_LIMIT) {
250
+ observer.disconnect();
251
+ resolve(`Repeated mutation detected for ${mutationKey}`);
252
+ }
243
253
  }
254
+ });
255
+ }
256
+ });
244
257
 
245
- if (mutationHash[mutationKey] >= MAX_SAME_MUTATION_LIMIT) {
246
- observer.disconnect();
247
- resolve(`Repeated mutation detected for ${mutationKey}`);
248
- }
249
- }
250
- });
251
- }
258
+ timeout = setTimeout(() => {
259
+ observer.disconnect();
260
+ resolve('DOM stabilized after mutations.');
261
+ }, 1000);
252
262
  });
253
263
 
254
264
  timeout = setTimeout(() => {
255
265
  observer.disconnect();
256
- resolve('DOM stabilized after mutations.');
266
+ resolve('No mutations detected, exit from idle state');
257
267
  }, 1000);
258
- });
259
268
 
260
- timeout = setTimeout(() => {
261
- observer.disconnect();
262
- resolve('No mutations detected, exit from idle state');
263
- }, 1000);
264
-
265
- observer.observe(document, { childList: true, subtree: true, attributes: true });
269
+ observer.observe(document, { childList: true, subtree: true, attributes: true });
270
+ });
266
271
  });
267
- });
272
+ } catch (e) {
273
+ silentLogger.warn(`Error while checking for DOM mutations: ${e}`);
274
+ }
268
275
 
269
276
  page.on('console', msg => {
270
277
  const type = msg.type();
@@ -409,123 +416,62 @@ export const runAxeScript = async ({
409
416
  const escapedCssSelectors =
410
417
  oobeeAccessibleLabelFlaggedCssSelectors.map(escapeCSSSelector);
411
418
 
412
- function frameCheck(cssSelector: string): { doc: Document; remainingSelector: string } {
419
+ function framesCheck(cssSelector: string): { doc: Document; remainingSelector: string } {
413
420
  let doc = document; // Start with the main document
414
- let frameSelector = ""; // To store the frame part of the selector
421
+ let remainingSelector = ""; // To store the last part of the selector
422
+ let targetIframe = null;
415
423
 
416
- // Extract the 'frame' part of the selector
417
- let frameMatch = cssSelector.match(/(frame[^>]*>)/i);
418
- if (frameMatch) {
419
- frameSelector = frameMatch[1].replace(">", "").trim(); // Clean up the frame part
420
- cssSelector = cssSelector.split(frameMatch[1])[1].trim(); // Remove the frame portion
421
- }
424
+ // Split the selector into parts at "> html"
425
+ const diffParts = cssSelector.split(/\s*>\s*html\s*/);
422
426
 
423
- let targetFrame = null; // Target frame element
424
-
425
- // Locate the frame based on the extracted frameSelector
426
- if (frameSelector.includes("first-of-type")) {
427
- // Select the first frame
428
- targetFrame = document.querySelector("frame:first-of-type");
429
- } else if (frameSelector.includes("nth-of-type")) {
430
- // Select the nth frame
431
- let nthIndex = frameSelector.match(/nth-of-type\((\d+)\)/);
432
- if (nthIndex) {
433
- let index = parseInt(nthIndex[1]) - 1; // Zero-based index
434
- targetFrame = document.querySelectorAll("frame")[index];
435
- }
436
- } else if (frameSelector.includes("#")) {
437
- // Frame with a specific ID
438
- let idMatch = frameSelector.match(/#([\w-]+)/);
439
- if (idMatch) {
440
- targetFrame = document.getElementById(idMatch[1]);
441
- }
442
- } else if (frameSelector.includes('[name="')) {
443
- // Frame with a specific name attribute
444
- let nameMatch = frameSelector.match(/name="([\w-]+)"/);
445
- if (nameMatch) {
446
- targetFrame = document.querySelector(`frame[name="${nameMatch[1]}"]`);
447
- }
448
- } else {
449
- // Default to the first frame
450
- targetFrame = document.querySelector("frame");
451
- }
452
-
453
- // Update the document if the frame was found
454
- if (targetFrame && targetFrame.contentDocument) {
455
- doc = targetFrame.contentDocument;
456
- } else {
457
- console.warn("Frame not found or contentDocument inaccessible.");
458
- }
427
+ for (let i = 0; i < diffParts.length - 1; i++) {
428
+ let iframeSelector = `${diffParts[i].trim()}`;
459
429
 
460
- return { doc, remainingSelector: cssSelector };
461
- }
462
-
463
- function iframeCheck(cssSelector: string): { doc: Document; remainingSelector: string } {
464
- let doc = document; // Start with the main document
465
- let iframeSelector = ""; // To store the iframe part of the selector
466
-
467
- // Extract the 'iframe' part of the selector
468
- let iframeMatch = cssSelector.match(/(iframe[^>]*>)/i);
469
- if (iframeMatch) {
470
- iframeSelector = iframeMatch[1].replace(">", "").trim(); // Clean up the iframe part
471
- cssSelector = cssSelector.split(iframeMatch[1])[1].trim(); // Remove the iframe portion
472
- }
430
+ // Add back '> html' to the current part
431
+ if (i > 0) {
432
+ iframeSelector = "html > " + iframeSelector;
433
+ }
473
434
 
474
- let targetIframe = null; // Target iframe element
475
-
476
- // Locate the iframe based on the extracted iframeSelector
477
- if (iframeSelector.includes("first-of-type")) {
478
- // Select the first iframe
479
- targetIframe = document.querySelector("iframe:first-of-type");
480
- } else if (iframeSelector.includes("nth-of-type")) {
481
- // Select the nth iframe
482
- let nthIndex = iframeSelector.match(/nth-of-type\((\d+)\)/);
483
- if (nthIndex) {
484
- let index = parseInt(nthIndex[1]) - 1; // Zero-based index
485
- targetIframe = document.querySelectorAll("iframe")[index];
435
+ let frameset = null;
436
+ // Find the iframe using the current document context
437
+ if (doc.querySelector("frameset")) {
438
+ frameset = doc.querySelector("frameset");
486
439
  }
487
- } else if (iframeSelector.includes("#")) {
488
- // Iframe with a specific ID
489
- let idMatch = iframeSelector.match(/#([\w-]+)/);
490
- if (idMatch) {
491
- targetIframe = document.getElementById(idMatch[1]);
440
+
441
+ if (frameset) {
442
+ doc = frameset;
443
+ iframeSelector = iframeSelector.split("body >")[1].trim();
492
444
  }
493
- } else if (iframeSelector.includes('[name="')) {
494
- // Iframe with a specific name attribute
495
- let nameMatch = iframeSelector.match(/name="([\w-]+)"/);
496
- if (nameMatch) {
497
- targetIframe = document.querySelector(`iframe[name="${nameMatch[1]}"]`);
445
+ targetIframe = doc.querySelector(iframeSelector);
446
+
447
+ if (targetIframe && targetIframe.contentDocument) {
448
+ // Update the document to the iframe's contentDocument
449
+ doc = targetIframe.contentDocument;
450
+ } else {
451
+ console.warn(`Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`);
452
+ return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
498
453
  }
499
- } else {
500
- // Default to the first iframe
501
- targetIframe = document.querySelector("iframe");
502
454
  }
503
455
 
504
- // Update the document if the iframe was found
505
- if (targetIframe && targetIframe.contentDocument) {
506
- doc = targetIframe.contentDocument;
507
- } else {
508
- console.warn("Iframe not found or contentDocument inaccessible.");
509
- }
456
+ // The last part is the remaining CSS selector
457
+ remainingSelector = diffParts[diffParts.length - 1].trim();
458
+
459
+ // Remove any leading '>' combinators from remainingSelector
460
+ remainingSelector = "html" + remainingSelector;
510
461
 
511
- return { doc, remainingSelector: cssSelector };
462
+ return { doc, remainingSelector };
512
463
  }
513
464
 
465
+
514
466
  function findElementByCssSelector(cssSelector: string): string | null {
515
467
  let doc = document;
516
468
 
517
- // Check if the selector includes 'frame' and update doc and selector
518
- if (cssSelector.includes("frame")) {
519
- const result = frameCheck(cssSelector);
520
- doc = result.doc;
521
- cssSelector = result.remainingSelector;
522
- }
469
+ // Check if the selector includes 'frame' or 'iframe' and update doc and selector
523
470
 
524
- // Check for iframe
525
- if (cssSelector.includes("iframe")) {
526
- const result = iframeCheck(cssSelector);
527
- doc = result.doc;
528
- cssSelector = result.remainingSelector;
471
+ if (/\s*>\s*html\s*/.test(cssSelector)) {
472
+ let inFrames = framesCheck(cssSelector)
473
+ doc = inFrames.doc;
474
+ cssSelector = inFrames.remainingSelector;
529
475
  }
530
476
 
531
477
  // Query the element in the document (including inside frames)
@@ -553,14 +499,19 @@ export const runAxeScript = async ({
553
499
  }
554
500
  }
555
501
 
556
- return element ? element.outerHTML : null;
502
+ if (element) {
503
+ return element.outerHTML;
504
+ }
505
+
506
+ console.warn(`Unable to find element for css selector: ${cssSelector}`);
507
+ return null;
557
508
  }
558
509
 
559
510
  // Add oobee violations to Axe's report
560
511
  const oobeeAccessibleLabelViolations = {
561
512
  id: 'oobee-accessible-label',
562
513
  impact: 'serious' as ImpactValue,
563
- tags: ['wcag2a', 'wcag211', 'wcag243', 'wcag412'],
514
+ tags: ['wcag2a', 'wcag211', 'wcag412'],
564
515
  description: 'Ensures clickable elements have an accessible label.',
565
516
  help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
566
517
  helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
@@ -581,7 +532,7 @@ export const runAxeScript = async ({
581
532
  ],
582
533
  all: [],
583
534
  none: [],
584
- })),
535
+ })).filter(item => item.html)
585
536
  };
586
537
 
587
538
  results.violations = [...results.violations, oobeeAccessibleLabelViolations];
@@ -612,7 +563,18 @@ export const runAxeScript = async ({
612
563
  results.incomplete = await takeScreenshotForHTMLElements(results.incomplete, page, randomToken);
613
564
  }
614
565
 
615
- const pageTitle = await page.evaluate(() => document.title);
566
+ let pageTitle = null;
567
+ try {
568
+ pageTitle = await page.evaluate(() => document.title);
569
+ } catch (e) {
570
+ silentLogger.warn(`Error while getting page title: ${e}`);
571
+ if (page.isClosed()) {
572
+ silentLogger.info(`Page was closed for ${requestUrl}, creating new page`);
573
+ page = await browserContext.newPage();
574
+ await page.goto(requestUrl, { waitUntil: 'domcontentloaded' });
575
+ pageTitle = await page.evaluate(() => document.title);
576
+ }
577
+ }
616
578
 
617
579
  return filterAxeResults(results, pageTitle, customFlowDetails);
618
580
  };
@@ -653,4 +615,4 @@ export const isUrlPdf = (url: string) => {
653
615
  }
654
616
  const parsedUrl = new URL(url);
655
617
  return /\.pdf($|\?|#)/i.test(parsedUrl.pathname) || /\.pdf($|\?|#)/i.test(parsedUrl.href);
656
- };
618
+ };
@@ -469,7 +469,7 @@ const crawlDomain = async ({
469
469
  launcher: constants.launcher,
470
470
  launchOptions: getPlaywrightLaunchOptions(browser),
471
471
  // Bug in Chrome which causes browser pool crash when userDataDirectory is set in non-headless mode
472
- userDataDir,
472
+ ...(process.env.CRAWLEE_HEADLESS === '0' && { userDataDir }),
473
473
  },
474
474
  retryOnBlocked: true,
475
475
  browserPoolOptions: {
@@ -496,7 +496,7 @@ const crawlDomain = async ({
496
496
  return new Promise(resolve => {
497
497
  let timeout;
498
498
  let mutationCount = 0;
499
- const MAX_MUTATIONS = 100;
499
+ const MAX_MUTATIONS = 250;
500
500
  const MAX_SAME_MUTATION_LIMIT = 10;
501
501
  const mutationHash = {};
502
502
 
@@ -568,38 +568,31 @@ const crawlDomain = async ({
568
568
  ],
569
569
  preNavigationHooks: isBasicAuth
570
570
  ? [
571
- async ({ page, request }) => {
572
- await page.setExtraHTTPHeaders({
573
- Authorization: authHeader,
574
- ...extraHTTPHeaders,
575
- });
576
- const processible = await isProcessibleUrl(request.url);
577
- if (!processible) {
578
- request.skipNavigation = true;
579
- return null;
580
- }
581
- },
582
- ]
571
+ async ({ page, request }) => {
572
+ await page.setExtraHTTPHeaders({
573
+ Authorization: authHeader,
574
+ ...extraHTTPHeaders,
575
+ });
576
+ const processible = await isProcessibleUrl(request.url);
577
+ if (!processible) {
578
+ request.skipNavigation = true;
579
+ return null;
580
+ }
581
+ },
582
+ ]
583
583
  : [
584
- async (crawlingContext, gotoOptions) => {
585
- const { page, request } = crawlingContext;
586
-
587
- await page.setExtraHTTPHeaders({
588
- ...extraHTTPHeaders,
589
- });
590
-
591
- Object.assign(gotoOptions, {
592
- waitUntil: 'networkidle',
593
- timeout: 30000,
594
- });
584
+ async ({ page, request }) => {
585
+ await page.setExtraHTTPHeaders({
586
+ ...extraHTTPHeaders,
587
+ });
595
588
 
596
- const processible = await isProcessibleUrl(request.url);
597
- if (!processible) {
598
- request.skipNavigation = true;
599
- return null;
600
- }
601
- },
602
- ],
589
+ const processible = await isProcessibleUrl(request.url);
590
+ if (!processible) {
591
+ request.skipNavigation = true;
592
+ return null;
593
+ }
594
+ },
595
+ ],
603
596
  requestHandlerTimeoutSecs: 90, // Allow each page to be processed by up from default 60 seconds
604
597
  requestHandler: async ({ page, request, response, crawler, sendRequest, enqueueLinks }) => {
605
598
  const browserContext: BrowserContext = page.context();
@@ -50,7 +50,13 @@ const crawlIntelligentSitemap = async (
50
50
  const homeUrl = getHomeUrl(link);
51
51
  let sitemapLinkFound = false;
52
52
  let sitemapLink = '';
53
- const chromiumBrowser = await chromium.launch({ headless: true, channel: 'chrome' });
53
+ const chromiumBrowser = await chromium.launch(
54
+ {
55
+ headless: false,
56
+ channel: 'chrome',
57
+ args: ['--headless=new', '--no-sandbox']
58
+ });
59
+
54
60
  const page = await chromiumBrowser.newPage();
55
61
  for (const path of sitemapPaths) {
56
62
  sitemapLink = homeUrl + path;
@@ -143,7 +143,7 @@ const crawlLocalFile = async (
143
143
 
144
144
  if (!isUrlPdf(request.url)) {
145
145
  const browserContext = await constants.launcher.launchPersistentContext('', {
146
- headless: process.env.CRAWLEE_HEADLESS === '1',
146
+ headless: false,
147
147
  ...getPlaywrightLaunchOptions(browser),
148
148
  ...playwrightDeviceDetailsObject,
149
149
  });
@@ -144,7 +144,7 @@ const crawlSitemap = async (
144
144
  launcher: constants.launcher,
145
145
  launchOptions: getPlaywrightLaunchOptions(browser),
146
146
  // Bug in Chrome which causes browser pool crash when userDataDirectory is set in non-headless mode
147
- userDataDir,
147
+ ...(process.env.CRAWLEE_HEADLESS === '0' && { userDataDir }),
148
148
  },
149
149
  retryOnBlocked: true,
150
150
  browserPoolOptions: {