@govtechsg/oobee 0.10.51 → 0.10.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import os from 'os';
7
7
  import { spawnSync, execSync } from 'child_process';
8
8
  import { chromium } from 'playwright';
9
9
  import * as Sentry from '@sentry/node';
10
- import { silentLogger } from '../logs.js';
10
+ import { consoleLogger, silentLogger } from '../logs.js';
11
11
  import { PageInfo } from '../mergeAxeResults.js';
12
12
 
13
13
  const filename = fileURLToPath(import.meta.url);
@@ -128,7 +128,7 @@ export const getDefaultChromiumDataDir = () => {
128
128
  defaultChromiumDataDir = '/tmp';
129
129
  }
130
130
 
131
- silentLogger.warn(`Using Chromium support directory at ${defaultChromiumDataDir}`);
131
+ consoleLogger.info(`Using Chromium support directory at ${defaultChromiumDataDir}`);
132
132
  }
133
133
 
134
134
  if (defaultChromiumDataDir && fs.existsSync(defaultChromiumDataDir)) {
@@ -179,6 +179,7 @@ export const basicAuthRegex = /^.*\/\/.*:.*@.*$/i;
179
179
  // for crawlers
180
180
  export const axeScript = path.join(dirname, '../../node_modules/axe-core/axe.min.js');
181
181
  export class UrlsCrawled {
182
+ siteName: string;
182
183
  toScan: string[] = [];
183
184
  scanned: PageInfo[] = [];
184
185
  invalid: PageInfo[] = [];
@@ -361,6 +362,7 @@ const wcagLinks = {
361
362
  // 'WCAG 1.4.10': 'https://www.w3.org/TR/WCAG22/#reflow', - TODO: review for veraPDF
362
363
  'WCAG 1.4.12': 'https://www.w3.org/TR/WCAG22/#text-spacing',
363
364
  'WCAG 2.1.1': 'https://www.w3.org/TR/WCAG22/#keyboard',
365
+ 'WCAG 2.1.3': 'https://www.w3.org/WAI/WCAG22/Understanding/keyboard-no-exception.html', // AAA
364
366
  'WCAG 2.2.1': 'https://www.w3.org/TR/WCAG22/#timing-adjustable',
365
367
  'WCAG 2.2.2': 'https://www.w3.org/TR/WCAG22/#pause-stop-hide',
366
368
  'WCAG 2.2.4': 'https://www.w3.org/TR/WCAG22/#interruptions', // AAA
@@ -564,3 +566,46 @@ export const STATUS_CODE_METADATA: Record<number,string> = {
564
566
  511: '511 - Network Authentication Required',
565
567
 
566
568
  };
569
+
570
/**
 * href prefixes identifying links that should not be clicked or enqueued.
 *
 * Compiled with reference to https://chromeenterprise.google/policies/url-patterns/
 * Entries are lower-case; order is preserved because other constants are derived
 * from this list by position-independent but order-visible joins.
 */
export const disallowedListOfPatterns: string[] = [
  // Same-page fragment links
  '#',

  // Mail, telephony and messaging / meeting application handlers
  'mailto:',
  'tel:',
  'sms:',
  'skype:',
  'zoommtg:',
  'msteams:',
  'whatsapp:',
  'slack:',
  'viber:',
  'tg:',
  'line:',
  'meet:',
  'facetime:',
  'imessage:',
  'discord:',
  'sgnl:',
  'webex:',

  // Other application-launching handlers
  'intent:',
  'ms-outlook:',
  'ms-onedrive:',
  'ms-word:',
  'ms-excel:',
  'ms-powerpoint:',
  'ms-office:',
  'onenote:',
  'vs:',

  // Browser-internal schemes
  'chrome-extension:',
  'chrome-search:',
  'chrome:',
  'chrome-untrusted:',
  'devtools:',
  'isolated-app:',
];
607
+
608
/**
 * Single comma-separated CSS selector matching every `<a>` whose `href` attribute
 * starts with one of the entries in `disallowedListOfPatterns`.
 * Any whitespace in the joined selector is stripped.
 */
export const disallowedSelectorPatterns = Array.from(
  disallowedListOfPatterns,
  prefix => 'a[href^="' + prefix + '"]',
)
  .join(',')
  .replace(/\s+/g, '');
@@ -1,13 +1,14 @@
1
1
  import crawlee, { CrawlingContext, PlaywrightGotoOptions, Request } from 'crawlee';
2
2
  import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
3
- import { BrowserContext, Page } from 'playwright';
3
+ import { BrowserContext, ElementHandle, Page } from 'playwright';
4
4
  import {
5
5
  axeScript,
6
+ disallowedListOfPatterns,
6
7
  guiInfoStatusTypes,
7
8
  RuleFlags,
8
9
  saflyIconSelector,
9
10
  } from '../constants/constants.js';
10
- import { guiInfoLog, silentLogger } from '../logs.js';
11
+ import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
11
12
  import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotFunc.js';
12
13
  import { isFilePath } from '../constants/common.js';
13
14
  import { extractAndGradeText } from './custom/extractAndGradeText.js';
@@ -19,6 +20,7 @@ import { findElementByCssSelector } from './custom/findElementByCssSelector.js';
19
20
  import { getAxeConfiguration } from './custom/getAxeConfiguration.js';
20
21
  import { flagUnlabelledClickableElements } from './custom/flagUnlabelledClickableElements.js';
21
22
  import xPathToCss from './custom/xPathToCss.js';
23
+ import type { Response as PlaywrightResponse } from 'playwright';
22
24
 
23
25
  // types
24
26
  interface AxeResultsWithScreenshot extends AxeResults {
@@ -305,7 +307,7 @@ export const runAxeScript = async ({
305
307
  });
306
308
  });
307
309
  } catch (e) {
308
- silentLogger.warn(`Error while checking for DOM mutations: ${e}`);
310
+ // do nothing, just continue
309
311
  }
310
312
 
311
313
  // Omit logging of browser console errors to reduce unnecessary verbosity
@@ -459,9 +461,9 @@ export const runAxeScript = async ({
459
461
  try {
460
462
  pageTitle = await page.evaluate(() => document.title);
461
463
  } catch (e) {
462
- silentLogger.warn(`Error while getting page title: ${e}`);
464
+ consoleLogger.info(`Error while getting page title: ${e}`);
463
465
  if (page.isClosed()) {
464
- silentLogger.info(`Page was closed for ${requestUrl}, creating new page`);
466
+ consoleLogger.info(`Page was closed for ${requestUrl}, creating new page`);
465
467
  page = await browserContext.newPage();
466
468
  await page.goto(requestUrl, { waitUntil: 'domcontentloaded' });
467
469
  pageTitle = await page.evaluate(() => document.title);
@@ -508,3 +510,80 @@ export const isUrlPdf = (url: string) => {
508
510
  const parsedUrl = new URL(url);
509
511
  return /\.pdf($|\?|#)/i.test(parsedUrl.pathname) || /\.pdf($|\?|#)/i.test(parsedUrl.href);
510
512
  };
513
+
514
/**
 * Decide whether a click on `element` must be avoided because the element contains,
 * is, or is nested inside an `<a>` whose href begins with a disallowed prefix.
 *
 * The check is executed in the page via `page.evaluate`; each href is lower-cased
 * before being compared against `disallowedListOfPatterns`.
 *
 * @param page - Page used to evaluate the check
 * @param element - Handle to the element that is a candidate for clicking
 * @returns true when a matching anchor is found among the element's descendants,
 *   the element itself, or any of its ancestors; false otherwise
 */
export async function shouldSkipClickDueToDisallowedHref(
  page: Page,
  element: ElementHandle
): Promise<boolean> {
  const payload = { el: element, disallowedPrefixes: disallowedListOfPatterns };

  const mustSkip = await page.evaluate(({ el, disallowedPrefixes }) => {
    const target = el as HTMLElement;

    // True when the anchor carries a non-empty href starting with a disallowed prefix
    const hasDisallowedHref = (anchor: Element): boolean => {
      const raw = anchor.getAttribute('href');
      if (!raw) return false;
      const lowered = raw.toLowerCase();
      return disallowedPrefixes.some((prefix: string) => lowered.startsWith(prefix));
    };

    // Anchors nested inside the element
    const nestedAnchors = Array.from(target.querySelectorAll('a[href]'));
    if (nestedAnchors.some(hasDisallowedHref)) {
      return true;
    }

    // The element itself, then every ancestor up to the document root
    for (let node: HTMLElement | null = target; node; node = node.parentElement) {
      if (node.tagName === 'A' && hasDisallowedHref(node)) {
        return true;
      }
    }

    return false;
  }, payload);

  return mustSkip;
}
557
+
558
/**
 * Check if a response should be skipped based on its content headers.
 *
 * A response is skipped when:
 *  - its Content-Disposition marks it as an attachment, or
 *  - its Content-Type is not text/HTML-like (any `application/*` other than
 *    `application/xhtml+xml`, anything containing `octet-stream`, or anything that
 *    neither starts with `text/` nor contains `html`).
 *
 * Header values are compared case-insensitively, since media types and disposition
 * types are case-insensitive. A response without a Content-Type header is skipped.
 *
 * @param response - Playwright Response object; a falsy value is never skipped
 * @param requestUrl - Optional: request URL for logging (currently unused; kept so
 *   existing call sites remain valid)
 * @returns true if the content should be skipped
 */
export const shouldSkipDueToUnsupportedContent = (
  response: PlaywrightResponse,
  requestUrl: string = ''
): boolean => {
  if (!response) return false;

  const headers = response.headers();
  // Normalise values: only header *names* are guaranteed lower-case, not values.
  const contentDisposition = (headers['content-disposition'] || '').trim().toLowerCase();
  const contentType = (headers['content-type'] || '').trim().toLowerCase();

  if (contentDisposition.includes('attachment')) {
    return true;
  }

  // Media type without parameters such as "; charset=utf-8"
  const [mediaType = ''] = contentType.split(';', 1);

  // XHTML is an HTML document served under application/*; it must not be caught
  // by the blanket application/* exclusion below.
  if (mediaType.trim() === 'application/xhtml+xml') {
    return false;
  }

  if (
    contentType.startsWith('application/') ||
    contentType.includes('octet-stream') ||
    (!contentType.startsWith('text/') && !contentType.includes('html'))
  ) {
    return true;
  }

  return false;
};