@govtechsg/oobee 0.10.51 → 0.10.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/bump-package-version.yml +58 -0
- package/.github/workflows/image.yml +38 -17
- package/DETAILS.md +5 -2
- package/INTEGRATION.md +57 -53
- package/README.md +4 -1
- package/__tests__/test-sitemap-url-patterns.xml +105 -0
- package/exclusions.txt +1 -0
- package/package.json +7 -6
- package/src/cli.ts +35 -2
- package/src/combine.ts +10 -7
- package/src/constants/cliFunctions.ts +9 -0
- package/src/constants/common.ts +95 -105
- package/src/constants/constants.ts +47 -2
- package/src/crawlers/commonCrawlerFunc.ts +84 -5
- package/src/crawlers/crawlDomain.ts +93 -160
- package/src/crawlers/crawlIntelligentSitemap.ts +40 -36
- package/src/crawlers/crawlLocalFile.ts +77 -35
- package/src/crawlers/crawlSitemap.ts +156 -89
- package/src/crawlers/pdfScanFunc.ts +2 -0
- package/src/index.ts +2 -0
- package/src/logs.ts +4 -2
- package/src/mergeAxeResults.ts +20 -9
- package/src/npmIndex.ts +1 -1
- package/src/screenshotFunc/htmlScreenshotFunc.ts +7 -5
- package/src/screenshotFunc/pdfScreenshotFunc.ts +2 -2
- package/src/static/ejs/partials/components/wcagCompliance.ejs +1 -1
- package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +1 -0
- package/src/static/ejs/partials/styles/styles.ejs +11 -0
- package/src/static/ejs/report.ejs +14 -1
- package/src/utils.ts +3 -3
@@ -7,7 +7,7 @@ import os from 'os';
|
|
7
7
|
import { spawnSync, execSync } from 'child_process';
|
8
8
|
import { chromium } from 'playwright';
|
9
9
|
import * as Sentry from '@sentry/node';
|
10
|
-
import { silentLogger } from '../logs.js';
|
10
|
+
import { consoleLogger, silentLogger } from '../logs.js';
|
11
11
|
import { PageInfo } from '../mergeAxeResults.js';
|
12
12
|
|
13
13
|
const filename = fileURLToPath(import.meta.url);
|
@@ -128,7 +128,7 @@ export const getDefaultChromiumDataDir = () => {
|
|
128
128
|
defaultChromiumDataDir = '/tmp';
|
129
129
|
}
|
130
130
|
|
131
|
-
|
131
|
+
consoleLogger.info(`Using Chromium support directory at ${defaultChromiumDataDir}`);
|
132
132
|
}
|
133
133
|
|
134
134
|
if (defaultChromiumDataDir && fs.existsSync(defaultChromiumDataDir)) {
|
@@ -179,6 +179,7 @@ export const basicAuthRegex = /^.*\/\/.*:.*@.*$/i;
|
|
179
179
|
// for crawlers
|
180
180
|
export const axeScript = path.join(dirname, '../../node_modules/axe-core/axe.min.js');
|
181
181
|
export class UrlsCrawled {
|
182
|
+
siteName: string;
|
182
183
|
toScan: string[] = [];
|
183
184
|
scanned: PageInfo[] = [];
|
184
185
|
invalid: PageInfo[] = [];
|
@@ -361,6 +362,7 @@ const wcagLinks = {
|
|
361
362
|
// 'WCAG 1.4.10': 'https://www.w3.org/TR/WCAG22/#reflow', - TODO: review for veraPDF
|
362
363
|
'WCAG 1.4.12': 'https://www.w3.org/TR/WCAG22/#text-spacing',
|
363
364
|
'WCAG 2.1.1': 'https://www.w3.org/TR/WCAG22/#keyboard',
|
365
|
+
'WCAG 2.1.3': 'https://www.w3.org/WAI/WCAG22/Understanding/keyboard-no-exception.html', // AAA
|
364
366
|
'WCAG 2.2.1': 'https://www.w3.org/TR/WCAG22/#timing-adjustable',
|
365
367
|
'WCAG 2.2.2': 'https://www.w3.org/TR/WCAG22/#pause-stop-hide',
|
366
368
|
'WCAG 2.2.4': 'https://www.w3.org/TR/WCAG22/#interruptions', // AAA
|
@@ -564,3 +566,46 @@ export const STATUS_CODE_METADATA: Record<number,string> = {
|
|
564
566
|
511: '511 - Network Authentication Required',
|
565
567
|
|
566
568
|
};
|
569
|
+
|
570
|
+
/**
 * href prefixes of elements that should not be clicked or enqueued.
 * With reference from https://chromeenterprise.google/policies/url-patterns/
 *
 * Entries are lowercase. The section comments below are only a reading aid;
 * the array order is part of the exported value.
 */
export const disallowedListOfPatterns = [
  // Same-page fragment links
  '#',

  // Mail, telephony, messaging and conferencing schemes
  'mailto:',
  'tel:',
  'sms:',
  'skype:',
  'zoommtg:',
  'msteams:',
  'whatsapp:',
  'slack:',
  'viber:',
  'tg:',
  'line:',
  'meet:',
  'facetime:',
  'imessage:',
  'discord:',
  'sgnl:',
  'webex:',

  // Schemes that presumably hand off to an installed application
  'intent:',
  'ms-outlook:',
  'ms-onedrive:',
  'ms-word:',
  'ms-excel:',
  'ms-powerpoint:',
  'ms-office:',
  'onenote:',
  'vs:',

  // Browser-internal schemes
  'chrome-extension:',
  'chrome-search:',
  'chrome:',
  'chrome-untrusted:',
  'devtools:',
  'isolated-app:',
];

/**
 * Comma-separated CSS selector list with one `a[href^="<prefix>"]` entry per
 * disallowed prefix, in list order, with all whitespace stripped.
 */
export const disallowedSelectorPatterns = (() => {
  const selectors: string[] = [];
  for (const prefix of disallowedListOfPatterns) {
    selectors.push(`a[href^="${prefix}"]`);
  }
  return selectors.join(',').replace(/\s+/g, '');
})();
|
@@ -1,13 +1,14 @@
|
|
1
1
|
import crawlee, { CrawlingContext, PlaywrightGotoOptions, Request } from 'crawlee';
|
2
2
|
import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
|
3
|
-
import { BrowserContext, Page } from 'playwright';
|
3
|
+
import { BrowserContext, ElementHandle, Page } from 'playwright';
|
4
4
|
import {
|
5
5
|
axeScript,
|
6
|
+
disallowedListOfPatterns,
|
6
7
|
guiInfoStatusTypes,
|
7
8
|
RuleFlags,
|
8
9
|
saflyIconSelector,
|
9
10
|
} from '../constants/constants.js';
|
10
|
-
import { guiInfoLog, silentLogger } from '../logs.js';
|
11
|
+
import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
|
11
12
|
import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotFunc.js';
|
12
13
|
import { isFilePath } from '../constants/common.js';
|
13
14
|
import { extractAndGradeText } from './custom/extractAndGradeText.js';
|
@@ -19,6 +20,7 @@ import { findElementByCssSelector } from './custom/findElementByCssSelector.js';
|
|
19
20
|
import { getAxeConfiguration } from './custom/getAxeConfiguration.js';
|
20
21
|
import { flagUnlabelledClickableElements } from './custom/flagUnlabelledClickableElements.js';
|
21
22
|
import xPathToCss from './custom/xPathToCss.js';
|
23
|
+
import type { Response as PlaywrightResponse } from 'playwright';
|
22
24
|
|
23
25
|
// types
|
24
26
|
interface AxeResultsWithScreenshot extends AxeResults {
|
@@ -305,7 +307,7 @@ export const runAxeScript = async ({
|
|
305
307
|
});
|
306
308
|
});
|
307
309
|
} catch (e) {
|
308
|
-
|
310
|
+
// do nothing, just continue
|
309
311
|
}
|
310
312
|
|
311
313
|
// Omit logging of browser console errors to reduce unnecessary verbosity
|
@@ -459,9 +461,9 @@ export const runAxeScript = async ({
|
|
459
461
|
try {
|
460
462
|
pageTitle = await page.evaluate(() => document.title);
|
461
463
|
} catch (e) {
|
462
|
-
|
464
|
+
consoleLogger.info(`Error while getting page title: ${e}`);
|
463
465
|
if (page.isClosed()) {
|
464
|
-
|
466
|
+
consoleLogger.info(`Page was closed for ${requestUrl}, creating new page`);
|
465
467
|
page = await browserContext.newPage();
|
466
468
|
await page.goto(requestUrl, { waitUntil: 'domcontentloaded' });
|
467
469
|
pageTitle = await page.evaluate(() => document.title);
|
@@ -508,3 +510,80 @@ export const isUrlPdf = (url: string) => {
|
|
508
510
|
const parsedUrl = new URL(url);
|
509
511
|
return /\.pdf($|\?|#)/i.test(parsedUrl.pathname) || /\.pdf($|\?|#)/i.test(parsedUrl.href);
|
510
512
|
};
|
513
|
+
|
514
|
+
/**
 * Decide whether clicking `element` should be skipped because it is, contains,
 * or is nested inside an `<a>` whose `href` starts with one of the prefixes in
 * `disallowedListOfPatterns`.
 *
 * The check runs inside the page via `page.evaluate`, so the callback may only
 * use values passed in through its argument object.
 *
 * @param page - Page that owns `element`
 * @param element - Handle of the element about to be clicked
 * @returns true when a disallowed anchor is found among the element's
 *          descendants, the element itself, or its ancestors
 */
export async function shouldSkipClickDueToDisallowedHref(
  page: Page,
  element: ElementHandle
): Promise<boolean> {
  return await page.evaluate(
    ({ el, disallowedPrefixes }) => {
      const isDisallowedHref = (href: string | null): boolean => {
        if (!href) return false;
        // Browsers ignore whitespace around an href value, so trim before
        // comparing; otherwise ` mailto:x` would slip past the prefix check.
        const normalisedHref = href.trim().toLowerCase();
        return disallowedPrefixes.some((prefix: string) => normalisedHref.startsWith(prefix));
      };

      const castEl = el as HTMLElement;

      // Check descendant <a href="">
      for (const anchor of castEl.querySelectorAll('a[href]')) {
        if (isDisallowedHref(anchor.getAttribute('href'))) {
          return true;
        }
      }

      // Check self and ancestors for disallowed <a>
      for (
        let current: HTMLElement | null = castEl;
        current;
        current = current.parentElement
      ) {
        if (current.tagName === 'A' && isDisallowedHref(current.getAttribute('href'))) {
          return true;
        }
      }

      return false;
    },
    {
      el: element,
      disallowedPrefixes: disallowedListOfPatterns,
    }
  );
}
|
557
|
+
|
558
|
+
/**
 * Check if response should be skipped based on content headers.
 *
 * A response is skipped when:
 *  - its Content-Disposition type is `attachment`, or
 *  - its media type is not processible as a page: anything under
 *    `application/` (except `application/xhtml+xml`), anything mentioning
 *    `octet-stream`, or anything that is neither `text/*` nor an HTML type.
 *    A missing Content-Type falls into this group and is skipped.
 *
 * Header values are compared case-insensitively and parameters after the first
 * `;` (e.g. `charset`, `filename`) are ignored, so a filename that merely
 * contains the word "attachment" does not trigger a skip.
 *
 * @param response - Playwright Response object
 * @param requestUrl - Optional: request URL for logging (currently unused)
 * @returns true if the content should be skipped
 */
export const shouldSkipDueToUnsupportedContent = (
  response: PlaywrightResponse,
  requestUrl: string = ''
): boolean => {
  if (!response) return false;

  // Returns the lowercased token before the first ';' of a header value.
  const leadingToken = (value: string | undefined): string => {
    const [token = ''] = (value ?? '').split(';');
    return token.trim().toLowerCase();
  };

  const headers = response.headers();
  const dispositionType = leadingToken(headers['content-disposition']);
  const mediaType = leadingToken(headers['content-type']);

  if (dispositionType === 'attachment') {
    return true;
  }

  // XHTML documents are served under application/ but are regular pages.
  if (mediaType === 'application/xhtml+xml') {
    return false;
  }

  return (
    mediaType.startsWith('application/') ||
    mediaType.includes('octet-stream') ||
    (!mediaType.startsWith('text/') && !mediaType.includes('html'))
  );
};
|