@govtechsg/oobee 0.10.76 → 0.10.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +8 -1
- package/INTEGRATION.md +7 -3
- package/dist/cli.js +252 -0
- package/dist/combine.js +221 -0
- package/dist/constants/cliFunctions.js +306 -0
- package/dist/constants/common.js +1669 -0
- package/dist/constants/constants.js +913 -0
- package/dist/constants/errorMeta.json +319 -0
- package/dist/constants/itemTypeDescription.js +7 -0
- package/dist/constants/oobeeAi.js +121 -0
- package/dist/constants/questions.js +151 -0
- package/dist/constants/sampleData.js +176 -0
- package/dist/crawlers/commonCrawlerFunc.js +428 -0
- package/dist/crawlers/crawlDomain.js +613 -0
- package/dist/crawlers/crawlIntelligentSitemap.js +135 -0
- package/dist/crawlers/crawlLocalFile.js +151 -0
- package/dist/crawlers/crawlSitemap.js +303 -0
- package/dist/crawlers/custom/escapeCssSelector.js +10 -0
- package/dist/crawlers/custom/evaluateAltText.js +11 -0
- package/dist/crawlers/custom/extractAndGradeText.js +44 -0
- package/dist/crawlers/custom/extractText.js +27 -0
- package/dist/crawlers/custom/findElementByCssSelector.js +36 -0
- package/dist/crawlers/custom/flagUnlabelledClickableElements.js +963 -0
- package/dist/crawlers/custom/framesCheck.js +37 -0
- package/dist/crawlers/custom/getAxeConfiguration.js +111 -0
- package/dist/crawlers/custom/gradeReadability.js +23 -0
- package/dist/crawlers/custom/utils.js +1024 -0
- package/dist/crawlers/custom/xPathToCss.js +147 -0
- package/dist/crawlers/guards/urlGuard.js +71 -0
- package/dist/crawlers/pdfScanFunc.js +276 -0
- package/dist/crawlers/runCustom.js +89 -0
- package/dist/exclusions.txt +7 -0
- package/dist/generateHtmlReport.js +144 -0
- package/dist/index.js +62 -0
- package/dist/logs.js +84 -0
- package/dist/mergeAxeResults.js +1571 -0
- package/dist/npmIndex.js +429 -0
- package/dist/proxyService.js +360 -0
- package/dist/runGenerateJustHtmlReport.js +16 -0
- package/dist/screenshotFunc/htmlScreenshotFunc.js +355 -0
- package/dist/screenshotFunc/pdfScreenshotFunc.js +645 -0
- package/dist/services/s3Uploader.js +127 -0
- package/dist/static/ejs/partials/components/allIssues/AllIssues.ejs +9 -0
- package/dist/static/ejs/partials/components/allIssues/CategoryBadges.ejs +82 -0
- package/dist/static/ejs/partials/components/allIssues/FilterBar.ejs +33 -0
- package/dist/static/ejs/partials/components/allIssues/IssuesTable.ejs +41 -0
- package/dist/static/ejs/partials/components/header/SiteInfo.ejs +119 -0
- package/dist/static/ejs/partials/components/header/aboutScanModal/AboutScanModal.ejs +15 -0
- package/dist/static/ejs/partials/components/header/aboutScanModal/ScanConfiguration.ejs +44 -0
- package/dist/static/ejs/partials/components/header/aboutScanModal/ScanDetails.ejs +142 -0
- package/dist/static/ejs/partials/components/prioritiseIssues/IssueDetailCard.ejs +36 -0
- package/dist/static/ejs/partials/components/prioritiseIssues/PrioritiseIssues.ejs +47 -0
- package/dist/static/ejs/partials/components/ruleModal/ruleOffcanvas.ejs +196 -0
- package/dist/static/ejs/partials/components/scannedPagesSegmentedTabs.ejs +48 -0
- package/dist/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
- package/dist/static/ejs/partials/components/shared/InfoAlert.ejs +3 -0
- package/dist/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
- package/dist/static/ejs/partials/components/summaryScanResults.ejs +16 -0
- package/dist/static/ejs/partials/components/summaryTable.ejs +20 -0
- package/dist/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
- package/dist/static/ejs/partials/components/topTen.ejs +6 -0
- package/dist/static/ejs/partials/components/wcagCompliance/FailedCriteria.ejs +47 -0
- package/dist/static/ejs/partials/components/wcagCompliance/WcagCompliance.ejs +16 -0
- package/dist/static/ejs/partials/components/wcagCompliance/WcagGaugeBar.ejs +16 -0
- package/dist/static/ejs/partials/components/wcagCoverageDetails.ejs +18 -0
- package/dist/static/ejs/partials/footer.ejs +24 -0
- package/dist/static/ejs/partials/header.ejs +14 -0
- package/dist/static/ejs/partials/main.ejs +29 -0
- package/dist/static/ejs/partials/scripts/allIssues/AllIssues.ejs +376 -0
- package/dist/static/ejs/partials/scripts/bootstrap.ejs +8 -0
- package/dist/static/ejs/partials/scripts/categorySummary.ejs +141 -0
- package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +3 -0
- package/dist/static/ejs/partials/scripts/header/SiteInfo.ejs +44 -0
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/AboutScanModal.ejs +51 -0
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +127 -0
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanDetails.ejs +60 -0
- package/dist/static/ejs/partials/scripts/highlightjs.ejs +335 -0
- package/dist/static/ejs/partials/scripts/popper.ejs +7 -0
- package/dist/static/ejs/partials/scripts/prioritiseIssues/IssueDetailCard.ejs +137 -0
- package/dist/static/ejs/partials/scripts/prioritiseIssues/PrioritiseIssues.ejs +214 -0
- package/dist/static/ejs/partials/scripts/prioritiseIssues/wcagSvgMap.ejs +861 -0
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +957 -0
- package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +353 -0
- package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +468 -0
- package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +306 -0
- package/dist/static/ejs/partials/scripts/ruleModal/utilities.ejs +483 -0
- package/dist/static/ejs/partials/scripts/scannedPagesSegmentedTabs.ejs +35 -0
- package/dist/static/ejs/partials/scripts/screenshotLightbox.ejs +75 -0
- package/dist/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
- package/dist/static/ejs/partials/scripts/summaryTable.ejs +78 -0
- package/dist/static/ejs/partials/scripts/topTen.ejs +61 -0
- package/dist/static/ejs/partials/scripts/utils.ejs +453 -0
- package/dist/static/ejs/partials/scripts/wcagCompliance/FailedCriteria.ejs +103 -0
- package/dist/static/ejs/partials/scripts/wcagCompliance/WcagGaugeBar.ejs +47 -0
- package/dist/static/ejs/partials/scripts/wcagCompliance.ejs +15 -0
- package/dist/static/ejs/partials/scripts/wcagCoverageDetails.ejs +75 -0
- package/dist/static/ejs/partials/styles/allIssues/AllIssues.ejs +384 -0
- package/dist/static/ejs/partials/styles/bootstrap.ejs +12391 -0
- package/dist/static/ejs/partials/styles/header/SiteInfo.ejs +121 -0
- package/dist/static/ejs/partials/styles/header/aboutScanModal/AboutScanModal.ejs +82 -0
- package/dist/static/ejs/partials/styles/header/aboutScanModal/ScanConfiguration.ejs +50 -0
- package/dist/static/ejs/partials/styles/header/aboutScanModal/ScanDetails.ejs +149 -0
- package/dist/static/ejs/partials/styles/header.ejs +7 -0
- package/dist/static/ejs/partials/styles/highlightjs.ejs +54 -0
- package/dist/static/ejs/partials/styles/prioritiseIssues/IssueDetailCard.ejs +141 -0
- package/dist/static/ejs/partials/styles/prioritiseIssues/PrioritiseIssues.ejs +204 -0
- package/dist/static/ejs/partials/styles/ruleModal/ruleOffcanvas.ejs +456 -0
- package/dist/static/ejs/partials/styles/scannedPagesSegmentedTabs.ejs +46 -0
- package/dist/static/ejs/partials/styles/shared/InfoAlert.ejs +12 -0
- package/dist/static/ejs/partials/styles/styles.ejs +1607 -0
- package/dist/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
- package/dist/static/ejs/partials/styles/topTenCard.ejs +44 -0
- package/dist/static/ejs/partials/styles/wcagCompliance/FailedCriteria.ejs +59 -0
- package/dist/static/ejs/partials/styles/wcagCompliance/WcagGaugeBar.ejs +62 -0
- package/dist/static/ejs/partials/styles/wcagCompliance.ejs +36 -0
- package/dist/static/ejs/partials/styles/wcagCoverageDetails.ejs +33 -0
- package/dist/static/ejs/partials/summaryHeader.ejs +70 -0
- package/dist/static/ejs/partials/summaryMain.ejs +49 -0
- package/dist/static/ejs/report.ejs +226 -0
- package/dist/static/ejs/summary.ejs +47 -0
- package/dist/types/types.js +1 -0
- package/dist/utils.js +1070 -0
- package/examples/oobee-cypress-integration-js/cypress/support/e2e.js +36 -6
- package/examples/oobee-cypress-integration-js/cypress.config.js +45 -1
- package/examples/oobee-cypress-integration-ts/cypress.config.ts +47 -1
- package/examples/oobee-cypress-integration-ts/src/cypress/support/e2e.ts +36 -6
- package/examples/oobee-playwright-integration-js/oobee-playwright-demo.js +2 -1
- package/examples/oobee-playwright-integration-ts/src/oobee-playwright-demo.ts +2 -1
- package/package.json +9 -3
- package/src/constants/common.ts +2 -2
- package/src/constants/constants.ts +3 -1
- package/src/crawlers/crawlDomain.ts +1 -0
- package/src/crawlers/runCustom.ts +0 -1
- package/src/npmIndex.ts +42 -24
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
 * Convert a restricted subset of XPath into a CSS selector.
 *
 * Handled constructs (everything else is reported as unsupported):
 *   - `/` (child), `//` (descendant) and `following-sibling::` steps
 *   - `id("value")` as the first step
 *   - one `[@attr="value"]` or `[contains(@attr, "value")]` predicate per step
 *   - one positional predicate per step: `[n]` or `[last()]`
 *   - unions separated by `|`, emitted as a comma-separated selector list
 *
 * Special characters inside id values are backslash-escaped as each id is
 * emitted, so pseudo-classes, combinators and union commas that follow an id
 * are never escaped by accident.
 *
 * @param {string} expr XPath expression to convert.
 * @returns {string} The equivalent CSS selector.
 * @throws {Error} If `expr` is empty, uses an unsupported attribute test
 *   (for example `text()`), or no step of an alternative can be converted.
 */
export default function xPathToCss(expr) {
  // Heuristic pre-check: the expression is "valid" when it is not blank and
  // contains none of the unsupported XPath constructs listed in the regex.
  const isValidXPath = (candidate) =>
    typeof candidate !== 'undefined' &&
    candidate.replace(/[\s-_=]/g, '') !== '' &&
    candidate.length ===
      candidate.replace(/[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi, '').length;

  // Builds the per-step matcher. The template uses Python-style named groups
  // and %(name)s placeholders; the placeholders are expanded and the names are
  // stripped, leaving positional groups that the `match` object below indexes.
  const getValidationRegex = () => {
    let regex = '(?P<node>' +
      '(' +
      '^id\\(["\\\']?(?P<idvalue>%(value)s)["\\\']?\\)' + // special case! `id(idValue)`
      '|' +
      '(?P<nav>//?(?:following-sibling::)?)(?P<tag>%(tag)s)' + // `//div`
      '(\\[(' +
      '(?P<matched>(?P<mattr>@?%(attribute)s=["\\\'](?P<mvalue>%(value)s))["\\\']' + // `[@id="well"]` supported and `[text()="yes"]` is not
      '|' +
      '(?P<contained>contains\\((?P<cattr>@?%(attribute)s,\\s*["\\\'](?P<cvalue>%(value)s)["\\\']\\))' + // `[contains(@id, "bleh")]` supported and `[contains(text(), "some")]` is not
      ')\\])?' +
      '(\\[\\s*(?P<nth>\\d+|last\\(\\s*\\))\\s*\\])?' +
      ')' +
      ')';
    const subRegexes = {
      tag: '([a-zA-Z][a-zA-Z0-9:-]*|\\*)',
      // The trailing ")" is intentional: it closes the group opened by
      // `(?P<mattr>` / `(?P<cattr>` in the template above.
      attribute: '[.a-zA-Z_:][-\\w:.]*(\\(\\))?)',
      value: '\\s*[\\w/:][-/\\w\\s,:;.]*',
    };
    Object.keys(subRegexes).forEach((key) => {
      regex = regex.replace(new RegExp(`%\\(${key}\\)s`, 'gi'), subRegexes[key]);
    });
    regex = regex.replace(/\?P<node>|\?P<idvalue>|\?P<nav>|\?P<tag>|\?P<matched>|\?P<mattr>|\?P<mvalue>|\?P<contained>|\?P<cattr>|\?P<cvalue>|\?P<nth>/gi, '');
    return new RegExp(regex, 'gi');
  };

  // Rewrites the common `contains(concat(" ", @class, " "), " name ")` idiom
  // into a plain `@class="name"` test that the step matcher understands.
  const preParseXpath = (raw) => raw.replace(/contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi, '@class="$1"');

  // Backslash-escapes characters that are special in a CSS selector.
  const escapeCssId = (id) => id.replace(/[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g, '\\$&');

  // Splits a whitespace-separated value into tokens, ignoring surrounding and
  // repeated whitespace.
  const toTokens = (value) => value.trim().split(/\s+/);

  if (!expr) {
    throw new Error('Missing XPath expression');
  }

  const xpath = preParseXpath(expr);
  if (!isValidXPath(xpath)) {
    // Deliberately not thrown: conversion is still attempted for expressions
    // the pre-check rejects, e.g. an id containing slashes such as
    // //*[@id="google_ads_iframe_/4654/dweb/imu1/homepage/landingpage/na_0"]/html/body/div[1]/a
    // (the slashes are escaped when the id selector is emitted below).
    console.error(`Invalid or unsupported XPath: ${xpath}`);
  }

  const stepRegex = getValidationRegex();
  const cssSelectors = [];

  for (const alternative of xpath.split('|')) {
    // Conversion stops at the first empty alternative (e.g. `a||b`).
    if (!alternative) {
      break;
    }

    const css = [];
    let nodes;
    while ((nodes = stepRegex.exec(alternative))) {
      const match = {
        node: nodes[5],
        idvalue: nodes[12] || nodes[3],
        nav: nodes[4],
        tag: nodes[5],
        matched: nodes[7],
        mattr: nodes[10] || nodes[14],
        mvalue: nodes[12] || nodes[16],
        contained: nodes[13],
        cattr: nodes[14],
        cvalue: nodes[16],
        nth: nodes[18],
      };

      // The first step never gets a combinator; later steps map the XPath
      // axis onto the corresponding CSS combinator.
      let nav = '';
      if (css.length !== 0 && match.nav) {
        if (match.nav.includes('following-sibling::')) {
          nav = ' + ';
        } else {
          nav = match.nav === '//' ? ' ' : ' > ';
        }
      }

      const tag = match.tag === '*' ? '' : match.tag || '';

      let attr = '';
      if (match.contained) {
        if (!match.cattr.startsWith('@')) {
          throw new Error(`Invalid or unsupported XPath attribute: ${match.cattr}`);
        }
        attr = `[${match.cattr.replace(/^@/, '')}*="${match.cvalue}"]`;
      } else if (match.matched) {
        switch (match.mattr) {
          case '@id':
            // NOTE: a whitespace-separated id value is emitted as a chain of
            // id selectors (`#a#b`), one per token, each escaped on its own.
            attr = `#${toTokens(match.mvalue).map(escapeCssId).join('#')}`;
            break;
          case '@class':
            attr = `.${toTokens(match.mvalue).join('.')}`;
            break;
          case 'text()':
          case '.':
            throw new Error(`Invalid or unsupported XPath attribute: ${match.mattr}`);
          default: {
            if (!match.mattr.startsWith('@')) {
              throw new Error(`Invalid or unsupported XPath attribute: ${match.mattr}`);
            }
            // Values containing a space are trimmed on both sides. The value
            // is quoted exactly once; wrapping it in an extra pair of quotes
            // would yield the invalid selector `[attr=""a b""]`.
            const value = match.mvalue.includes(' ') ? match.mvalue.trim() : match.mvalue;
            attr = `[${match.mattr.replace('@', '')}="${value}"]`;
            break;
          }
        }
      } else if (match.idvalue) {
        attr = `#${toTokens(match.idvalue).map(escapeCssId).join('#')}`;
      }

      let nth = '';
      if (match.nth) {
        if (match.nth.includes('last')) {
          nth = ':last-of-type';
        } else {
          const index = Number.parseInt(match.nth, 10);
          if (Number.isNaN(index)) {
            throw new Error(`Invalid or unsupported XPath attribute: ${match.nth}`);
          }
          nth = index !== 1 ? `:nth-of-type(${match.nth})` : ':first-of-type';
        }
      }

      css.push(nav + tag + attr + nth);
    }

    const result = css.join('');
    if (result === '') {
      throw new Error('Invalid or unsupported XPath');
    }
    cssSelectors.push(result);
  }

  return cssSelectors.join(', ');
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
const ALLOWED_PROTOCOLS = new Set(['http:', 'https:']);

/**
 * Keep every page of a browser context on http(s) URLs.
 *
 * Two layers are installed on each existing and future page:
 *   1. an init script that makes in-page `window.open` refuse targets whose
 *      resolved protocol is not http(s), and attempts the same for the
 *      `location` navigation members;
 *   2. a `framenavigated` listener that remembers the last http(s) URL of the
 *      main frame and navigates back to it (or to `opts.fallbackUrl`, or to
 *      `about:blank`) when the main frame lands on any other protocol.
 *
 * @param {object} context Browser context exposing `pages()` and `on('page', …)`
 *   (presumably a Playwright BrowserContext — see the caller).
 * @param {{ fallbackUrl?: string | URL }} [opts]
 *   `fallbackUrl` is used as the restore target until a page has recorded an
 *   allowed URL of its own.
 * @returns {void}
 */
export function addUrlGuardScript(context, opts = {}) {
  const { fallbackUrl } = opts;
  const lastAllowedUrlByPage = new WeakMap();
  // Pages with a restore navigation in flight. The restore itself fires
  // `framenavigated`; without this guard a restore target that is not http(s)
  // (e.g. `about:blank` when no fallback is given) would trigger another
  // restore, and so on without end.
  const restoringPages = new WeakSet();

  const restoreToSafeUrl = async (page) => {
    const safeUrl = lastAllowedUrlByPage.get(page) || fallbackUrl || 'about:blank';
    restoringPages.add(page);
    try {
      await page.goto(safeUrl, { waitUntil: 'domcontentloaded' });
    } catch {
      // page might be closing; ignore
    } finally {
      restoringPages.delete(page);
    }
  };

  const attachGuardsToPage = (page) => {
    if (!lastAllowedUrlByPage.has(page) && fallbackUrl) {
      lastAllowedUrlByPage.set(page, String(fallbackUrl));
    }

    // The callback is handed to the page as an init script, so it must be
    // self-contained and cannot reference anything from this module's scope.
    const initScriptRegistration = page.addInitScript(() => {
      const isAllowedProtocol = (value) => {
        try {
          const s = value instanceof URL ? value.toString() : String(value);
          const protocol = new URL(s, window.location.href).protocol;
          return protocol === 'http:' || protocol === 'https:';
        } catch {
          return false;
        }
      };
      const win = window;
      const openOriginal = win.open;
      win.open = function (targetUrl, ...args) {
        if (!isAllowedProtocol(targetUrl)) {
          return null;
        }
        return openOriginal.call(this, targetUrl, ...args);
      };
      // Best effort only: the HTML Standard declares the Location members
      // [LegacyUnforgeable], so browsers may reject these overrides. Contain
      // the failure so the init script does not end with an uncaught error.
      try {
        const assignOriginal = win.location.assign.bind(win.location);
        const replaceOriginal = win.location.replace.bind(win.location);
        win.location.assign = (nextUrl) => {
          if (isAllowedProtocol(nextUrl)) {
            assignOriginal(nextUrl);
          }
        };
        win.location.replace = (nextUrl) => {
          if (isAllowedProtocol(nextUrl)) {
            replaceOriginal(nextUrl);
          }
        };
        Object.defineProperty(win.location, 'href', {
          get() {
            return String(win.location.toString());
          },
          set(nextUrl) {
            if (isAllowedProtocol(nextUrl)) {
              assignOriginal(nextUrl);
            }
          },
        });
      } catch {
        // location could not be patched; the framenavigated listener still applies
      }
    });
    // Registration can reject when the page is already closing; do not let
    // that surface as an unhandled rejection.
    Promise.resolve(initScriptRegistration).catch(() => {});

    page.on('framenavigated', async (frame) => {
      if (frame !== page.mainFrame()) {
        return;
      }
      const urlStr = frame.url();
      let urlObj;
      try {
        urlObj = new URL(urlStr);
      } catch {
        urlObj = null;
      }
      if (urlObj && ALLOWED_PROTOCOLS.has(urlObj.protocol)) {
        lastAllowedUrlByPage.set(page, urlObj.toString());
        return;
      }
      if (restoringPages.has(page)) {
        // This event was caused by our own restore navigation.
        return;
      }
      await restoreToSafeUrl(page);
    });
  };

  // Guard existing and future pages
  for (const page of context.pages()) {
    attachGuardsToPage(page);
  }
  context.on('page', attachGuardsToPage);
}
export default addUrlGuardScript;
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import { spawnSync } from 'child_process';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { randomUUID } from 'crypto';
|
|
4
|
+
import { createRequire } from 'module';
|
|
5
|
+
import os from 'os';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import { ensureDirSync } from 'fs-extra';
|
|
8
|
+
import { getPageFromContext, getPdfScreenshots } from '../screenshotFunc/pdfScreenshotFunc.js';
|
|
9
|
+
import { consoleLogger, guiInfoLog } from '../logs.js';
|
|
10
|
+
import constants, { getExecutablePath, guiInfoStatusTypes, STATUS_CODE_METADATA, } from '../constants/constants.js';
|
|
11
|
+
import { cleanUpAndExit, getPdfStoragePath, getStoragePath } from '../utils.js';
|
|
12
|
+
const require = createRequire(import.meta.url);
|
|
13
|
+
// Classes
|
|
14
|
+
/**
 * Per-PDF scan result in the shape consumed by the report: identifying
 * fields plus one `{ rules, totalItems }` bucket per issue category.
 */
class TranslatedObject {
  constructor() {
    // Each category gets its own fresh bucket so they never share state.
    const emptyCategory = () => ({ rules: {}, totalItems: 0 });
    Object.assign(this, {
      url: '',
      pageTitle: '',
      filePath: '',
      totalItems: 0,
      goodToFix: emptyCategory(),
      mustFix: emptyCategory(),
      needsReview: emptyCategory(),
    });
  }
}
|
|
34
|
+
/**
 * One rule of a PDF scan result: its description, the number of failed
 * checks, its conformance tags and the individual failing items.
 */
export class TransformedRuleObject {
  constructor() {
    Object.assign(this, {
      description: '',
      totalItems: 0,
      conformance: [],
      items: [],
    });
  }
}
|
|
42
|
+
// WCAG clauses referenced by the PDF scan, grouped by conformance level:
//   AAA: 1.4.8, 2.4.9
//   AA:  1.3.4, 1.4.3, 1.4.4, 1.4.10
//   A:   1.3.1, 4.1.1, 4.1.2
const LEVEL_AAA = ['2.4.9', '1.4.8'];
const LEVEL_AA = ['1.3.4', '1.4.3', '1.4.4', '1.4.10'];
const LEVEL_A = ['1.3.1', '4.1.1', '4.1.2'];

// Lookup from clause number to its conformance tag (wcag2a / wcag2aa / wcag2aaa).
const clauseToLevel = Object.fromEntries([
  ...LEVEL_AAA.map((clause) => [clause, 'wcag2aaa']),
  ...LEVEL_AA.map((clause) => [clause, 'wcag2aa']),
  ...LEVEL_A.map((clause) => [clause, 'wcag2a']),
]);

// Maps the STATUS recorded in the error meta file onto a report category.
const metaToCategoryMap = {
  critical: 'mustFix',
  error: 'goodToFix',
  serious: 'goodToFix',
  warning: 'goodToFix',
  ignore: 'goodToFix',
};

// Individual tests that are skipped, keyed by clause and then by test number.
const EXCLUDED_RULES = {
  '1.3.4': { 1: true }, // test for page orientation deemed a false positive, so its excluded
};

/**
 * Decide whether a rule summary is left out of the results.
 *
 * @param {{ clause: string, testNumber: number|string }} rule
 * @returns {boolean} true for every level-AAA clause and for tests listed in
 *   EXCLUDED_RULES.
 */
const isRuleExcluded = (rule) => {
  const { clause, testNumber } = rule;
  const excludedTests = EXCLUDED_RULES[clause];
  if (excludedTests && excludedTests[testNumber]) {
    return excludedTests[testNumber];
  }
  return LEVEL_AAA.includes(clause);
};
|
|
79
|
+
/**
 * Locate the veraPDF launcher for the current platform.
 *
 * Looks up `verapdf.bat` on Windows and `verapdf` elsewhere via
 * `getExecutablePath('**\/verapdf', …)`. When nothing is found the problem is
 * logged once and the falsy lookup result is returned for the caller to handle.
 *
 * @returns {string|null|undefined} Path of the executable, or a falsy value
 *   when it could not be found.
 */
const getVeraExecutable = () => {
  const launcherName = os.platform() === 'win32' ? 'verapdf.bat' : 'verapdf';
  const veraPdfExe = getExecutablePath('**/verapdf', launcherName);
  if (!veraPdfExe) {
    consoleLogger.error('Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.');
  }
  return veraPdfExe;
};
|
|
94
|
+
/**
 * Cheap signature check for PDF content.
 *
 * @param {*} buffer Candidate file content.
 * @returns {boolean} true when `buffer` is a Buffer that starts with the
 *   `%PDF-` header and contains an `%%EOF` marker.
 */
const isPDF = (buffer) => {
  if (!Buffer.isBuffer(buffer)) {
    return false;
  }
  // Only the first five bytes are inspected for the header. Searching for the
  // last occurrence instead would reject valid files that contain a second
  // `%PDF-` marker further in (for example an embedded PDF).
  const startsWithHeader = buffer.subarray(0, 5).toString('latin1') === '%PDF-';
  return startsWithHeader && buffer.includes('%%EOF');
};
|
|
97
|
+
/**
 * Start downloading a PDF and record the outcome in `urlsCrawled`.
 *
 * The download runs in the background; a promise tracking it is appended to
 * `pdfDownloads`. That promise always resolves (never rejects): a failed
 * request or write is logged and the URL is recorded under
 * `urlsCrawled.invalid`, so callers awaiting `pdfDownloads` cannot hang or
 * trip an unhandled rejection.
 *
 * Outcomes:
 *   - non-200 response           -> `urlsCrawled.userExcluded`
 *   - 200 and content is a PDF   -> `urlsCrawled.scanned`
 *   - 200 but content is no PDF  -> `urlsCrawled.invalid`
 *   - request/write failure      -> `urlsCrawled.invalid`
 *
 * @param {string} randomToken Scan identifier used to resolve the PDF storage folder.
 * @param {Promise<void>[]} pdfDownloads Collector for in-flight downloads (mutated).
 * @param {{ url: string }} request Request whose `url` points at the PDF.
 * @param {Function} sendRequest Called with `{ responseType: 'buffer' }`; expected
 *   to resolve to a response exposing `statusCode` and `body`.
 * @param {object} urlsCrawled Crawl bookkeeping with `scanned`, `invalid` and
 *   `userExcluded` arrays (mutated).
 * @returns {{ pdfFileName: string, url: string }} Generated file name (without
 *   extension) and the source URL.
 */
export const handlePdfDownload = (randomToken, pdfDownloads, request, sendRequest, urlsCrawled) => {
  const pdfFileName = randomUUID();
  const { url } = request;

  // decodeURI throws URIError on malformed escapes; fall back to the raw URL
  // so a bad link cannot abort the crawl synchronously.
  let pageTitle;
  try {
    pageTitle = decodeURI(url).split('/').pop();
  } catch {
    pageTitle = url.split('/').pop();
  }

  const logSkipped = () => {
    guiInfoLog(guiInfoStatusTypes.SKIPPED, {
      numScanned: urlsCrawled.scanned.length,
      urlScanned: url,
    });
  };

  const recordInvalid = () => {
    logSkipped();
    urlsCrawled.invalid.push({
      url,
      pageTitle: url,
      actualUrl: url,
      metadata: STATUS_CODE_METADATA[1],
    });
  };

  const download = async () => {
    // Download from remote URL
    const response = await sendRequest({ responseType: 'buffer' });
    if (response.statusCode !== 200) {
      logSkipped();
      urlsCrawled.userExcluded.push({
        url,
        pageTitle: url,
        actualUrl: url, // because about:blank is not useful
        metadata: STATUS_CODE_METADATA[response.statusCode] || STATUS_CODE_METADATA[1],
        httpStatusCode: 0,
      });
      return;
    }

    const buf = Buffer.isBuffer(response) ? response : response.body;
    // The content is written before it is validated: the file lands on disk
    // even when it turns out not to be a PDF.
    await fs.promises.writeFile(`${getPdfStoragePath(randomToken)}/${pdfFileName}.pdf`, buf);

    if (!isPDF(buf)) {
      recordInvalid();
      return;
    }
    guiInfoLog(guiInfoStatusTypes.SCANNED, {
      numScanned: urlsCrawled.scanned.length,
      urlScanned: url,
    });
    urlsCrawled.scanned.push({
      url,
      pageTitle,
      actualUrl: url,
    });
  };

  pdfDownloads.push(
    download().catch((error) => {
      consoleLogger.error(`Failed to download PDF ${url}: ${error}`);
      recordInvalid();
    }),
  );

  return { pdfFileName, url };
};
|
|
155
|
+
/**
 * Run veraPDF over every PDF downloaded for this scan and store its JSON
 * report in the intermediate PDF folder under `constants.pdfScanResultFileName`.
 *
 * @param {string} randomToken Scan identifier used to resolve the PDF storage folder.
 * @returns {Promise<void>}
 */
export const runPdfScan = async (randomToken) => {
  const execFile = getVeraExecutable();
  // Check the raw lookup result: once wrapped in quotes the string is always
  // truthy, and path.dirname() below would throw on a missing path.
  if (!execFile) {
    cleanUpAndExit(1);
    return;
  }

  // Paths are quoted because the command is run through a shell.
  const veraPdfExe = `"${execFile}"`;
  const veraPdfProfile = `"${path.join(path.dirname(execFile), 'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml')}"`;

  const intermediateFolder = getPdfStoragePath(randomToken);
  // store in a intermediate folder as we transfer final results later
  const intermediateResultPath = `${intermediateFolder}/${constants.pdfScanResultFileName}`;
  const veraPdfCmdArgs = [
    '-p',
    veraPdfProfile,
    '--format',
    'json',
    '-r', // recurse through directory
    `"${intermediateFolder}"`,
  ];

  const scan = spawnSync(veraPdfExe, veraPdfCmdArgs, { shell: true });
  if (scan.error) {
    consoleLogger.error(`Failed to run veraPDF: ${scan.error}`);
  }
  if (scan.stderr && scan.stderr.length > 0) {
    consoleLogger.error(scan.stderr.toString());
  }
  // stdout is null when the process could not be spawned; write an empty
  // result in that case instead of throwing.
  fs.writeFileSync(intermediateResultPath, scan.stdout ?? '', { encoding: 'utf-8' });
};
|
|
179
|
+
// transform results from veraPDF to desired format for report
|
|
180
|
+
/**
 * Read the veraPDF JSON report written for this scan and convert each job
 * (one per scanned file) into a `TranslatedObject`.
 *
 * Jobs without a `validationResult` are logged and skipped. Rules rejected by
 * `isRuleExcluded` are skipped. Every remaining rule is placed in the category
 * derived from its STATUS in `errorMeta.json`; rules missing from that file,
 * or carrying an unrecognised STATUS, are treated as `ignore`.
 *
 * @param {string} randomToken Scan identifier used to resolve the PDF storage folder.
 * @param {Record<string, string>} uuidToUrlMapping Maps a stored file name
 *   (with or without the `.pdf` extension) to the URL it came from.
 * @returns {Promise<TranslatedObject[]>} One entry per job that was processed;
 *   empty when the report cannot be parsed.
 */
export const mapPdfScanResults = async (randomToken, uuidToUrlMapping) => {
  const intermediateFolder = getPdfStoragePath(randomToken);
  const intermediateResultPath = `${intermediateFolder}/${constants.pdfScanResultFileName}`;
  const rawdata = fs.readFileSync(intermediateResultPath, 'utf-8');

  let parsedJsonData;
  try {
    parsedJsonData = JSON.parse(rawdata);
  } catch (err) {
    consoleLogger.error(err);
  }

  const errorMeta = require('../constants/errorMeta.json');
  const resultsList = [];
  const pathSeparator = os.platform() === 'win32' ? '\\' : '/';

  // decodeURI throws on malformed escapes; fall back to the raw value.
  const safeDecode = (value) => {
    try {
      return decodeURI(value);
    } catch {
      return value;
    }
  };

  // jobs: files that are scanned
  const jobs = parsedJsonData?.report?.jobs ?? [];
  for (const job of jobs) {
    const translated = new TranslatedObject();
    const { itemDetails, validationResult } = job;
    const { name: fileName } = itemDetails;
    const rawFileName = fileName.split(pathSeparator).pop();
    const fileNameWithoutExt = rawFileName.replace(/\.pdf$/i, '');
    const url =
      uuidToUrlMapping[rawFileName] || // exact match like 'Some-filename.pdf'
      uuidToUrlMapping[fileNameWithoutExt] || // uuid-based key like 'a9f7ebbd-5a90...'
      `file://${fileName}`; // fallback
    const filePath = path.join(getPdfStoragePath(randomToken), rawFileName);
    const pageTitle = safeDecode(url).split('/').pop();

    translated.url = url;
    translated.pageTitle = pageTitle;
    translated.filePath = filePath;

    if (!validationResult) {
      // check for error in scan
      consoleLogger.info(`Unable to scan ${pageTitle}, skipping`);
      continue; // skip this job
    }

    const { passedChecks, failedChecks, ruleSummaries } = validationResult.details;
    translated.totalItems = passedChecks + failedChecks;

    // loop through all failed rules
    for (const rule of ruleSummaries) {
      if (isRuleExcluded(rule)) {
        continue;
      }
      const { specification, testNumber, clause } = rule;
      const [ruleId, transformedRule] = await transformRule(rule, filePath);

      // A violation that is not in the meta file is treated as `ignore`; the
      // lookup is optional at every level so an unknown specification or
      // clause cannot throw.
      const meta = errorMeta[specification]?.[clause]?.[testNumber]?.STATUS ?? 'ignore';
      const categoryName = metaToCategoryMap[meta] ?? metaToCategoryMap.ignore;
      const category = translated[categoryName];
      category.rules[ruleId] = transformedRule;
      category.totalItems += transformedRule.totalItems;
    }

    resultsList.push(translated);
  }

  return resultsList;
};
|
|
240
|
+
/**
 * Convert one veraPDF rule summary into a report rule.
 *
 * @param {object} rule Rule summary providing `specification`, `description`,
 *   `clause`, `testNumber` and `checks`.
 * @param {string} filePath Path of the scanned PDF, passed to
 *   `getPageFromContext` for every failed check.
 * @returns {Promise<[string, TransformedRuleObject]>} The rule id
 *   (`pdf-<specification>-<clause>-<testNumber>` with spaces replaced by
 *   underscores) and the transformed rule.
 */
const transformRule = async (rule, filePath) => {
  const { specification, description, clause, testNumber, checks } = rule;

  const transformed = new TransformedRuleObject();
  transformed.description = description;
  transformed.totalItems = checks.length;
  transformed.conformance =
    specification === 'WCAG2.1'
      ? [clauseToLevel[clause], `wcag${clause.split('.').join('')}`]
      : ['best-practice'];
  transformed.items = [];

  // Checks are resolved one at a time, in report order.
  for (const { errorMessage, context } of checks) {
    const page = await getPageFromContext(context, filePath);
    transformed.items.push({ message: errorMessage, page, context });
  }

  return [`pdf-${specification}-${clause}-${testNumber}`.replaceAll(' ', '_'), transformed];
};
|
|
261
|
+
/**
 * Produce screenshots for the `mustFix` and `goodToFix` rules of one PDF
 * result and replace each rule's `items` with what `getPdfScreenshots`
 * returns.
 *
 * Screenshots are placed under `<storage>/elemScreenshots/pdf`, using the
 * base name `<title>-<category>-<ruleId>`, where `<title>` is the page title
 * with spaces replaced by underscores and cut at its first dot.
 *
 * @param {string} randomToken Scan identifier used to resolve the storage folder.
 * @param {object} result A translated PDF result (mutated in place).
 * @returns {Promise<void>}
 */
export const doPdfScreenshots = async (randomToken, result) => {
  const { filePath, pageTitle } = result;
  const [titleStem] = pageTitle.replaceAll(' ', '_').split('.');

  const screenshotsDir = path.join(getStoragePath(randomToken), 'elemScreenshots', 'pdf');
  ensureDirSync(screenshotsDir);

  for (const category of ['mustFix', 'goodToFix']) {
    const ruleEntries = Object.entries(result[category].rules);
    for (const [ruleId, ruleInfo] of ruleEntries) {
      const screenshotPath = path.join(screenshotsDir, `${titleStem}-${category}-${ruleId}`);
      ruleInfo.items = await getPdfScreenshots(filePath, ruleInfo.items, screenshotPath);
    }
  }
};
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/* eslint-env browser */
|
|
2
|
+
import { chromium } from 'playwright';
|
|
3
|
+
import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
|
|
4
|
+
import { cleanUpAndExit, register, registerSoftClose } from '../utils.js';
|
|
5
|
+
import constants, { getIntermediateScreenshotsPath, guiInfoStatusTypes, } from '../constants/constants.js';
|
|
6
|
+
import { initNewPage, log } from './custom/utils.js';
|
|
7
|
+
import { guiInfoLog } from '../logs.js';
|
|
8
|
+
import { addUrlGuardScript } from './guards/urlGuard.js';
|
|
9
|
+
// Export of classes
|
|
10
|
+
/**
 * Bundle of state shared by the page handlers of a custom-flow scan.
 * Every constructor argument is stored unchanged under the same name.
 */
export class ProcessPageParams {
  constructor(scannedIdx, blacklistedPatterns, includeScreenshots, dataset, intermediateScreenshotsPath, urlsCrawled, randomToken) {
    Object.assign(this, {
      scannedIdx,
      blacklistedPatterns,
      includeScreenshots,
      dataset,
      intermediateScreenshotsPath,
      urlsCrawled,
      randomToken,
    });
  }
}
|
|
21
|
+
/**
 * Run an interactive ("custom flow") scan: open a headed Chrome window on
 * `url`, hand every page the user opens to `initNewPage`, and wait until all
 * tracked pages are closed.
 *
 * @param {string} url Page opened first; also used as the URL-guard fallback.
 * @param {string} randomToken Scan identifier; also assigned to `CRAWLEE_STORAGE_DIR`.
 * @param {object} viewportSettings Its `playwrightDeviceDetailsObject`, when set,
 *   is spread into the browser context options.
 * @param {*} blacklistedPatterns Stored on the page parameters unchanged.
 * @param {boolean} includeScreenshots Stored on the page parameters unchanged.
 * @param {string} [initialCustomFlowLabel] Used (trimmed) as the initial flow
 *   label when it is not blank.
 * @returns {Promise<{ urlsCrawled: object, customFlowLabel: (string|undefined) }>}
 */
const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns, includeScreenshots, initialCustomFlowLabel) => {
  // checks and delete datasets path if it already exists
  process.env.CRAWLEE_STORAGE_DIR = randomToken;

  const urlsCrawled = { ...constants.urlsCrawledObj };
  const { dataset } = await createCrawleeSubFolders(randomToken);
  const intermediateScreenshotsPath = getIntermediateScreenshotsPath(randomToken);
  const initialScannedIdx = 0;
  const processPageParams = new ProcessPageParams(
    initialScannedIdx,
    blacklistedPatterns,
    includeScreenshots,
    dataset,
    intermediateScreenshotsPath,
    urlsCrawled,
    randomToken,
  );
  if (initialCustomFlowLabel && initialCustomFlowLabel.trim()) {
    processPageParams.customFlowLabel = initialCustomFlowLabel.trim();
  }

  const pagesDict = {};
  const pageClosePromises = [];

  try {
    const deviceConfig = viewportSettings.playwrightDeviceDetailsObject;
    const hasCustomViewport = Boolean(deviceConfig);
    const launchArgs = hasCustomViewport ? ['--window-size=1920,1040'] : ['--start-maximized'];

    const browser = await chromium.launch({
      args: launchArgs,
      headless: false,
      channel: 'chrome',
    });

    const contextOptions = {
      ignoreHTTPSErrors: true,
      serviceWorkers: 'block',
      viewport: null,
    };
    if (hasCustomViewport) {
      // Device details are applied last so they override the defaults above.
      Object.assign(contextOptions, deviceConfig);
    }
    const context = await browser.newContext(contextOptions);
    register(context);

    const ignoreError = () => { };
    processPageParams.stopAll = async () => {
      try {
        await context.close().catch(ignoreError);
        await browser.close().catch(ignoreError);
      } catch {
      }
    };
    // For handling closing playwright browser and continue generate artifacts etc
    registerSoftClose(processPageParams.stopAll);

    addUrlGuardScript(context, { fallbackUrl: url });

    // Detection of new page
    context.on('page', async (newPage) => {
      await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
    });

    const page = await context.newPage();
    await page.goto(url, { timeout: 0 });

    // Wait for every tracked page to close. The close promises stay pending
    // until the corresponding page is closed; pages may be added while we are
    // waiting, so re-check and wait again for as long as any page is tracked.
    const waitForAllPagesClosed = async (promises) => {
      do {
        await Promise.all(promises);
      } while (Object.keys(pagesDict).length > 0);
      return true;
    };
    await waitForAllPagesClosed(pageClosePromises);
  } catch (error) {
    log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
    cleanUpAndExit(1, randomToken, true);
  }

  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});

  const { customFlowLabel } = processPageParams;
  return { urlsCrawled, customFlowLabel };
};
|
|
89
|
+
export default runCustom;
|