@govtechsg/oobee 0.10.36 → 0.10.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.github/workflows/docker-test.yml +1 -1
  2. package/DETAILS.md +3 -3
  3. package/INTEGRATION.md +142 -53
  4. package/README.md +17 -0
  5. package/REPORTS.md +362 -0
  6. package/exclusions.txt +4 -1
  7. package/package.json +2 -2
  8. package/src/constants/cliFunctions.ts +0 -7
  9. package/src/constants/common.ts +39 -1
  10. package/src/constants/constants.ts +9 -8
  11. package/src/crawlers/commonCrawlerFunc.ts +95 -220
  12. package/src/crawlers/crawlDomain.ts +10 -23
  13. package/src/crawlers/crawlLocalFile.ts +2 -0
  14. package/src/crawlers/crawlSitemap.ts +6 -4
  15. package/src/crawlers/custom/escapeCssSelector.ts +10 -0
  16. package/src/crawlers/custom/evaluateAltText.ts +13 -0
  17. package/src/crawlers/custom/extractAndGradeText.ts +0 -2
  18. package/src/crawlers/custom/extractText.ts +28 -0
  19. package/src/crawlers/custom/findElementByCssSelector.ts +46 -0
  20. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +982 -842
  21. package/src/crawlers/custom/framesCheck.ts +51 -0
  22. package/src/crawlers/custom/getAxeConfiguration.ts +126 -0
  23. package/src/crawlers/custom/gradeReadability.ts +30 -0
  24. package/src/crawlers/custom/xPathToCss.ts +178 -0
  25. package/src/crawlers/pdfScanFunc.ts +67 -26
  26. package/src/mergeAxeResults.ts +535 -132
  27. package/src/npmIndex.ts +130 -62
  28. package/src/screenshotFunc/htmlScreenshotFunc.ts +1 -1
  29. package/src/screenshotFunc/pdfScreenshotFunc.ts +34 -1
  30. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +1 -1
  31. package/src/static/ejs/partials/components/scanAbout.ejs +1 -1
  32. package/src/static/ejs/partials/footer.ejs +3 -3
  33. package/src/static/ejs/partials/scripts/reportSearch.ejs +112 -74
  34. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +2 -2
  35. package/src/static/ejs/partials/summaryMain.ejs +3 -3
  36. package/src/static/ejs/report.ejs +3 -3
  37. package/src/utils.ts +289 -13
  38. package/src/xPathToCssCypress.ts +178 -0
  39. package/src/crawlers/customAxeFunctions.ts +0 -82
@@ -0,0 +1,51 @@
1
/**
 * Resolves a CSS selector that crosses frame boundaries to the document that
 * actually contains the target element.
 *
 * The selector is split at every `> html` boundary. Each part before the last
 * is treated as the selector of an `<iframe>`/`<frame>` in the current
 * document; the function walks through each frame's `contentDocument` in turn.
 *
 * @param cssSelector - Selector that may contain one or more `> html` frame
 *   boundaries, e.g. `iframe#a > html > body > p`.
 * @returns `doc`: the innermost document reached; `remainingSelector`: the
 *   selector to run inside `doc`. If a frame cannot be found, or its
 *   `contentDocument` is inaccessible, the document reached so far is returned
 *   together with the original, unmodified selector. A selector with no
 *   `> html` boundary is returned unchanged with the main document.
 */
export function framesCheck(cssSelector: string): {
  doc: Document;
  remainingSelector: string;
} {
  // Split the selector into parts at "> html"
  const parts = cssSelector.split(/\s*>\s*html\s*/);

  // No frame boundary: nothing to resolve. Prefixing "html" here would only
  // corrupt the selector (e.g. "htmldiv > a").
  if (parts.length === 1) {
    return { doc: document, remainingSelector: cssSelector };
  }

  let doc: Document = document; // Start with the main document

  for (let i = 0; i < parts.length - 1; i++) {
    let frameSelector = parts[i].trim();

    // Every part after the first lost its leading "html" to the split. The
    // part normally still begins with its own ">" combinator, so only "html"
    // is re-added; adding "html > " would yield the invalid "html > > body".
    if (i > 0) {
      frameSelector = `html ${frameSelector}`;
    }

    // In a frameset document the frames are looked up inside the <frameset>
    // element, using only the portion of the selector after "body >" (when
    // such a portion exists).
    let searchRoot: ParentNode = doc;
    const frameset = doc.querySelector('frameset');
    if (frameset) {
      searchRoot = frameset;
      const afterBody = frameSelector.split('body >')[1];
      if (afterBody !== undefined) {
        frameSelector = afterBody.trim();
      }
    }

    const targetFrame = searchRoot.querySelector<HTMLIFrameElement | HTMLFrameElement>(
      frameSelector,
    );

    if (targetFrame && targetFrame.contentDocument) {
      // Descend into the frame's own document
      doc = targetFrame.contentDocument;
    } else {
      console.warn(
        `Iframe not found or contentDocument inaccessible for selector: ${frameSelector}`,
      );
      return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
    }
  }

  // The last part is the selector inside the innermost document. Re-attach
  // the "html" root that the split removed.
  const remainingSelector = `html${parts[parts.length - 1].trim()}`;

  return { doc, remainingSelector };
}
51
+
@@ -0,0 +1,126 @@
1
+ import { ImpactValue } from "axe-core";
2
+ import { evaluateAltText } from "./evaluateAltText.js";
3
+
4
/**
 * Builds the configuration object (branding, custom checks and rules) that
 * registers the oobee-specific checks alongside a few axe rules.
 *
 * @param enableWcagAaa - When true, adds the readability check and appends the
 *   `color-contrast-enhanced`, `identical-links-same-purpose` and
 *   `meta-refresh-no-exceptions` rules.
 * @param gradingReadabilityFlag - Readability score as a string; an empty
 *   string means no readability issue was detected.
 * @param disableOobee - When true, every rule whose id starts with `oobee` is
 *   removed from the returned rule list.
 * @returns An object with `branding`, `checks` and `rules` keys.
 */
export function getAxeConfiguration({
  enableWcagAaa = false,
  gradingReadabilityFlag = '',
  disableOobee = false,
}: {
  enableWcagAaa?: boolean;
  gradingReadabilityFlag?: string;
  disableOobee?: boolean;
}) {
  // Readability check: only registered when WCAG AAA checks are requested.
  const readabilityChecks = enableWcagAaa
    ? [
        {
          id: 'oobee-grading-text-contents',
          metadata: {
            impact: 'moderate' as ImpactValue,
            messages: {
              pass: 'The text content is easy to understand.',
              fail: 'The text content is potentially difficult to understand.',
              incomplete: `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`,
            },
          },
          evaluate: (_node: HTMLElement) => {
            // An empty flag passes. Otherwise the result is undefined rather
            // than false. NOTE(review): presumably this is what surfaces the
            // `incomplete` message above - confirm against axe-core.
            return gradingReadabilityFlag === '' ? true : undefined;
          },
        },
      ]
    : [];

  // Extra axe rules that are only switched on for WCAG AAA scans.
  const wcagAaaRules = enableWcagAaa
    ? [
        {
          id: 'color-contrast-enhanced',
          enabled: true,
        },
        {
          id: 'identical-links-same-purpose',
          enabled: true,
        },
        {
          id: 'meta-refresh-no-exceptions',
          enabled: true,
        },
      ]
    : [];

  const baseRules = [
    { id: 'target-size', enabled: true },
    {
      id: 'oobee-confusing-alt-text',
      selector: 'img[alt]',
      enabled: true,
      any: ['oobee-confusing-alt-text'],
      tags: ['wcag2a', 'wcag111'],
      metadata: {
        description: 'Ensures image alt text is clear and useful.',
        help: 'Image alt text must not be vague or unhelpful.',
        helpUrl: 'https://www.deque.com/blog/great-alt-text-introduction/',
      },
    },
    {
      id: 'oobee-accessible-label',
      // selector: '*', // to be set with the checker function output xpaths converted to css selectors
      enabled: true,
      any: ['oobee-accessible-label'],
      tags: ['wcag2a', 'wcag211', 'wcag412'],
      metadata: {
        description: 'Ensures clickable elements have an accessible label.',
        help: 'Clickable elements must have accessible labels.',
        helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
      },
    },
    {
      id: 'oobee-grading-text-contents',
      selector: 'html',
      enabled: true,
      any: ['oobee-grading-text-contents'],
      tags: ['wcag2aaa', 'wcag315'],
      metadata: {
        description:
          'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
        help: 'Text content should be clear and plain to ensure that it is easily understood.',
        helpUrl: 'https://www.wcag.com/uncategorized/3-1-5-reading-level/',
      },
    },
  ];

  // Drop the oobee-prefixed rules only when the caller disabled them.
  const selectedRules = baseRules.filter(
    rule => !(disableOobee && rule.id.startsWith('oobee')),
  );

  return {
    branding: {
      application: 'oobee',
    },
    checks: [
      {
        id: 'oobee-confusing-alt-text',
        metadata: {
          impact: 'serious' as ImpactValue,
          messages: {
            pass: 'The image alt text is probably useful.',
            fail: "The image alt text set as 'img', 'image', 'picture', 'photo', or 'graphic' is confusing or not useful.",
          },
        },
        evaluate: evaluateAltText,
      },
      {
        id: 'oobee-accessible-label',
        metadata: {
          impact: 'serious' as ImpactValue,
          messages: {
            pass: 'The clickable element has an accessible label.',
            fail: 'The clickable element does not have an accessible label.',
          },
        },
        // Fails whenever the element carries a non-empty data-flagged value.
        evaluate: (node: HTMLElement) => !node.dataset.flagged,
      },
      ...readabilityChecks,
    ],
    rules: selectedRules.concat(wcagAaaRules),
  };
}
126
+
@@ -0,0 +1,30 @@
1
+ import textReadability from 'text-readability';
2
+
3
/**
 * Grades the readability of the given sentences with the Flesch Reading Ease
 * formula.
 *
 * @param sentences - Sentences to grade; they are joined with single spaces.
 * @returns The score as a string when the text was graded and scored 50 or
 *   below; an empty string when there is nothing to report (no sentences,
 *   fewer than 20 words, a score above 50, or an error while grading).
 */
export function gradeReadability(sentences: string[]): string {
  try {
    // Nothing to grade
    if (sentences.length === 0) {
      return '';
    }

    // Join the valid sentences into a single string
    const filteredText = sentences.join(' ').trim();

    // Count the total number of words in the filtered text
    const wordCount = filteredText.split(/\s+/).length;

    // Grade the text content only if there are 20 words or more. Returning
    // early (instead of using 0 as a "not graded" marker) keeps a genuine
    // score of exactly 0 from being mistaken for "no issue".
    if (wordCount < 20) {
      return '';
    }

    const readabilityScore = textReadability.fleschReadingEase(filteredText);

    // Scores above 50 pass; anything else is reported as a string
    return readabilityScore > 50 ? '' : readabilityScore.toString();
  } catch (error) {
    console.error('Error extracting and grading text:', error);
    return ''; // Return an empty string in case of an error
  }
}
@@ -0,0 +1,178 @@
1
/**
 * Converts a restricted subset of XPath into an equivalent CSS selector.
 *
 * Supported per step: `/tag` and `//tag` navigation, `following-sibling::`,
 * one attribute predicate (`[@attr="value"]` or `[contains(@attr, "value")]`),
 * an optional positional predicate (`[n]` or `[last()]`), and the leading
 * `id(value)` form. Several expressions may be joined with `|`; they become a
 * comma-separated selector list.
 *
 * Id values are backslash-escaped as they are emitted, so characters such as
 * `/` or `.` inside an id stay valid while pseudo-classes and list separators
 * that follow the id are left untouched.
 *
 * @param expr - The XPath expression to convert.
 * @returns The CSS selector (or selector list).
 * @throws Error when `expr` is empty, when a predicate uses `text()` / `.` or
 *   a non-attribute operand, or when no step of an expression could be
 *   converted.
 */
export function xPathToCss(expr: string): string {
  // Characters that need a backslash escape inside a CSS id selector.
  const cssIdSpecialChars = /[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g;
  const escapeCssId = (id: string): string => id.replace(cssIdSpecialChars, '\\$&');
  const trimWhitespace = (value: string): string => value.replace(/^\s+|\s+$/g, '');

  // An expression is considered supported when it is non-trivial and contains
  // none of the unsupported XPath constructs listed in the second regex.
  const isValidXPath = (candidate: string): boolean =>
    typeof candidate !== 'undefined' &&
    candidate.replace(/[\s\-_=]/g, '') !== '' &&
    candidate.length ===
      candidate.replace(
        /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
        '',
      ).length;

  // Builds the per-step matcher from a template written with Python-style
  // named groups; the names are stripped, so groups are read by index below.
  const getValidationRegex = (): RegExp => {
    let pattern =
      '(?P<node>' +
      '(' +
      '^id\\(["\\\']?(?P<idvalue>%(value)s)["\\\']?\\)' + // special case! `id(idValue)`
      '|' +
      '(?P<nav>//?(?:following-sibling::)?)(?P<tag>%(tag)s)' + // `//div`
      '(\\[(' +
      '(?P<matched>(?P<mattr>@?%(attribute)s=["\\\'](?P<mvalue>%(value)s))["\\\']' + // `[@id="well"]` supported and `[text()="yes"]` is not
      '|' +
      '(?P<contained>contains\\((?P<cattr>@?%(attribute)s,\\s*["\\\'](?P<cvalue>%(value)s)["\\\']\\))' + // `[contains(@id, "bleh")]` supported and `[contains(text(), "some")]` is not
      ')\\])?' +
      '(\\[\\s*(?P<nth>\\d+|last\\(\\s*\\))\\s*\\])?' +
      ')' +
      ')';

    const subRegexes: Record<string, string> = {
      tag: '([a-zA-Z][a-zA-Z0-9:-]*|\\*)',
      attribute: '[.a-zA-Z_:][-\\w:.]*(\\(\\))?)',
      value: '\\s*[\\w/:][-/\\w\\s,:;.]*',
    };

    for (const [key, fragment] of Object.entries(subRegexes)) {
      pattern = pattern.replace(new RegExp(`%\\(${key}\\)s`, 'gi'), () => fragment);
    }

    pattern = pattern.replace(
      /\?P<node>|\?P<idvalue>|\?P<nav>|\?P<tag>|\?P<matched>|\?P<mattr>|\?P<mvalue>|\?P<contained>|\?P<cattr>|\?P<cvalue>|\?P<nth>/gi,
      '',
    );

    return new RegExp(pattern, 'gi');
  };

  // Rewrites the common "class token" idiom
  // contains(concat(" ", @class, " "), " name ") into @class="name".
  const preParseXpath = (raw: string): string =>
    raw.replace(
      /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi,
      '@class="$1"',
    );

  if (!expr) {
    throw new Error('Missing XPath expression');
  }

  const xPath = preParseXpath(expr);

  if (!isValidXPath(xPath)) {
    console.error(`Invalid or unsupported XPath: ${xPath}`);
    // Deliberately not thrown, so that conversion is still attempted for
    // expressions the validator rejects. For example
    // //*[@id="google_ads_iframe_/4654/dweb/imu1/homepage/landingpage/na_0"]/html/body/div[1]/a
    // is rejected because of the slashes in the id, yet converts correctly
    // once those slashes are escaped in the id selector.
  }

  const nodePattern = getValidationRegex();
  const cssSelectors: string[] = [];

  for (const singlePath of xPath.split('|')) {
    // An empty alternative ends processing, as in the original loop.
    if (!singlePath) {
      break;
    }

    const steps: string[] = [];
    let nodes: RegExpExecArray | null;

    while ((nodes = nodePattern.exec(singlePath)) !== null) {
      // Group indexes follow the template in getValidationRegex.
      const nav: string | undefined = nodes[4];
      const tagName: string | undefined = nodes[5];
      const hasPredicate = Boolean(nodes[7]);
      const mattr: string = nodes[10] || nodes[14] || '';
      const mvalue: string = nodes[12] || nodes[16] || '';
      const contained: string | undefined = nodes[13];
      const cattr: string = nodes[14] || '';
      const cvalue: string | undefined = nodes[16];
      const idvalue: string | undefined = nodes[12] || nodes[3];
      const nthToken: string | undefined = nodes[18];

      // The first step never gets a combinator.
      let combinator = '';
      if (steps.length !== 0 && nav) {
        if (nav.includes('following-sibling::')) {
          combinator = ' + ';
        } else {
          combinator = nav === '//' ? ' ' : ' > ';
        }
      }

      const tag = tagName === '*' ? '' : tagName || '';

      let attr = '';
      if (contained) {
        if (!cattr.startsWith('@')) {
          throw new Error(`Invalid or unsupported XPath attribute: ${cattr}`);
        }
        attr = `[${cattr.replace(/^@/, '')}*="${cvalue}"]`;
      } else if (hasPredicate) {
        switch (mattr) {
          case '@id':
            // Escape here so that only the id itself is escaped.
            attr = `#${escapeCssId(trimWhitespace(mvalue).replace(/\s/g, '#'))}`;
            break;
          case '@class':
            attr = `.${trimWhitespace(mvalue).replace(/\s/g, '.')}`;
            break;
          case 'text()':
          case '.':
            throw new Error(`Invalid or unsupported XPath attribute: ${mattr}`);
          default: {
            if (!mattr.startsWith('@')) {
              throw new Error(`Invalid or unsupported XPath attribute: ${mattr}`);
            }
            // The value is already wrapped in quotes below; wrapping it a
            // second time would produce the invalid `[attr=""a b""]`.
            const value = mvalue.includes(' ') ? trimWhitespace(mvalue) : mvalue;
            attr = `[${mattr.replace('@', '')}="${value}"]`;
            break;
          }
        }
      } else if (idvalue) {
        attr = `#${escapeCssId(idvalue.replace(/\s/, '#'))}`;
      }

      let nth = '';
      if (nthToken) {
        if (nthToken.includes('last')) {
          nth = ':last-of-type';
        } else {
          const index = parseInt(nthToken, 10);
          if (isNaN(index)) {
            throw new Error(`Invalid or unsupported XPath attribute: ${nthToken}`);
          }
          nth = index !== 1 ? `:nth-of-type(${nthToken})` : ':first-of-type';
        }
      }

      steps.push(combinator + tag + attr + nth);
    }

    const selector = steps.join('');

    if (selector === '') {
      throw new Error('Invalid or unsupported XPath');
    }

    cssSelectors.push(selector);
  }

  return cssSelectors.join(', ');
}
@@ -256,30 +256,63 @@ export const handlePdfDownload = (
256
256
 
257
257
  pdfDownloads.push(
258
258
  new Promise<void>(async resolve => {
259
- const bufs = [];
260
- let pdfResponse: ReadStream;
259
+ let bufs: Buffer[] = [];
260
+ let buf: Buffer;
261
261
 
262
262
  if (isFilePath(url)) {
263
- // Read the file from the file system
263
+ // Read from local file system
264
264
  const filePath = new URL(url).pathname;
265
- pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
265
+ const pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
266
+
267
+ const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
268
+ flags: 'a',
269
+ });
270
+
271
+ pdfResponse.on('data', (chunk: Buffer) => {
272
+ downloadFile.write(chunk, 'binary');
273
+ bufs.push(Buffer.from(chunk));
274
+ });
275
+
276
+ pdfResponse.on('end', () => {
277
+ downloadFile.end();
278
+ buf = Buffer.concat(bufs);
279
+
280
+ if (isPDF(buf)) {
281
+ guiInfoLog(guiInfoStatusTypes.SCANNED, {
282
+ numScanned: urlsCrawled.scanned.length,
283
+ urlScanned: request.url,
284
+ });
285
+ urlsCrawled.scanned.push({
286
+ url: request.url,
287
+ pageTitle,
288
+ actualUrl: url,
289
+ });
290
+ } else {
291
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
292
+ numScanned: urlsCrawled.scanned.length,
293
+ urlScanned: request.url,
294
+ });
295
+ urlsCrawled.invalid.push({
296
+ url: request.url,
297
+ pageTitle: url,
298
+ actualUrl: url,
299
+ });
300
+ }
301
+
302
+ resolve();
303
+ });
266
304
  } else {
267
- // Send HTTP/HTTPS request
268
- pdfResponse = await sendRequest({ responseType: 'buffer', isStream: true });
269
- pdfResponse.setEncoding('binary');
270
- }
271
- const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
272
- flags: 'a',
273
- });
305
+ // Download from remote URL
306
+ const response = await sendRequest({ responseType: 'buffer' });
307
+ buf = Buffer.isBuffer(response) ? response : response.body;
274
308
 
275
- pdfResponse.on('data', (chunk: Buffer) => {
276
- downloadFile.write(chunk, 'binary');
277
- bufs.push(Buffer.from(chunk));
278
- });
309
+ const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
310
+ flags: 'a',
311
+ });
279
312
 
280
- pdfResponse.on('end', () => {
313
+ downloadFile.write(buf, 'binary');
281
314
  downloadFile.end();
282
- const buf = Buffer.concat(bufs);
315
+
283
316
  if (isPDF(buf)) {
284
317
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
285
318
  numScanned: urlsCrawled.scanned.length,
@@ -298,11 +331,12 @@ export const handlePdfDownload = (
298
331
  urlsCrawled.invalid.push({
299
332
  url: request.url,
300
333
  pageTitle: url,
301
- actualUrl: url, // i.e. actualUrl
334
+ actualUrl: url,
302
335
  });
303
336
  }
337
+
304
338
  resolve();
305
- });
339
+ }
306
340
  }),
307
341
  );
308
342
 
@@ -374,14 +408,21 @@ export const mapPdfScanResults = async (
374
408
  const { itemDetails, validationResult } = jobs[jobIdx];
375
409
  const { name: fileName } = itemDetails;
376
410
 
377
- const uuid = fileName
378
- .split(os.platform() === 'win32' ? '\\' : '/')
379
- .pop()
380
- .split('.')[0];
381
- const url = uuidToUrlMapping[uuid];
382
- const pageTitle = decodeURI(url).split('/').pop();
383
- const filePath = `${randomToken}/${uuid}.pdf`;
411
+ const rawFileName = fileName.split(os.platform() === 'win32' ? '\\' : '/').pop();
412
+ const fileNameWithoutExt = rawFileName.replace(/\.pdf$/i, '');
384
413
 
414
+ const url =
415
+ uuidToUrlMapping[rawFileName] || // exact match like 'Some-filename.pdf'
416
+ uuidToUrlMapping[fileNameWithoutExt] || // uuid-based key like 'a9f7ebbd-5a90...'
417
+ `file://${fileName}`; // fallback
418
+
419
+ const filePath = `${randomToken}/${rawFileName}`;
420
+
421
+
422
+ const pageTitle = decodeURI(url).split('/').pop();
423
+ translated.url = url;
424
+ translated.pageTitle = pageTitle;
425
+
385
426
  translated.url = url;
386
427
  translated.pageTitle = pageTitle;
387
428
  translated.filePath = filePath;