@govtechsg/oobee 0.10.28 → 0.10.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +1 -1
- package/INSTALLATION.md +1 -1
- package/exclusions.txt +2 -1
- package/package.json +2 -2
- package/src/combine.ts +4 -0
- package/src/constants/cliFunctions.ts +1 -1
- package/src/constants/common.ts +62 -3
- package/src/constants/constants.ts +1 -1
- package/src/crawlers/commonCrawlerFunc.ts +57 -47
- package/src/crawlers/crawlDomain.ts +20 -24
- package/src/crawlers/crawlLocalFile.ts +5 -3
- package/src/crawlers/crawlSitemap.ts +77 -22
- package/src/crawlers/custom/utils.ts +7 -2
- package/src/crawlers/customAxeFunctions.ts +1 -1
- package/src/mergeAxeResults.ts +40 -2
package/Dockerfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Use Microsoft Playwright image as base image
|
2
2
|
# Node version is v22
|
3
|
-
FROM mcr.microsoft.com/playwright:v1.
|
3
|
+
FROM mcr.microsoft.com/playwright:v1.50.0-noble
|
4
4
|
|
5
5
|
# Installation of packages for oobee and runner
|
6
6
|
RUN apt-get update && apt-get install -y zip git
|
package/INSTALLATION.md
CHANGED
@@ -6,7 +6,7 @@ Oobee (CLI) is provided as a portable distribution which minimises installation
|
|
6
6
|
|
7
7
|
Oobee is a customisable, automated accessibility testing tool that allows software development teams to find and fix accessibility problems to improve persons with disabilities (PWDs) access to digital services.
|
8
8
|
|
9
|
-
Oobee (CLI) allows software engineers to run Oobee as part of their software development environment from the command line, as well as [integrate it into their CI/CD pipeline](
|
9
|
+
Oobee (CLI) allows software engineers to run Oobee as part of their software development environment from the command line, as well as [integrate it into their CI/CD pipeline](INTEGRATION.md).
|
10
10
|
|
11
11
|
## System Requirements
|
12
12
|
|
package/exclusions.txt
CHANGED
package/package.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"name": "@govtechsg/oobee",
|
3
3
|
"main": "dist/npmIndex.js",
|
4
|
-
"version": "0.10.
|
4
|
+
"version": "0.10.33",
|
5
5
|
"type": "module",
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
7
7
|
"dependencies": {
|
@@ -23,7 +23,7 @@
|
|
23
23
|
"mime-types": "^2.1.35",
|
24
24
|
"minimatch": "^9.0.3",
|
25
25
|
"pdfjs-dist": "github:veraPDF/pdfjs-dist#v4.4.168-taggedPdf-0.1.20",
|
26
|
-
"playwright": "1.
|
26
|
+
"playwright": "1.50.1",
|
27
27
|
"prettier": "^3.1.0",
|
28
28
|
"print-message": "^3.0.1",
|
29
29
|
"safe-regex": "^2.1.1",
|
package/src/combine.ts
CHANGED
@@ -97,6 +97,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
97
97
|
isEnableWcagAaa: envDetails.ruleset,
|
98
98
|
isSlowScanMode: envDetails.specifiedMaxConcurrency,
|
99
99
|
isAdhereRobots: envDetails.followRobots,
|
100
|
+
deviceChosen: deviceToScan,
|
100
101
|
};
|
101
102
|
|
102
103
|
const viewportSettings: ViewportSettingsClass = new ViewportSettingsClass(
|
@@ -209,6 +210,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
209
210
|
...urlsCrawledObj.error,
|
210
211
|
...urlsCrawledObj.invalid,
|
211
212
|
...urlsCrawledObj.forbidden,
|
213
|
+
...urlsCrawledObj.userExcluded,
|
212
214
|
];
|
213
215
|
const basicFormHTMLSnippet = await generateArtifacts(
|
214
216
|
randomToken,
|
@@ -239,6 +241,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
239
241
|
pagesNotScanned.length,
|
240
242
|
metadata,
|
241
243
|
);
|
244
|
+
} else {
|
245
|
+
printMessage([`No pages were scanned.`], alertMessageOptions);
|
242
246
|
}
|
243
247
|
} else {
|
244
248
|
printMessage([`No pages were scanned.`], alertMessageOptions);
|
@@ -269,7 +269,7 @@ export const cliOptions: { [key: string]: Options } = {
|
|
269
269
|
default: 'default',
|
270
270
|
coerce: option => {
|
271
271
|
const validChoices = Object.values(RuleFlags);
|
272
|
-
const userChoices: string[] = option.split(',');
|
272
|
+
const userChoices: string[] = String(option).split(',');
|
273
273
|
const invalidUserChoices = userChoices.filter(
|
274
274
|
choice => !validChoices.includes(choice as RuleFlags),
|
275
275
|
);
|
package/src/constants/common.ts
CHANGED
@@ -1819,13 +1819,72 @@ export const urlWithoutAuth = (url: string): string => {
|
|
1819
1819
|
};
|
1820
1820
|
|
1821
1821
|
export const waitForPageLoaded = async (page, timeout = 10000) => {
|
1822
|
+
const OBSERVER_TIMEOUT = timeout; // Ensure observer timeout does not exceed the main timeout
|
1823
|
+
|
1822
1824
|
return Promise.race([
|
1823
|
-
page.waitForLoadState('load'),
|
1824
|
-
page.waitForLoadState('networkidle'),
|
1825
|
-
new Promise(resolve => setTimeout(resolve, timeout)),
|
1825
|
+
page.waitForLoadState('load'), // Ensure page load completes
|
1826
|
+
page.waitForLoadState('networkidle'), // Wait for network requests to settle
|
1827
|
+
new Promise(resolve => setTimeout(resolve, timeout)), // Hard timeout as a fallback
|
1828
|
+
page.evaluate((OBSERVER_TIMEOUT) => {
|
1829
|
+
return new Promise((resolve) => {
|
1830
|
+
// Skip mutation check for PDFs
|
1831
|
+
if (document.contentType === 'application/pdf') {
|
1832
|
+
resolve('Skipping DOM mutation check for PDF.');
|
1833
|
+
return;
|
1834
|
+
}
|
1835
|
+
|
1836
|
+
let timeout;
|
1837
|
+
let mutationCount = 0;
|
1838
|
+
const MAX_MUTATIONS = 250; // Limit max mutations
|
1839
|
+
const mutationHash = {};
|
1840
|
+
|
1841
|
+
const observer = new MutationObserver(mutationsList => {
|
1842
|
+
clearTimeout(timeout);
|
1843
|
+
|
1844
|
+
mutationCount++;
|
1845
|
+
if (mutationCount > MAX_MUTATIONS) {
|
1846
|
+
observer.disconnect();
|
1847
|
+
resolve('Too many mutations detected, exiting.');
|
1848
|
+
return;
|
1849
|
+
}
|
1850
|
+
|
1851
|
+
mutationsList.forEach(mutation => {
|
1852
|
+
if (mutation.target instanceof Element) {
|
1853
|
+
Array.from(mutation.target.attributes).forEach(attr => {
|
1854
|
+
const mutationKey = `${mutation.target.nodeName}-${attr.name}`;
|
1855
|
+
|
1856
|
+
if (mutationKey) {
|
1857
|
+
mutationHash[mutationKey] = (mutationHash[mutationKey] || 0) + 1;
|
1858
|
+
|
1859
|
+
if (mutationHash[mutationKey] >= 10) {
|
1860
|
+
observer.disconnect();
|
1861
|
+
resolve(`Repeated mutation detected for ${mutationKey}, exiting.`);
|
1862
|
+
}
|
1863
|
+
}
|
1864
|
+
});
|
1865
|
+
}
|
1866
|
+
});
|
1867
|
+
|
1868
|
+
// If no mutations occur for 1 second, resolve
|
1869
|
+
timeout = setTimeout(() => {
|
1870
|
+
observer.disconnect();
|
1871
|
+
resolve('DOM stabilized after mutations.');
|
1872
|
+
}, 1000);
|
1873
|
+
});
|
1874
|
+
|
1875
|
+
// Final timeout to avoid infinite waiting
|
1876
|
+
timeout = setTimeout(() => {
|
1877
|
+
observer.disconnect();
|
1878
|
+
resolve('Observer timeout reached, exiting.');
|
1879
|
+
}, OBSERVER_TIMEOUT);
|
1880
|
+
|
1881
|
+
observer.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
|
1882
|
+
});
|
1883
|
+
}, OBSERVER_TIMEOUT), // Pass OBSERVER_TIMEOUT dynamically to the browser context
|
1826
1884
|
]);
|
1827
1885
|
};
|
1828
1886
|
|
1887
|
+
|
1829
1888
|
function isValidHttpUrl(urlString) {
|
1830
1889
|
const pattern = /^(http|https):\/\/[^ "]+$/;
|
1831
1890
|
return pattern.test(urlString);
|
@@ -186,7 +186,7 @@ export class UrlsCrawled {
|
|
186
186
|
error: { url: string }[] = [];
|
187
187
|
exceededRequests: string[] = [];
|
188
188
|
forbidden: string[] = [];
|
189
|
-
userExcluded: string[] = [];
|
189
|
+
userExcluded: { url: string; actualUrl: string; pageTitle: string }[] = [];
|
190
190
|
everything: string[] = [];
|
191
191
|
|
192
192
|
constructor(urlsCrawled?: Partial<UrlsCrawled>) {
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import crawlee, { CrawlingContext, PlaywrightGotoOptions } from 'crawlee';
|
2
2
|
import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
|
3
|
-
import { xPathToCss } from '../xPathToCss.js';
|
4
3
|
import { BrowserContext, Page } from 'playwright';
|
4
|
+
import { xPathToCss } from '../xPathToCss.js';
|
5
5
|
import {
|
6
6
|
axeScript,
|
7
7
|
guiInfoStatusTypes,
|
@@ -357,24 +357,28 @@ export const runAxeScript = async ({
|
|
357
357
|
return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
|
358
358
|
},
|
359
359
|
},
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
360
|
+
...(enableWcagAaa
|
361
|
+
? [
|
362
|
+
{
|
363
|
+
...customAxeConfig.checks[2],
|
364
|
+
evaluate: (_node: HTMLElement) => {
|
365
|
+
if (gradingReadabilityFlag === '') {
|
366
|
+
return true; // Pass if no readability issues
|
367
|
+
}
|
368
|
+
// Dynamically update the grading messages
|
369
|
+
const gradingCheck = customAxeConfig.checks.find(
|
370
|
+
check => check.id === 'oobee-grading-text-contents',
|
371
|
+
);
|
372
|
+
if (gradingCheck) {
|
373
|
+
gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag
|
374
|
+
}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
|
375
|
+
}
|
376
|
+
|
377
|
+
// Fail if readability issues are detected
|
378
|
+
},
|
379
|
+
},
|
380
|
+
]
|
381
|
+
: []),
|
378
382
|
],
|
379
383
|
rules: customAxeConfig.rules
|
380
384
|
.filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
|
@@ -416,9 +420,12 @@ export const runAxeScript = async ({
|
|
416
420
|
const escapedCssSelectors =
|
417
421
|
oobeeAccessibleLabelFlaggedCssSelectors.map(escapeCSSSelector);
|
418
422
|
|
419
|
-
function framesCheck(cssSelector: string): {
|
423
|
+
function framesCheck(cssSelector: string): {
|
424
|
+
doc: Document;
|
425
|
+
remainingSelector: string;
|
426
|
+
} {
|
420
427
|
let doc = document; // Start with the main document
|
421
|
-
let remainingSelector =
|
428
|
+
let remainingSelector = ''; // To store the last part of the selector
|
422
429
|
let targetIframe = null;
|
423
430
|
|
424
431
|
// Split the selector into parts at "> html"
|
@@ -429,18 +436,18 @@ export const runAxeScript = async ({
|
|
429
436
|
|
430
437
|
// Add back '> html' to the current part
|
431
438
|
if (i > 0) {
|
432
|
-
iframeSelector =
|
439
|
+
iframeSelector = `html > ${iframeSelector}`;
|
433
440
|
}
|
434
441
|
|
435
442
|
let frameset = null;
|
436
443
|
// Find the iframe using the current document context
|
437
|
-
if (doc.querySelector(
|
438
|
-
frameset = doc.querySelector(
|
444
|
+
if (doc.querySelector('frameset')) {
|
445
|
+
frameset = doc.querySelector('frameset');
|
439
446
|
}
|
440
447
|
|
441
448
|
if (frameset) {
|
442
449
|
doc = frameset;
|
443
|
-
iframeSelector = iframeSelector.split(
|
450
|
+
iframeSelector = iframeSelector.split('body >')[1].trim();
|
444
451
|
}
|
445
452
|
targetIframe = doc.querySelector(iframeSelector);
|
446
453
|
|
@@ -448,7 +455,9 @@ export const runAxeScript = async ({
|
|
448
455
|
// Update the document to the iframe's contentDocument
|
449
456
|
doc = targetIframe.contentDocument;
|
450
457
|
} else {
|
451
|
-
console.warn(
|
458
|
+
console.warn(
|
459
|
+
`Iframe not found or contentDocument inaccessible for selector: ${iframeSelector}`,
|
460
|
+
);
|
452
461
|
return { doc, remainingSelector: cssSelector }; // Return original selector if iframe not found
|
453
462
|
}
|
454
463
|
}
|
@@ -457,19 +466,18 @@ export const runAxeScript = async ({
|
|
457
466
|
remainingSelector = diffParts[diffParts.length - 1].trim();
|
458
467
|
|
459
468
|
// Remove any leading '>' combinators from remainingSelector
|
460
|
-
remainingSelector =
|
469
|
+
remainingSelector = `html${remainingSelector}`;
|
461
470
|
|
462
471
|
return { doc, remainingSelector };
|
463
472
|
}
|
464
473
|
|
465
|
-
|
466
474
|
function findElementByCssSelector(cssSelector: string): string | null {
|
467
475
|
let doc = document;
|
468
476
|
|
469
477
|
// Check if the selector includes 'frame' or 'iframe' and update doc and selector
|
470
478
|
|
471
479
|
if (/\s*>\s*html\s*/.test(cssSelector)) {
|
472
|
-
|
480
|
+
const inFrames = framesCheck(cssSelector);
|
473
481
|
doc = inFrames.doc;
|
474
482
|
cssSelector = inFrames.remainingSelector;
|
475
483
|
}
|
@@ -515,24 +523,26 @@ export const runAxeScript = async ({
|
|
515
523
|
description: 'Ensures clickable elements have an accessible label.',
|
516
524
|
help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
|
517
525
|
helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
|
518
|
-
nodes: escapedCssSelectors
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
526
|
+
nodes: escapedCssSelectors
|
527
|
+
.map(cssSelector => ({
|
528
|
+
html: findElementByCssSelector(cssSelector),
|
529
|
+
target: [cssSelector],
|
530
|
+
impact: 'serious' as ImpactValue,
|
531
|
+
failureSummary:
|
532
|
+
'Fix any of the following:\n The clickable element does not have an accessible label.',
|
533
|
+
any: [
|
534
|
+
{
|
535
|
+
id: 'oobee-accessible-label',
|
536
|
+
data: null,
|
537
|
+
relatedNodes: [],
|
538
|
+
impact: 'serious',
|
539
|
+
message: 'The clickable element does not have an accessible label.',
|
540
|
+
},
|
541
|
+
],
|
542
|
+
all: [],
|
543
|
+
none: [],
|
544
|
+
}))
|
545
|
+
.filter(item => item.html),
|
536
546
|
};
|
537
547
|
|
538
548
|
results.violations = [...results.violations, oobeeAccessibleLabelViolations];
|
@@ -40,8 +40,7 @@ import {
|
|
40
40
|
import { silentLogger, guiInfoLog } from '../logs.js';
|
41
41
|
import { ViewportSettingsClass } from '../combine.js';
|
42
42
|
|
43
|
-
const isBlacklisted = (url: string) => {
|
44
|
-
const blacklistedPatterns = getBlackListedPatterns(null);
|
43
|
+
const isBlacklisted = (url: string, blacklistedPatterns: string[]) => {
|
45
44
|
if (!blacklistedPatterns) {
|
46
45
|
return false;
|
47
46
|
}
|
@@ -122,18 +121,10 @@ const crawlDomain = async ({
|
|
122
121
|
const isScanPdfs = ['all', 'pdf-only'].includes(fileTypes);
|
123
122
|
const { maxConcurrency } = constants;
|
124
123
|
const { playwrightDeviceDetailsObject } = viewportSettings;
|
125
|
-
const isBlacklistedUrl = isBlacklisted(url);
|
124
|
+
const isBlacklistedUrl = isBlacklisted(url, blacklistedPatterns);
|
126
125
|
|
127
126
|
const httpsAgent = new https.Agent({ rejectUnauthorized: false });
|
128
127
|
|
129
|
-
if (isBlacklistedUrl) {
|
130
|
-
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
131
|
-
numScanned: urlsCrawled.scanned.length,
|
132
|
-
urlScanned: url,
|
133
|
-
});
|
134
|
-
return;
|
135
|
-
}
|
136
|
-
|
137
128
|
// Boolean to omit axe scan for basic auth URL
|
138
129
|
let isBasicAuth = false;
|
139
130
|
let authHeader = '';
|
@@ -315,7 +306,7 @@ const crawlDomain = async ({
|
|
315
306
|
|
316
307
|
const isExcluded = (newPageUrl: string): boolean => {
|
317
308
|
const isAlreadyScanned: boolean = urlsCrawled.scanned.some(item => item.url === newPageUrl);
|
318
|
-
const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl);
|
309
|
+
const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl, blacklistedPatterns);
|
319
310
|
const isNotFollowStrategy: boolean = !isFollowStrategy(newPageUrl, initialPageUrl, strategy);
|
320
311
|
return isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy;
|
321
312
|
};
|
@@ -609,13 +600,13 @@ const crawlDomain = async ({
|
|
609
600
|
}
|
610
601
|
|
611
602
|
await waitForPageLoaded(page, 10000);
|
612
|
-
let actualUrl = request.url;
|
603
|
+
let actualUrl = page.url() || request.loadedUrl || request.url;
|
613
604
|
|
614
605
|
if (page.url() !== 'about:blank') {
|
615
606
|
actualUrl = page.url();
|
616
607
|
}
|
617
608
|
|
618
|
-
if (isBlacklisted(actualUrl) || (isUrlPdf(actualUrl) && !isScanPdfs)) {
|
609
|
+
if (!isFollowStrategy(url, actualUrl, strategy) && (isBlacklisted(actualUrl, blacklistedPatterns) || (isUrlPdf(actualUrl) && !isScanPdfs))) {
|
619
610
|
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
620
611
|
numScanned: urlsCrawled.scanned.length,
|
621
612
|
urlScanned: actualUrl,
|
@@ -684,8 +675,13 @@ const crawlDomain = async ({
|
|
684
675
|
return;
|
685
676
|
}
|
686
677
|
|
687
|
-
if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
688
|
-
urlsCrawled.userExcluded.push(
|
678
|
+
if (!isFollowStrategy(url, actualUrl, strategy) && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
679
|
+
urlsCrawled.userExcluded.push({
|
680
|
+
url: request.url,
|
681
|
+
pageTitle: request.url,
|
682
|
+
actualUrl: actualUrl,
|
683
|
+
});
|
684
|
+
|
689
685
|
await enqueueProcess(page, enqueueLinks, browserContext);
|
690
686
|
return;
|
691
687
|
}
|
@@ -710,18 +706,18 @@ const crawlDomain = async ({
|
|
710
706
|
|
711
707
|
if (isScanHtml) {
|
712
708
|
// For deduplication, if the URL is redirected, we want to store the original URL and the redirected URL (actualUrl)
|
713
|
-
const isRedirected = !areLinksEqual(
|
709
|
+
const isRedirected = !areLinksEqual(actualUrl, request.url);
|
714
710
|
|
715
711
|
// check if redirected link is following strategy (same-domain/same-hostname)
|
716
712
|
const isLoadedUrlFollowStrategy = isFollowStrategy(
|
717
|
-
|
713
|
+
actualUrl,
|
718
714
|
request.url,
|
719
715
|
strategy,
|
720
716
|
);
|
721
717
|
if (isRedirected && !isLoadedUrlFollowStrategy) {
|
722
718
|
urlsCrawled.notScannedRedirects.push({
|
723
719
|
fromUrl: request.url,
|
724
|
-
toUrl:
|
720
|
+
toUrl: actualUrl, // i.e. actualUrl
|
725
721
|
});
|
726
722
|
return;
|
727
723
|
}
|
@@ -730,13 +726,13 @@ const crawlDomain = async ({
|
|
730
726
|
|
731
727
|
if (isRedirected) {
|
732
728
|
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
|
733
|
-
item => (item.actualUrl || item.url) ===
|
729
|
+
item => (item.actualUrl || item.url) === actualUrl,
|
734
730
|
);
|
735
731
|
|
736
732
|
if (isLoadedUrlInCrawledUrls) {
|
737
733
|
urlsCrawled.notScannedRedirects.push({
|
738
734
|
fromUrl: request.url,
|
739
|
-
toUrl:
|
735
|
+
toUrl: actualUrl, // i.e. actualUrl
|
740
736
|
});
|
741
737
|
return;
|
742
738
|
}
|
@@ -751,16 +747,16 @@ const crawlDomain = async ({
|
|
751
747
|
urlsCrawled.scanned.push({
|
752
748
|
url: urlWithoutAuth(request.url),
|
753
749
|
pageTitle: results.pageTitle,
|
754
|
-
actualUrl:
|
750
|
+
actualUrl: actualUrl, // i.e. actualUrl
|
755
751
|
});
|
756
752
|
|
757
753
|
urlsCrawled.scannedRedirects.push({
|
758
754
|
fromUrl: urlWithoutAuth(request.url),
|
759
|
-
toUrl:
|
755
|
+
toUrl: actualUrl, // i.e. actualUrl
|
760
756
|
});
|
761
757
|
|
762
758
|
results.url = request.url;
|
763
|
-
results.actualUrl =
|
759
|
+
results.actualUrl = actualUrl;
|
764
760
|
await dataset.pushData(results);
|
765
761
|
}
|
766
762
|
} else {
|
@@ -153,6 +153,8 @@ const crawlLocalFile = async (
|
|
153
153
|
await page.goto(request.url);
|
154
154
|
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
155
155
|
|
156
|
+
const actualUrl = page.url() || request.loadedUrl || request.url;
|
157
|
+
|
156
158
|
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
157
159
|
numScanned: urlsCrawled.scanned.length,
|
158
160
|
urlScanned: request.url,
|
@@ -161,16 +163,16 @@ const crawlLocalFile = async (
|
|
161
163
|
urlsCrawled.scanned.push({
|
162
164
|
url: request.url,
|
163
165
|
pageTitle: results.pageTitle,
|
164
|
-
actualUrl:
|
166
|
+
actualUrl: actualUrl, // i.e. actualUrl
|
165
167
|
});
|
166
168
|
|
167
169
|
urlsCrawled.scannedRedirects.push({
|
168
170
|
fromUrl: request.url,
|
169
|
-
toUrl:
|
171
|
+
toUrl: actualUrl, // i.e. actualUrl
|
170
172
|
});
|
171
173
|
|
172
174
|
results.url = request.url;
|
173
|
-
|
175
|
+
results.actualUrl = actualUrl;
|
174
176
|
|
175
177
|
await dataset.pushData(results);
|
176
178
|
} else {
|
@@ -18,7 +18,7 @@ import {
|
|
18
18
|
waitForPageLoaded,
|
19
19
|
isFilePath,
|
20
20
|
} from '../constants/common.js';
|
21
|
-
import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
|
21
|
+
import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
|
22
22
|
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
|
23
23
|
import { guiInfoLog } from '../logs.js';
|
24
24
|
|
@@ -161,21 +161,67 @@ const crawlSitemap = async (
|
|
161
161
|
],
|
162
162
|
},
|
163
163
|
requestList,
|
164
|
+
postNavigationHooks: [
|
165
|
+
async ({ page, request }) => {
|
166
|
+
try {
|
167
|
+
// Wait for a quiet period in the DOM, but with safeguards
|
168
|
+
await page.evaluate(() => {
|
169
|
+
return new Promise((resolve) => {
|
170
|
+
let timeout;
|
171
|
+
let mutationCount = 0;
|
172
|
+
const MAX_MUTATIONS = 250; // Prevent infinite mutations
|
173
|
+
const OBSERVER_TIMEOUT = 5000; // Hard timeout to exit
|
174
|
+
|
175
|
+
const observer = new MutationObserver(() => {
|
176
|
+
clearTimeout(timeout);
|
177
|
+
|
178
|
+
mutationCount++;
|
179
|
+
if (mutationCount > MAX_MUTATIONS) {
|
180
|
+
observer.disconnect();
|
181
|
+
resolve('Too many mutations detected, exiting.');
|
182
|
+
return;
|
183
|
+
}
|
184
|
+
|
185
|
+
timeout = setTimeout(() => {
|
186
|
+
observer.disconnect();
|
187
|
+
resolve('DOM stabilized after mutations.');
|
188
|
+
}, 1000);
|
189
|
+
});
|
190
|
+
|
191
|
+
timeout = setTimeout(() => {
|
192
|
+
observer.disconnect();
|
193
|
+
resolve('Observer timeout reached, exiting.');
|
194
|
+
}, OBSERVER_TIMEOUT); // Ensure the observer stops after X seconds
|
195
|
+
|
196
|
+
observer.observe(document.documentElement, { childList: true, subtree: true });
|
197
|
+
|
198
|
+
});
|
199
|
+
});
|
200
|
+
} catch (err) {
|
201
|
+
// Handle page navigation errors gracefully
|
202
|
+
if (err.message.includes('was destroyed')) {
|
203
|
+
return; // Page navigated or closed, no need to handle
|
204
|
+
}
|
205
|
+
throw err; // Rethrow unknown errors
|
206
|
+
}
|
207
|
+
},
|
208
|
+
],
|
209
|
+
|
164
210
|
preNavigationHooks: isBasicAuth
|
165
211
|
? [
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
212
|
+
async ({ page }) => {
|
213
|
+
await page.setExtraHTTPHeaders({
|
214
|
+
Authorization: authHeader,
|
215
|
+
...extraHTTPHeaders,
|
216
|
+
});
|
217
|
+
},
|
218
|
+
]
|
173
219
|
: [
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
220
|
+
async () => {
|
221
|
+
preNavigationHooks(extraHTTPHeaders);
|
222
|
+
// insert other code here
|
223
|
+
},
|
224
|
+
],
|
179
225
|
requestHandlerTimeoutSecs: 90,
|
180
226
|
requestHandler: async ({ page, request, response, sendRequest }) => {
|
181
227
|
await waitForPageLoaded(page, 10000);
|
@@ -191,7 +237,7 @@ const crawlSitemap = async (
|
|
191
237
|
request.url = currentUrl.href;
|
192
238
|
}
|
193
239
|
|
194
|
-
const actualUrl = request.loadedUrl || request.url;
|
240
|
+
const actualUrl = page.url() || request.loadedUrl || request.url;
|
195
241
|
|
196
242
|
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
|
197
243
|
crawler.autoscaledPool.abort();
|
@@ -223,8 +269,17 @@ const crawlSitemap = async (
|
|
223
269
|
const contentType = response.headers()['content-type'];
|
224
270
|
const status = response.status();
|
225
271
|
|
226
|
-
if (blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
227
|
-
urlsCrawled.userExcluded.push(
|
272
|
+
if (blacklistedPatterns && !isFollowStrategy(actualUrl, request.url, "same-hostname") && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
273
|
+
urlsCrawled.userExcluded.push({
|
274
|
+
url: request.url,
|
275
|
+
pageTitle: request.url,
|
276
|
+
actualUrl: actualUrl,
|
277
|
+
});
|
278
|
+
|
279
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
280
|
+
numScanned: urlsCrawled.scanned.length,
|
281
|
+
urlScanned: request.url,
|
282
|
+
});
|
228
283
|
return;
|
229
284
|
}
|
230
285
|
|
@@ -255,16 +310,16 @@ const crawlSitemap = async (
|
|
255
310
|
urlScanned: request.url,
|
256
311
|
});
|
257
312
|
|
258
|
-
const isRedirected = !areLinksEqual(
|
313
|
+
const isRedirected = !areLinksEqual(page.url(), request.url);
|
259
314
|
if (isRedirected) {
|
260
315
|
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
|
261
|
-
item => (item.actualUrl || item.url.href) ===
|
316
|
+
item => (item.actualUrl || item.url.href) === page,
|
262
317
|
);
|
263
318
|
|
264
319
|
if (isLoadedUrlInCrawledUrls) {
|
265
320
|
urlsCrawled.notScannedRedirects.push({
|
266
321
|
fromUrl: request.url,
|
267
|
-
toUrl:
|
322
|
+
toUrl: actualUrl, // i.e. actualUrl
|
268
323
|
});
|
269
324
|
return;
|
270
325
|
}
|
@@ -272,16 +327,16 @@ const crawlSitemap = async (
|
|
272
327
|
urlsCrawled.scanned.push({
|
273
328
|
url: urlWithoutAuth(request.url),
|
274
329
|
pageTitle: results.pageTitle,
|
275
|
-
actualUrl:
|
330
|
+
actualUrl: actualUrl, // i.e. actualUrl
|
276
331
|
});
|
277
332
|
|
278
333
|
urlsCrawled.scannedRedirects.push({
|
279
334
|
fromUrl: urlWithoutAuth(request.url),
|
280
|
-
toUrl:
|
335
|
+
toUrl: actualUrl,
|
281
336
|
});
|
282
337
|
|
283
338
|
results.url = request.url;
|
284
|
-
results.actualUrl =
|
339
|
+
results.actualUrl = actualUrl;
|
285
340
|
} else {
|
286
341
|
urlsCrawled.scanned.push({
|
287
342
|
url: urlWithoutAuth(request.url),
|
@@ -152,7 +152,12 @@ export const processPage = async (page, processPageParams) => {
|
|
152
152
|
window.confirm('Page has been excluded, would you still like to proceed with the scan?'),
|
153
153
|
);
|
154
154
|
if (!continueScan) {
|
155
|
-
urlsCrawled.userExcluded.push(
|
155
|
+
urlsCrawled.userExcluded.push({
|
156
|
+
url: pageUrl,
|
157
|
+
pageTitle: pageUrl,
|
158
|
+
actualUrl: pageUrl,
|
159
|
+
});
|
160
|
+
|
156
161
|
return;
|
157
162
|
}
|
158
163
|
}
|
@@ -396,7 +401,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
396
401
|
// eslint-disable-next-line no-underscore-dangle
|
397
402
|
const pageId = page._guid;
|
398
403
|
|
399
|
-
page.on('dialog', () => {});
|
404
|
+
page.on('dialog', () => { });
|
400
405
|
|
401
406
|
const pageClosePromise = new Promise(resolve => {
|
402
407
|
page.on('close', () => {
|
@@ -68,7 +68,7 @@ export const customAxeConfig: Spec = {
|
|
68
68
|
selector: 'html',
|
69
69
|
enabled: true,
|
70
70
|
any: ['oobee-grading-text-contents'],
|
71
|
-
tags: ['
|
71
|
+
tags: ['wcag2aaa', 'wcag315'],
|
72
72
|
metadata: {
|
73
73
|
description:
|
74
74
|
'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
|
package/src/mergeAxeResults.ts
CHANGED
@@ -219,8 +219,46 @@ const writeCsv = async (allIssues, storagePath) => {
|
|
219
219
|
includeEmptyRows: true,
|
220
220
|
};
|
221
221
|
|
222
|
+
// Create the parse stream (it’s asynchronous)
|
222
223
|
const parser = new AsyncParser(opts);
|
223
|
-
parser.parse(allIssues)
|
224
|
+
const parseStream = parser.parse(allIssues);
|
225
|
+
|
226
|
+
// Pipe JSON2CSV output into the file, but don't end automatically
|
227
|
+
parseStream.pipe(csvOutput, { end: false });
|
228
|
+
|
229
|
+
// Once JSON2CSV is done writing all normal rows, append any "pagesNotScanned"
|
230
|
+
parseStream.on('end', () => {
|
231
|
+
if (allIssues.pagesNotScanned && allIssues.pagesNotScanned.length > 0) {
|
232
|
+
csvOutput.write('\n');
|
233
|
+
allIssues.pagesNotScanned.forEach(page => {
|
234
|
+
const skippedPage = {
|
235
|
+
customFlowLabel: allIssues.customFlowLabel || '',
|
236
|
+
deviceChosen: allIssues.deviceChosen || '',
|
237
|
+
scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
|
238
|
+
severity: 'error',
|
239
|
+
issueId: 'error-pages-skipped',
|
240
|
+
issueDescription: 'Page was skipped during the scan',
|
241
|
+
wcagConformance: '',
|
242
|
+
url: page.url || '',
|
243
|
+
pageTitle: '',
|
244
|
+
context: '',
|
245
|
+
howToFix: '',
|
246
|
+
axeImpact: '',
|
247
|
+
xpath: '',
|
248
|
+
learnMore: '',
|
249
|
+
};
|
250
|
+
csvOutput.write(`${Object.values(skippedPage).join(',')}\n`);
|
251
|
+
});
|
252
|
+
}
|
253
|
+
|
254
|
+
// Now close the CSV file
|
255
|
+
csvOutput.end();
|
256
|
+
});
|
257
|
+
|
258
|
+
parseStream.on('error', err => {
|
259
|
+
console.error('Error parsing CSV:', err);
|
260
|
+
csvOutput.end();
|
261
|
+
});
|
224
262
|
};
|
225
263
|
|
226
264
|
const compileHtmlWithEJS = async (
|
@@ -234,7 +272,7 @@ const compileHtmlWithEJS = async (
|
|
234
272
|
filename: path.join(dirname, './static/ejs/report.ejs'),
|
235
273
|
});
|
236
274
|
|
237
|
-
const html = template({...allIssues, storagePath: JSON.stringify(storagePath)});
|
275
|
+
const html = template({ ...allIssues, storagePath: JSON.stringify(storagePath) });
|
238
276
|
await fs.writeFile(htmlFilePath, html);
|
239
277
|
|
240
278
|
let htmlContent = await fs.readFile(htmlFilePath, { encoding: 'utf8' });
|