@govtechsg/oobee 0.10.86 → 0.10.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/docker-push-ghcr.yml +49 -0
- package/.github/workflows/image.yml +2 -3
- package/DETAILS_OUTPUT_EXAMPLES.md +178 -0
- package/Dockerfile +6 -7
- package/dist/cli.js +18 -5
- package/dist/combine.js +3 -0
- package/dist/constants/cliFunctions.js +2 -2
- package/dist/constants/common.js +55 -13
- package/dist/crawlers/commonCrawlerFunc.js +523 -2
- package/dist/crawlers/crawlDomain.js +38 -13
- package/dist/crawlers/crawlIntelligentSitemap.js +62 -30
- package/dist/crawlers/crawlLocalFile.js +2 -2
- package/dist/crawlers/crawlSitemap.js +44 -5
- package/dist/crawlers/custom/extractAndGradeText.js +1 -1
- package/dist/crawlers/custom/getAxeConfiguration.js +26 -21
- package/dist/crawlers/custom/gradeReadability.js +1 -1
- package/dist/crawlers/custom/utils.js +81 -40
- package/dist/generateHtmlReport.js +18 -11
- package/dist/mergeAxeResults/itemReferences.js +60 -25
- package/dist/mergeAxeResults/sentryTelemetry.js +4 -1
- package/dist/mergeAxeResults.js +18 -9
- package/dist/npmIndex.js +16 -12
- package/dist/screenshotFunc/htmlScreenshotFunc.js +67 -0
- package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
- package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +45 -6
- package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +8 -5
- package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
- package/dist/static/ejs/partials/scripts/ruleModal/utilities.ejs +2 -1
- package/dist/static/ejs/summary.ejs +18 -12
- package/dist/utils.js +4 -3
- package/examples/oobee-test-details-runner.js +214 -0
- package/examples/test-violations.html +42 -0
- package/fix-summary-html-oom-pr.md +62 -0
- package/package.json +5 -5
- package/src/cli.ts +19 -5
- package/src/combine.ts +3 -0
- package/src/constants/cliFunctions.ts +2 -2
- package/src/constants/common.ts +65 -12
- package/src/crawlers/commonCrawlerFunc.ts +625 -2
- package/src/crawlers/crawlDomain.ts +39 -13
- package/src/crawlers/crawlIntelligentSitemap.ts +63 -30
- package/src/crawlers/crawlLocalFile.ts +4 -1
- package/src/crawlers/crawlSitemap.ts +50 -3
- package/src/crawlers/custom/extractAndGradeText.ts +1 -1
- package/src/crawlers/custom/getAxeConfiguration.ts +25 -23
- package/src/crawlers/custom/gradeReadability.ts +1 -1
- package/src/crawlers/custom/utils.ts +99 -43
- package/src/generateHtmlReport.ts +21 -11
- package/src/mergeAxeResults/itemReferences.ts +70 -26
- package/src/mergeAxeResults/sentryTelemetry.ts +4 -1
- package/src/mergeAxeResults.ts +21 -11
- package/src/npmIndex.ts +17 -12
- package/src/screenshotFunc/htmlScreenshotFunc.ts +81 -1
- package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
- package/src/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +45 -6
- package/src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +8 -5
- package/src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
- package/src/static/ejs/partials/scripts/ruleModal/utilities.ejs +2 -1
- package/src/static/ejs/summary.ejs +18 -12
- package/src/utils.ts +4 -3
- package/testStaticJSScanner.html +1 -1
|
@@ -29,7 +29,7 @@ import {
|
|
|
29
29
|
getUrlsFromRobotsTxt,
|
|
30
30
|
waitForPageLoaded,
|
|
31
31
|
} from '../constants/common.js';
|
|
32
|
-
import { areLinksEqual, isFollowStrategy, register } from '../utils.js';
|
|
32
|
+
import { areLinksEqual, isFollowStrategy, normUrl, register } from '../utils.js';
|
|
33
33
|
import {
|
|
34
34
|
handlePdfDownload,
|
|
35
35
|
runPdfScan,
|
|
@@ -116,9 +116,9 @@ const crawlDomain = async ({
|
|
|
116
116
|
const pdfDownloads: Promise<void>[] = [];
|
|
117
117
|
const uuidToPdfMapping: Record<string, string> = {};
|
|
118
118
|
const queuedUrlSet = new Set<string>();
|
|
119
|
-
const scannedUrlSet = new Set<string>(urlsCrawled.scanned.map(item => item.url));
|
|
119
|
+
const scannedUrlSet = new Set<string>(urlsCrawled.scanned.map(item => normUrl(item.url)));
|
|
120
120
|
const scannedResolvedUrlSet = new Set<string>(
|
|
121
|
-
urlsCrawled.scanned.map(item => item.actualUrl || item.url),
|
|
121
|
+
urlsCrawled.scanned.map(item => normUrl(item.actualUrl || item.url)),
|
|
122
122
|
);
|
|
123
123
|
const isScanHtml = [FileTypes.All, FileTypes.HtmlOnly].includes(fileTypes as FileTypes);
|
|
124
124
|
const isScanPdfs = [FileTypes.All, FileTypes.PdfOnly].includes(fileTypes as FileTypes);
|
|
@@ -166,13 +166,14 @@ const crawlDomain = async ({
|
|
|
166
166
|
const selectedElementsString = cssQuerySelectors.join(', ');
|
|
167
167
|
|
|
168
168
|
const isExcluded = (newPageUrl: string): boolean => {
|
|
169
|
-
const isAlreadyScanned: boolean =
|
|
169
|
+
const isAlreadyScanned: boolean = scannedUrlSet.has(normUrl(newPageUrl));
|
|
170
170
|
const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl, blacklistedPatterns);
|
|
171
171
|
const isNotFollowStrategy: boolean = !isFollowStrategy(newPageUrl, initialPageUrl, strategy);
|
|
172
172
|
const isNotSupportedDocument: boolean = disallowedListOfPatterns.some(pattern =>
|
|
173
173
|
newPageUrl.toLowerCase().startsWith(pattern),
|
|
174
174
|
);
|
|
175
|
-
|
|
175
|
+
const isRobotsDisallowed: boolean = isDisallowedInRobotsTxt(newPageUrl);
|
|
176
|
+
return isNotSupportedDocument || isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy || isRobotsDisallowed;
|
|
176
177
|
};
|
|
177
178
|
const setPageListeners = (pageListener: Page): void => {
|
|
178
179
|
// event listener to handle new page popups upon button click
|
|
@@ -341,7 +342,7 @@ const crawlDomain = async ({
|
|
|
341
342
|
} catch (e) {
|
|
342
343
|
consoleLogger.error(e);
|
|
343
344
|
}
|
|
344
|
-
if (scannedUrlSet.has(req.url)) {
|
|
345
|
+
if (scannedUrlSet.has(normUrl(req.url))) {
|
|
345
346
|
req.skipNavigation = true;
|
|
346
347
|
}
|
|
347
348
|
if (isDisallowedInRobotsTxt(req.url)) return null;
|
|
@@ -481,7 +482,7 @@ const crawlDomain = async ({
|
|
|
481
482
|
}
|
|
482
483
|
|
|
483
484
|
const isRedirected = !areLinksEqual(finalUrl, requestLabelUrl);
|
|
484
|
-
if (isRedirected) {
|
|
485
|
+
if (isRedirected && !isDisallowedInRobotsTxt(finalUrl)) {
|
|
485
486
|
await enqueueUniqueRequest({ url: finalUrl, label: finalUrl });
|
|
486
487
|
} else {
|
|
487
488
|
request.skipNavigation = false;
|
|
@@ -537,7 +538,7 @@ const crawlDomain = async ({
|
|
|
537
538
|
}
|
|
538
539
|
|
|
539
540
|
// if URL has already been scanned
|
|
540
|
-
if (scannedUrlSet.has(request.url)) {
|
|
541
|
+
if (scannedUrlSet.has(normUrl(request.url))) {
|
|
541
542
|
await enqueueProcess(page, enqueueLinks, browserContext);
|
|
542
543
|
return;
|
|
543
544
|
}
|
|
@@ -654,8 +655,33 @@ const crawlDomain = async ({
|
|
|
654
655
|
|
|
655
656
|
const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset });
|
|
656
657
|
|
|
658
|
+
// Detect JS redirects that fire during/after axe scan.
|
|
659
|
+
// Listen for navigation, then give a brief window for pending redirects to complete.
|
|
660
|
+
try {
|
|
661
|
+
let navigatedToUrl: string | null = null;
|
|
662
|
+
const onFrameNavigated = (frame: Frame) => {
|
|
663
|
+
if (frame === page.mainFrame()) {
|
|
664
|
+
navigatedToUrl = frame.url();
|
|
665
|
+
}
|
|
666
|
+
};
|
|
667
|
+
page.on('framenavigated', onFrameNavigated);
|
|
668
|
+
await page.waitForTimeout(1000);
|
|
669
|
+
page.off('framenavigated', onFrameNavigated);
|
|
670
|
+
|
|
671
|
+
const postScanUrl = navigatedToUrl || page.url();
|
|
672
|
+
if (postScanUrl && postScanUrl !== 'about:blank' && !isFollowStrategy(postScanUrl, request.url, 'same-hostname')) {
|
|
673
|
+
urlsCrawled.notScannedRedirects.push({
|
|
674
|
+
fromUrl: request.url,
|
|
675
|
+
toUrl: postScanUrl,
|
|
676
|
+
});
|
|
677
|
+
return;
|
|
678
|
+
}
|
|
679
|
+
} catch (_) {
|
|
680
|
+
// Page/context was destroyed during navigation — handled by outer catch
|
|
681
|
+
}
|
|
682
|
+
|
|
657
683
|
if (isRedirected) {
|
|
658
|
-
const isLoadedUrlInCrawledUrls = scannedResolvedUrlSet.has(actualUrl);
|
|
684
|
+
const isLoadedUrlInCrawledUrls = scannedResolvedUrlSet.has(normUrl(actualUrl));
|
|
659
685
|
|
|
660
686
|
if (isLoadedUrlInCrawledUrls) {
|
|
661
687
|
urlsCrawled.notScannedRedirects.push({
|
|
@@ -677,8 +703,8 @@ const crawlDomain = async ({
|
|
|
677
703
|
pageTitle: results.pageTitle,
|
|
678
704
|
actualUrl, // i.e. actualUrl
|
|
679
705
|
});
|
|
680
|
-
scannedUrlSet.add(request.url);
|
|
681
|
-
scannedResolvedUrlSet.add(actualUrl);
|
|
706
|
+
scannedUrlSet.add(normUrl(request.url));
|
|
707
|
+
scannedResolvedUrlSet.add(normUrl(actualUrl));
|
|
682
708
|
|
|
683
709
|
urlsCrawled.scannedRedirects.push({
|
|
684
710
|
fromUrl: request.url,
|
|
@@ -700,8 +726,8 @@ const crawlDomain = async ({
|
|
|
700
726
|
actualUrl: request.url,
|
|
701
727
|
pageTitle: results.pageTitle,
|
|
702
728
|
});
|
|
703
|
-
scannedUrlSet.add(request.url);
|
|
704
|
-
scannedResolvedUrlSet.add(request.url);
|
|
729
|
+
scannedUrlSet.add(normUrl(request.url));
|
|
730
|
+
scannedResolvedUrlSet.add(normUrl(request.url));
|
|
705
731
|
await dataset.pushData(results);
|
|
706
732
|
}
|
|
707
733
|
} else {
|
|
@@ -7,7 +7,7 @@ import { consoleLogger, guiInfoLog } from '../logs.js';
|
|
|
7
7
|
import crawlDomain from './crawlDomain.js';
|
|
8
8
|
import crawlSitemap from './crawlSitemap.js';
|
|
9
9
|
import { ViewportSettingsClass } from '../combine.js';
|
|
10
|
-
import { getPlaywrightLaunchOptions } from '../constants/common.js';
|
|
10
|
+
import { getPlaywrightLaunchOptions, getSitemapsFromRobotsTxt } from '../constants/common.js';
|
|
11
11
|
import { register } from '../utils.js';
|
|
12
12
|
|
|
13
13
|
const crawlIntelligentSitemap = async (
|
|
@@ -100,12 +100,30 @@ const crawlIntelligentSitemap = async (
|
|
|
100
100
|
}
|
|
101
101
|
};
|
|
102
102
|
|
|
103
|
+
// Discover sitemaps from robots.txt first (supports multiple Sitemap: directives)
|
|
104
|
+
let sitemapUrls: string[] = [];
|
|
103
105
|
try {
|
|
104
|
-
|
|
106
|
+
sitemapUrls = await getSitemapsFromRobotsTxt(url, browser, userDataDirectory, extraHTTPHeaders);
|
|
107
|
+
if (sitemapUrls.length > 0) {
|
|
108
|
+
console.log(`Found ${sitemapUrls.length} sitemap(s) in robots.txt: ${sitemapUrls.join(', ')}`);
|
|
109
|
+
sitemapExist = true;
|
|
110
|
+
}
|
|
105
111
|
} catch (error) {
|
|
106
112
|
consoleLogger.error(error);
|
|
107
113
|
}
|
|
108
114
|
|
|
115
|
+
// Fall back to hardcoded path probing if robots.txt had no sitemaps
|
|
116
|
+
if (!sitemapExist) {
|
|
117
|
+
try {
|
|
118
|
+
sitemapUrl = await findSitemap(url, userDataDirectory, extraHTTPHeaders);
|
|
119
|
+
if (sitemapExist) {
|
|
120
|
+
sitemapUrls = [sitemapUrl];
|
|
121
|
+
}
|
|
122
|
+
} catch (error) {
|
|
123
|
+
consoleLogger.error(error);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
109
127
|
if (!sitemapExist) {
|
|
110
128
|
console.log('Unable to find sitemap. Commencing website crawl instead.');
|
|
111
129
|
return await crawlDomain({
|
|
@@ -124,38 +142,53 @@ const crawlIntelligentSitemap = async (
|
|
|
124
142
|
followRobots,
|
|
125
143
|
extraHTTPHeaders,
|
|
126
144
|
safeMode,
|
|
127
|
-
scanDuration,
|
|
145
|
+
scanDuration,
|
|
128
146
|
});
|
|
129
147
|
}
|
|
130
148
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
149
|
+
// Process all discovered sitemaps sequentially, sharing dataset and urlsCrawled
|
|
150
|
+
for (const currentSitemapUrl of sitemapUrls) {
|
|
151
|
+
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) break;
|
|
152
|
+
|
|
153
|
+
const elapsed = Date.now() - startTime;
|
|
154
|
+
const remainingDuration = scanDuration > 0 ? Math.max(scanDuration - elapsed / 1000, 0) : scanDuration;
|
|
155
|
+
if (scanDuration > 0 && remainingDuration <= 0) {
|
|
156
|
+
durationExceeded = true;
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
console.log(`Processing sitemap: ${currentSitemapUrl}`);
|
|
161
|
+
urlsCrawledFinal = await crawlSitemap({
|
|
162
|
+
sitemapUrl: currentSitemapUrl,
|
|
163
|
+
randomToken,
|
|
164
|
+
host,
|
|
165
|
+
viewportSettings,
|
|
166
|
+
maxRequestsPerCrawl,
|
|
167
|
+
browser,
|
|
168
|
+
userDataDirectory,
|
|
169
|
+
specifiedMaxConcurrency,
|
|
170
|
+
fileTypes,
|
|
171
|
+
blacklistedPatterns,
|
|
172
|
+
includeScreenshots,
|
|
173
|
+
extraHTTPHeaders,
|
|
174
|
+
strategy,
|
|
175
|
+
userUrl: url,
|
|
176
|
+
fromCrawlIntelligentSitemap,
|
|
177
|
+
userUrlInputFromIntelligent: url,
|
|
178
|
+
datasetFromIntelligent: dataset,
|
|
179
|
+
urlsCrawledFromIntelligent: urlsCrawled,
|
|
180
|
+
crawledFromLocalFile: false,
|
|
181
|
+
scanDuration: scanDuration > 0 ? remainingDuration : 0,
|
|
182
|
+
});
|
|
183
|
+
}
|
|
152
184
|
|
|
153
185
|
const elapsed = Date.now() - startTime;
|
|
154
|
-
const remainingScanDuration = Math.max(scanDuration - elapsed / 1000, 0)
|
|
186
|
+
const remainingScanDuration = scanDuration > 0 ? Math.max(scanDuration - elapsed / 1000, 0) : 0;
|
|
187
|
+
const hasDurationRemaining = scanDuration === 0 || remainingScanDuration > 0;
|
|
155
188
|
|
|
156
|
-
if (
|
|
189
|
+
if (urlsCrawled.scanned.length < maxRequestsPerCrawl && hasDurationRemaining) {
|
|
157
190
|
console.log(
|
|
158
|
-
`Continuing crawl from root website
|
|
191
|
+
`Continuing crawl from root website.${scanDuration > 0 ? ` Remaining scan time: ${remainingScanDuration.toFixed(1)}s` : ''}`,
|
|
159
192
|
);
|
|
160
193
|
urlsCrawledFinal = await crawlDomain({
|
|
161
194
|
url,
|
|
@@ -175,10 +208,10 @@ const crawlIntelligentSitemap = async (
|
|
|
175
208
|
safeMode,
|
|
176
209
|
fromCrawlIntelligentSitemap,
|
|
177
210
|
datasetFromIntelligent: dataset,
|
|
178
|
-
urlsCrawledFromIntelligent:
|
|
211
|
+
urlsCrawledFromIntelligent: urlsCrawled,
|
|
179
212
|
scanDuration: remainingScanDuration,
|
|
180
213
|
});
|
|
181
|
-
} else if (
|
|
214
|
+
} else if (!hasDurationRemaining) {
|
|
182
215
|
console.log(
|
|
183
216
|
`Crawl duration exceeded before more pages could be found (limit: ${scanDuration}s).`,
|
|
184
217
|
);
|
|
@@ -186,7 +219,7 @@ const crawlIntelligentSitemap = async (
|
|
|
186
219
|
}
|
|
187
220
|
|
|
188
221
|
guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
|
|
189
|
-
return { urlsCrawled
|
|
222
|
+
return { urlsCrawled, durationExceeded };
|
|
190
223
|
};
|
|
191
224
|
|
|
192
225
|
export default crawlIntelligentSitemap;
|
|
@@ -8,6 +8,7 @@ import constants, {
|
|
|
8
8
|
UrlsCrawled,
|
|
9
9
|
STATUS_CODE_METADATA,
|
|
10
10
|
FileTypes,
|
|
11
|
+
RuleFlags,
|
|
11
12
|
} from '../constants/constants.js';
|
|
12
13
|
import { ViewportSettingsClass } from '../combine.js';
|
|
13
14
|
import {
|
|
@@ -35,6 +36,7 @@ export const crawlLocalFile = async ({
|
|
|
35
36
|
includeScreenshots,
|
|
36
37
|
extraHTTPHeaders,
|
|
37
38
|
scanDuration = 0,
|
|
39
|
+
ruleset = [],
|
|
38
40
|
fromCrawlIntelligentSitemap = false,
|
|
39
41
|
userUrlInputFromIntelligent = null,
|
|
40
42
|
datasetFromIntelligent = null,
|
|
@@ -53,6 +55,7 @@ export const crawlLocalFile = async ({
|
|
|
53
55
|
includeScreenshots: boolean;
|
|
54
56
|
extraHTTPHeaders: Record<string, string>;
|
|
55
57
|
scanDuration?: number;
|
|
58
|
+
ruleset?: RuleFlags[];
|
|
56
59
|
fromCrawlIntelligentSitemap?: boolean;
|
|
57
60
|
userUrlInputFromIntelligent?: string | null;
|
|
58
61
|
datasetFromIntelligent?: Dataset | null;
|
|
@@ -178,7 +181,7 @@ export const crawlLocalFile = async ({
|
|
|
178
181
|
return urlsCrawled;
|
|
179
182
|
}
|
|
180
183
|
|
|
181
|
-
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
184
|
+
const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset });
|
|
182
185
|
|
|
183
186
|
const actualUrl = page.url() || request.loadedUrl || url;
|
|
184
187
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import crawlee, { LaunchContext, Request, RequestList, Dataset } from 'crawlee';
|
|
1
|
+
import crawlee, { EnqueueStrategy, LaunchContext, Request, RequestList, Dataset } from 'crawlee';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import * as path from 'path';
|
|
4
4
|
import fsp from 'fs/promises';
|
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
waitForPageLoaded,
|
|
24
24
|
isFilePath,
|
|
25
25
|
} from '../constants/common.js';
|
|
26
|
-
import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
|
|
26
|
+
import { areLinksEqual, isFollowStrategy, isWhitelistedContentType, normUrl, register } from '../utils.js';
|
|
27
27
|
import {
|
|
28
28
|
handlePdfDownload,
|
|
29
29
|
runPdfScan,
|
|
@@ -46,6 +46,8 @@ const crawlSitemap = async ({
|
|
|
46
46
|
blacklistedPatterns,
|
|
47
47
|
includeScreenshots,
|
|
48
48
|
extraHTTPHeaders,
|
|
49
|
+
strategy = EnqueueStrategy.All,
|
|
50
|
+
userUrl = '',
|
|
49
51
|
scanDuration = 0,
|
|
50
52
|
fromCrawlIntelligentSitemap = false,
|
|
51
53
|
userUrlInputFromIntelligent = null,
|
|
@@ -65,6 +67,8 @@ const crawlSitemap = async ({
|
|
|
65
67
|
blacklistedPatterns: string[];
|
|
66
68
|
includeScreenshots: boolean;
|
|
67
69
|
extraHTTPHeaders: Record<string, string>;
|
|
70
|
+
strategy?: EnqueueStrategy;
|
|
71
|
+
userUrl?: string;
|
|
68
72
|
scanDuration?: number;
|
|
69
73
|
fromCrawlIntelligentSitemap?: boolean;
|
|
70
74
|
userUrlInputFromIntelligent?: string;
|
|
@@ -99,6 +103,8 @@ const crawlSitemap = async ({
|
|
|
99
103
|
userUrlInputFromIntelligent,
|
|
100
104
|
fromCrawlIntelligentSitemap,
|
|
101
105
|
extraHTTPHeaders,
|
|
106
|
+
strategy,
|
|
107
|
+
userUrl || sitemapUrl,
|
|
102
108
|
);
|
|
103
109
|
|
|
104
110
|
sitemapUrl = encodeURI(sitemapUrl);
|
|
@@ -299,7 +305,7 @@ const crawlSitemap = async ({
|
|
|
299
305
|
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
300
306
|
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
301
307
|
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
|
|
302
|
-
item => (item.actualUrl || item.url) === page.url(),
|
|
308
|
+
item => normUrl(item.actualUrl || item.url) === normUrl(page.url()),
|
|
303
309
|
);
|
|
304
310
|
|
|
305
311
|
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
@@ -327,8 +333,49 @@ const crawlSitemap = async ({
|
|
|
327
333
|
return;
|
|
328
334
|
}
|
|
329
335
|
|
|
336
|
+
if (isRedirected && !isFollowStrategy(actualUrl, request.url, 'same-hostname')) {
|
|
337
|
+
urlsCrawled.notScannedRedirects.push({
|
|
338
|
+
fromUrl: request.url,
|
|
339
|
+
toUrl: actualUrl,
|
|
340
|
+
});
|
|
341
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
342
|
+
numScanned: urlsCrawled.scanned.length,
|
|
343
|
+
urlScanned: request.url,
|
|
344
|
+
});
|
|
345
|
+
return;
|
|
346
|
+
}
|
|
347
|
+
|
|
330
348
|
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
331
349
|
|
|
350
|
+
// Detect JS redirects that fire during/after axe scan.
|
|
351
|
+
// Listen for navigation, then give a brief window for pending redirects to complete.
|
|
352
|
+
try {
|
|
353
|
+
let navigatedToUrl: string | null = null;
|
|
354
|
+
const onFrameNavigated = (frame: any) => {
|
|
355
|
+
if (frame === page.mainFrame()) {
|
|
356
|
+
navigatedToUrl = frame.url();
|
|
357
|
+
}
|
|
358
|
+
};
|
|
359
|
+
page.on('framenavigated', onFrameNavigated);
|
|
360
|
+
await page.waitForTimeout(1000);
|
|
361
|
+
page.off('framenavigated', onFrameNavigated);
|
|
362
|
+
|
|
363
|
+
const postScanUrl = navigatedToUrl || page.url();
|
|
364
|
+
if (postScanUrl && postScanUrl !== 'about:blank' && !isFollowStrategy(postScanUrl, request.url, 'same-hostname')) {
|
|
365
|
+
urlsCrawled.notScannedRedirects.push({
|
|
366
|
+
fromUrl: request.url,
|
|
367
|
+
toUrl: postScanUrl,
|
|
368
|
+
});
|
|
369
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
370
|
+
numScanned: urlsCrawled.scanned.length,
|
|
371
|
+
urlScanned: request.url,
|
|
372
|
+
});
|
|
373
|
+
return;
|
|
374
|
+
}
|
|
375
|
+
} catch (_) {
|
|
376
|
+
// Page/context was destroyed during navigation — handled by outer catch
|
|
377
|
+
}
|
|
378
|
+
|
|
332
379
|
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
333
380
|
numScanned: urlsCrawled.scanned.length,
|
|
334
381
|
urlScanned: request.url,
|
|
@@ -45,7 +45,7 @@ export async function extractAndGradeText(page: Page): Promise<string> {
|
|
|
45
45
|
|
|
46
46
|
// Determine the return value
|
|
47
47
|
const result =
|
|
48
|
-
readabilityScore
|
|
48
|
+
readabilityScore <= 0 || readabilityScore > 50 ? '' : readabilityScore.toString();
|
|
49
49
|
|
|
50
50
|
return result;
|
|
51
51
|
} catch (error) {
|
|
@@ -10,6 +10,12 @@ export function getAxeConfiguration({
|
|
|
10
10
|
gradingReadabilityFlag?: string;
|
|
11
11
|
disableOobee?: boolean;
|
|
12
12
|
}) {
|
|
13
|
+
function getReadabilityInterpretation(score: string): string {
|
|
14
|
+
const num = parseFloat(score);
|
|
15
|
+
if (Number.isNaN(num)) return '';
|
|
16
|
+
if (num > 30) return 'It is targeted for junior college (JC) level comprehension and above.';
|
|
17
|
+
return 'It is targeted for university graduate level comprehension and above.';
|
|
18
|
+
}
|
|
13
19
|
return {
|
|
14
20
|
branding: {
|
|
15
21
|
application: 'oobee',
|
|
@@ -39,7 +45,7 @@ export function getAxeConfiguration({
|
|
|
39
45
|
return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
|
|
40
46
|
},
|
|
41
47
|
},
|
|
42
|
-
...(enableWcagAaa
|
|
48
|
+
...((enableWcagAaa && gradingReadabilityFlag !== '')
|
|
43
49
|
? [
|
|
44
50
|
{
|
|
45
51
|
id: 'oobee-grading-text-contents',
|
|
@@ -47,17 +53,11 @@ export function getAxeConfiguration({
|
|
|
47
53
|
impact: 'moderate' as ImpactValue,
|
|
48
54
|
messages: {
|
|
49
55
|
pass: 'The text content is easy to understand.',
|
|
50
|
-
fail:
|
|
51
|
-
incomplete: `
|
|
52
|
-
}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`,
|
|
56
|
+
fail: `Text content is potentially difficult to read.\n It scored ${gradingReadabilityFlag} out of 50 on the Flesch-Kincaid Readability Test.\n ${getReadabilityInterpretation(gradingReadabilityFlag)}`,
|
|
57
|
+
incomplete: `Text content is potentially difficult to read.\n It scored ${gradingReadabilityFlag} out of 50 on the Flesch-Kincaid Readability Test.\n ${getReadabilityInterpretation(gradingReadabilityFlag)}`,
|
|
53
58
|
},
|
|
54
59
|
},
|
|
55
|
-
evaluate: (_node: HTMLElement) =>
|
|
56
|
-
if (gradingReadabilityFlag === '') {
|
|
57
|
-
return true; // Pass if no readability issues
|
|
58
|
-
}
|
|
59
|
-
// Fail if readability issues are detected
|
|
60
|
-
},
|
|
60
|
+
evaluate: (_node: HTMLElement) => false,
|
|
61
61
|
},
|
|
62
62
|
]
|
|
63
63
|
: []),
|
|
@@ -88,19 +88,21 @@ export function getAxeConfiguration({
|
|
|
88
88
|
helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
|
|
89
89
|
},
|
|
90
90
|
},
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
91
|
+
...((enableWcagAaa && gradingReadabilityFlag !== '')
|
|
92
|
+
? [{
|
|
93
|
+
id: 'oobee-grading-text-contents',
|
|
94
|
+
selector: 'html',
|
|
95
|
+
enabled: true,
|
|
96
|
+
any: ['oobee-grading-text-contents'],
|
|
97
|
+
tags: ['wcag2aaa', 'wcag315'],
|
|
98
|
+
metadata: {
|
|
99
|
+
description:
|
|
100
|
+
'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
|
|
101
|
+
help: 'Text content should be clear and plain to ensure that it is easily understood.',
|
|
102
|
+
helpUrl: 'https://www.wcag.com/uncategorized/3-1-5-reading-level/',
|
|
103
|
+
},
|
|
104
|
+
}]
|
|
105
|
+
: []),
|
|
104
106
|
]
|
|
105
107
|
.filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
|
|
106
108
|
.concat(
|
|
@@ -20,7 +20,7 @@ export function gradeReadability(sentences: string[]): string {
|
|
|
20
20
|
|
|
21
21
|
// Determine the return value
|
|
22
22
|
const result =
|
|
23
|
-
readabilityScore
|
|
23
|
+
readabilityScore <= 0 || readabilityScore > 50 ? '' : readabilityScore.toString();
|
|
24
24
|
|
|
25
25
|
return result;
|
|
26
26
|
} catch (error) {
|