@govtechsg/oobee 0.10.84 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/cli.js +7 -6
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +218 -71
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +24 -15
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +49 -29
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +3 -3
- package/src/cli.ts +20 -15
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +244 -84
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +38 -15
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +48 -29
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
|
@@ -116,7 +116,7 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
|
|
|
116
116
|
return;
|
|
117
117
|
const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
|
|
118
118
|
nodes.forEach(node => {
|
|
119
|
-
const { html } = node;
|
|
119
|
+
const { html, target } = node;
|
|
120
120
|
if (!(rule in passed.rules)) {
|
|
121
121
|
passed.rules[rule] = {
|
|
122
122
|
description,
|
|
@@ -128,7 +128,8 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
|
|
|
128
128
|
};
|
|
129
129
|
}
|
|
130
130
|
const finalHtml = truncateHtml(html);
|
|
131
|
-
|
|
131
|
+
const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
|
|
132
|
+
passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
|
|
132
133
|
passed.totalItems += 1;
|
|
133
134
|
passed.rules[rule].totalItems += 1;
|
|
134
135
|
totalItems += 1;
|
|
@@ -12,6 +12,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
|
|
|
12
12
|
let dataset;
|
|
13
13
|
let urlsCrawled;
|
|
14
14
|
let durationExceeded = false;
|
|
15
|
+
let isAbortingScan = false;
|
|
15
16
|
if (fromCrawlIntelligentSitemap) {
|
|
16
17
|
dataset = datasetFromIntelligent;
|
|
17
18
|
urlsCrawled = urlsCrawledFromIntelligent;
|
|
@@ -144,106 +145,123 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
|
|
|
144
145
|
});
|
|
145
146
|
return;
|
|
146
147
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if (hasExceededDuration) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
if (request.skipNavigation && actualUrl === 'about:blank') {
|
|
159
|
-
if (isScanPdfs) {
|
|
160
|
-
// pushes download promise into pdfDownloads
|
|
161
|
-
const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
|
|
162
|
-
uuidToPdfMapping[pdfFileName] = url;
|
|
148
|
+
try {
|
|
149
|
+
await waitForPageLoaded(page, 10000);
|
|
150
|
+
const actualUrl = page.url() || request.loadedUrl || request.url;
|
|
151
|
+
const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
|
|
152
|
+
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
|
|
153
|
+
isAbortingScan = true;
|
|
154
|
+
if (hasExceededDuration) {
|
|
155
|
+
console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
|
|
156
|
+
durationExceeded = true;
|
|
157
|
+
}
|
|
158
|
+
crawler.autoscaledPool.abort(); // stops new requests
|
|
163
159
|
return;
|
|
164
160
|
}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
});
|
|
176
|
-
return;
|
|
177
|
-
}
|
|
178
|
-
const contentType = response?.headers?.()['content-type'] || '';
|
|
179
|
-
const status = response ? response.status() : 0;
|
|
180
|
-
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
181
|
-
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
182
|
-
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
|
|
183
|
-
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
184
|
-
urlsCrawled.notScannedRedirects.push({
|
|
185
|
-
fromUrl: request.url,
|
|
186
|
-
toUrl: actualUrl, // i.e. actualUrl
|
|
161
|
+
if (request.skipNavigation && actualUrl === 'about:blank') {
|
|
162
|
+
if (isScanPdfs) {
|
|
163
|
+
// pushes download promise into pdfDownloads
|
|
164
|
+
const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
|
|
165
|
+
uuidToPdfMapping[pdfFileName] = url;
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
169
|
+
numScanned: urlsCrawled.scanned.length,
|
|
170
|
+
urlScanned: request.url,
|
|
187
171
|
});
|
|
188
|
-
return;
|
|
189
|
-
}
|
|
190
|
-
// This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
|
|
191
|
-
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
192
172
|
urlsCrawled.userExcluded.push({
|
|
193
173
|
url: request.url,
|
|
194
174
|
pageTitle: request.url,
|
|
195
|
-
actualUrl,
|
|
196
|
-
metadata: STATUS_CODE_METADATA[
|
|
197
|
-
httpStatusCode:
|
|
175
|
+
actualUrl: request.url, // because about:blank is not useful
|
|
176
|
+
metadata: STATUS_CODE_METADATA[1],
|
|
177
|
+
httpStatusCode: 1,
|
|
198
178
|
});
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
const contentType = response?.headers?.()['content-type'] || '';
|
|
182
|
+
const status = response ? response.status() : 0;
|
|
183
|
+
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
184
|
+
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
185
|
+
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
|
|
186
|
+
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
187
|
+
urlsCrawled.notScannedRedirects.push({
|
|
188
|
+
fromUrl: request.url,
|
|
189
|
+
toUrl: actualUrl, // i.e. actualUrl
|
|
190
|
+
});
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
// This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
|
|
194
|
+
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
195
|
+
urlsCrawled.userExcluded.push({
|
|
196
|
+
url: request.url,
|
|
197
|
+
pageTitle: request.url,
|
|
198
|
+
actualUrl,
|
|
199
|
+
metadata: STATUS_CODE_METADATA[0],
|
|
200
|
+
httpStatusCode: 0,
|
|
201
|
+
});
|
|
202
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
203
|
+
numScanned: urlsCrawled.scanned.length,
|
|
204
|
+
urlScanned: request.url,
|
|
205
|
+
});
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
209
|
+
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
210
|
+
numScanned: urlsCrawled.scanned.length,
|
|
211
|
+
urlScanned: request.url,
|
|
212
|
+
});
|
|
213
|
+
urlsCrawled.scanned.push({
|
|
214
|
+
url: request.url,
|
|
215
|
+
pageTitle: results.pageTitle,
|
|
216
|
+
actualUrl, // i.e. actualUrl
|
|
217
|
+
});
|
|
218
|
+
urlsCrawled.scannedRedirects.push({
|
|
219
|
+
fromUrl: request.url,
|
|
220
|
+
toUrl: actualUrl,
|
|
221
|
+
});
|
|
222
|
+
results.url = request.url;
|
|
223
|
+
results.actualUrl = actualUrl;
|
|
224
|
+
await dataset.pushData(results);
|
|
225
|
+
}
|
|
226
|
+
else {
|
|
199
227
|
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
200
228
|
numScanned: urlsCrawled.scanned.length,
|
|
201
229
|
urlScanned: request.url,
|
|
202
230
|
});
|
|
203
|
-
|
|
231
|
+
if (isScanHtml) {
|
|
232
|
+
// carry through the HTTP status metadata
|
|
233
|
+
const status = response?.status();
|
|
234
|
+
const metadata = typeof status === 'number'
|
|
235
|
+
? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
|
|
236
|
+
: STATUS_CODE_METADATA[2];
|
|
237
|
+
urlsCrawled.invalid.push({
|
|
238
|
+
actualUrl,
|
|
239
|
+
url: request.url,
|
|
240
|
+
pageTitle: request.url,
|
|
241
|
+
metadata,
|
|
242
|
+
httpStatusCode: typeof status === 'number' ? status : 0,
|
|
243
|
+
});
|
|
244
|
+
}
|
|
204
245
|
}
|
|
205
|
-
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
206
|
-
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
207
|
-
numScanned: urlsCrawled.scanned.length,
|
|
208
|
-
urlScanned: request.url,
|
|
209
|
-
});
|
|
210
|
-
urlsCrawled.scanned.push({
|
|
211
|
-
url: request.url,
|
|
212
|
-
pageTitle: results.pageTitle,
|
|
213
|
-
actualUrl, // i.e. actualUrl
|
|
214
|
-
});
|
|
215
|
-
urlsCrawled.scannedRedirects.push({
|
|
216
|
-
fromUrl: request.url,
|
|
217
|
-
toUrl: actualUrl,
|
|
218
|
-
});
|
|
219
|
-
results.url = request.url;
|
|
220
|
-
results.actualUrl = actualUrl;
|
|
221
|
-
await dataset.pushData(results);
|
|
222
246
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
const status = response?.status();
|
|
231
|
-
const metadata = typeof status === 'number'
|
|
232
|
-
? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
|
|
233
|
-
: STATUS_CODE_METADATA[2];
|
|
234
|
-
urlsCrawled.invalid.push({
|
|
235
|
-
actualUrl,
|
|
247
|
+
catch (e) {
|
|
248
|
+
if (!isAbortingScan) {
|
|
249
|
+
guiInfoLog(guiInfoStatusTypes.ERROR, {
|
|
250
|
+
numScanned: urlsCrawled.scanned.length,
|
|
251
|
+
urlScanned: request.url,
|
|
252
|
+
});
|
|
253
|
+
urlsCrawled.error.push({
|
|
236
254
|
url: request.url,
|
|
237
255
|
pageTitle: request.url,
|
|
238
|
-
|
|
239
|
-
|
|
256
|
+
actualUrl: request.url,
|
|
257
|
+
metadata: STATUS_CODE_METADATA[2],
|
|
258
|
+
httpStatusCode: 0,
|
|
240
259
|
});
|
|
241
260
|
}
|
|
242
261
|
}
|
|
243
262
|
},
|
|
244
263
|
failedRequestHandler: async ({ request, response, error }) => {
|
|
245
|
-
|
|
246
|
-
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
|
|
264
|
+
if (isAbortingScan) {
|
|
247
265
|
return;
|
|
248
266
|
}
|
|
249
267
|
guiInfoLog(guiInfoStatusTypes.ERROR, {
|