@govtechsg/oobee 0.10.84 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/cli.js +7 -6
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +218 -71
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +24 -15
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +49 -29
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +3 -3
- package/src/cli.ts +20 -15
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +244 -84
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +38 -15
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +48 -29
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
|
@@ -196,7 +196,7 @@ export const filterAxeResults = (
|
|
|
196
196
|
const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
|
|
197
197
|
|
|
198
198
|
nodes.forEach(node => {
|
|
199
|
-
const { html } = node;
|
|
199
|
+
const { html, target } = node;
|
|
200
200
|
if (!(rule in passed.rules)) {
|
|
201
201
|
passed.rules[rule] = {
|
|
202
202
|
description,
|
|
@@ -207,9 +207,10 @@ export const filterAxeResults = (
|
|
|
207
207
|
items: [],
|
|
208
208
|
};
|
|
209
209
|
}
|
|
210
|
-
|
|
210
|
+
|
|
211
211
|
const finalHtml = truncateHtml(html);
|
|
212
|
-
|
|
212
|
+
const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
|
|
213
|
+
passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
|
|
213
214
|
|
|
214
215
|
passed.totalItems += 1;
|
|
215
216
|
passed.rules[rule].totalItems += 1;
|
|
@@ -76,6 +76,7 @@ const crawlSitemap = async ({
|
|
|
76
76
|
let dataset: crawlee.Dataset;
|
|
77
77
|
let urlsCrawled: UrlsCrawled;
|
|
78
78
|
let durationExceeded = false;
|
|
79
|
+
let isAbortingScan = false;
|
|
79
80
|
|
|
80
81
|
if (fromCrawlIntelligentSitemap) {
|
|
81
82
|
dataset = datasetFromIntelligent;
|
|
@@ -244,135 +245,152 @@ const crawlSitemap = async ({
|
|
|
244
245
|
return;
|
|
245
246
|
}
|
|
246
247
|
|
|
247
|
-
|
|
248
|
+
try {
|
|
249
|
+
await waitForPageLoaded(page, 10000);
|
|
248
250
|
|
|
249
|
-
|
|
251
|
+
const actualUrl = page.url() || request.loadedUrl || request.url;
|
|
250
252
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
+
const hasExceededDuration =
|
|
254
|
+
scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
|
|
253
255
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
256
|
+
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
|
|
257
|
+
isAbortingScan = true;
|
|
258
|
+
if (hasExceededDuration) {
|
|
259
|
+
console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
|
|
260
|
+
durationExceeded = true;
|
|
261
|
+
}
|
|
262
|
+
crawler.autoscaledPool.abort(); // stops new requests
|
|
263
|
+
return;
|
|
258
264
|
}
|
|
259
|
-
crawler.autoscaledPool.abort(); // stops new requests
|
|
260
|
-
return;
|
|
261
|
-
}
|
|
262
265
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
266
|
+
if (request.skipNavigation && actualUrl === 'about:blank') {
|
|
267
|
+
if (isScanPdfs) {
|
|
268
|
+
// pushes download promise into pdfDownloads
|
|
269
|
+
const { pdfFileName, url } = handlePdfDownload(
|
|
270
|
+
randomToken,
|
|
271
|
+
pdfDownloads,
|
|
272
|
+
request,
|
|
273
|
+
sendRequest,
|
|
274
|
+
urlsCrawled,
|
|
275
|
+
);
|
|
276
|
+
|
|
277
|
+
uuidToPdfMapping[pdfFileName] = url;
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
282
|
+
numScanned: urlsCrawled.scanned.length,
|
|
283
|
+
urlScanned: request.url,
|
|
284
|
+
});
|
|
285
|
+
urlsCrawled.userExcluded.push({
|
|
286
|
+
url: request.url,
|
|
287
|
+
pageTitle: request.url,
|
|
288
|
+
actualUrl: request.url, // because about:blank is not useful
|
|
289
|
+
metadata: STATUS_CODE_METADATA[1],
|
|
290
|
+
httpStatusCode: 1,
|
|
291
|
+
});
|
|
273
292
|
|
|
274
|
-
uuidToPdfMapping[pdfFileName] = url;
|
|
275
293
|
return;
|
|
276
294
|
}
|
|
277
295
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
urlScanned: request.url,
|
|
281
|
-
});
|
|
282
|
-
urlsCrawled.userExcluded.push({
|
|
283
|
-
url: request.url,
|
|
284
|
-
pageTitle: request.url,
|
|
285
|
-
actualUrl: request.url, // because about:blank is not useful
|
|
286
|
-
metadata: STATUS_CODE_METADATA[1],
|
|
287
|
-
httpStatusCode: 1,
|
|
288
|
-
});
|
|
296
|
+
const contentType = response?.headers?.()['content-type'] || '';
|
|
297
|
+
const status = response ? response.status() : 0;
|
|
289
298
|
|
|
290
|
-
|
|
291
|
-
|
|
299
|
+
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
300
|
+
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
301
|
+
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
|
|
302
|
+
item => (item.actualUrl || item.url) === page.url(),
|
|
303
|
+
);
|
|
292
304
|
|
|
293
|
-
|
|
294
|
-
|
|
305
|
+
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
306
|
+
urlsCrawled.notScannedRedirects.push({
|
|
307
|
+
fromUrl: request.url,
|
|
308
|
+
toUrl: actualUrl, // i.e. actualUrl
|
|
309
|
+
});
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
295
312
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
313
|
+
// This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
|
|
314
|
+
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
315
|
+
urlsCrawled.userExcluded.push({
|
|
316
|
+
url: request.url,
|
|
317
|
+
pageTitle: request.url,
|
|
318
|
+
actualUrl,
|
|
319
|
+
metadata: STATUS_CODE_METADATA[0],
|
|
320
|
+
httpStatusCode: 0,
|
|
321
|
+
});
|
|
301
322
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
}
|
|
323
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
324
|
+
numScanned: urlsCrawled.scanned.length,
|
|
325
|
+
urlScanned: request.url,
|
|
326
|
+
});
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
309
329
|
|
|
310
|
-
|
|
311
|
-
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
312
|
-
urlsCrawled.userExcluded.push({
|
|
313
|
-
url: request.url,
|
|
314
|
-
pageTitle: request.url,
|
|
315
|
-
actualUrl,
|
|
316
|
-
metadata: STATUS_CODE_METADATA[0],
|
|
317
|
-
httpStatusCode: 0,
|
|
318
|
-
});
|
|
330
|
+
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
319
331
|
|
|
320
|
-
guiInfoLog(guiInfoStatusTypes.
|
|
332
|
+
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
321
333
|
numScanned: urlsCrawled.scanned.length,
|
|
322
334
|
urlScanned: request.url,
|
|
323
335
|
});
|
|
324
|
-
return;
|
|
325
|
-
}
|
|
326
336
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
});
|
|
333
|
-
|
|
334
|
-
urlsCrawled.scanned.push({
|
|
335
|
-
url: request.url,
|
|
336
|
-
pageTitle: results.pageTitle,
|
|
337
|
-
actualUrl, // i.e. actualUrl
|
|
338
|
-
});
|
|
337
|
+
urlsCrawled.scanned.push({
|
|
338
|
+
url: request.url,
|
|
339
|
+
pageTitle: results.pageTitle,
|
|
340
|
+
actualUrl, // i.e. actualUrl
|
|
341
|
+
});
|
|
339
342
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
343
|
+
urlsCrawled.scannedRedirects.push({
|
|
344
|
+
fromUrl: request.url,
|
|
345
|
+
toUrl: actualUrl,
|
|
346
|
+
});
|
|
344
347
|
|
|
345
|
-
|
|
346
|
-
|
|
348
|
+
results.url = request.url;
|
|
349
|
+
results.actualUrl = actualUrl;
|
|
347
350
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
351
|
+
await dataset.pushData(results);
|
|
352
|
+
} else {
|
|
353
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
354
|
+
numScanned: urlsCrawled.scanned.length,
|
|
355
|
+
urlScanned: request.url,
|
|
356
|
+
});
|
|
354
357
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
358
|
+
if (isScanHtml) {
|
|
359
|
+
// carry through the HTTP status metadata
|
|
360
|
+
const status = response?.status();
|
|
361
|
+
const metadata =
|
|
362
|
+
typeof status === 'number'
|
|
363
|
+
? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
|
|
364
|
+
: STATUS_CODE_METADATA[2];
|
|
365
|
+
|
|
366
|
+
urlsCrawled.invalid.push({
|
|
367
|
+
actualUrl,
|
|
368
|
+
url: request.url,
|
|
369
|
+
pageTitle: request.url,
|
|
370
|
+
metadata,
|
|
371
|
+
httpStatusCode: typeof status === 'number' ? status : 0,
|
|
372
|
+
});
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
} catch (e) {
|
|
376
|
+
if (!isAbortingScan) {
|
|
377
|
+
guiInfoLog(guiInfoStatusTypes.ERROR, {
|
|
378
|
+
numScanned: urlsCrawled.scanned.length,
|
|
379
|
+
urlScanned: request.url,
|
|
380
|
+
});
|
|
362
381
|
|
|
363
|
-
urlsCrawled.
|
|
364
|
-
actualUrl,
|
|
382
|
+
urlsCrawled.error.push({
|
|
365
383
|
url: request.url,
|
|
366
384
|
pageTitle: request.url,
|
|
367
|
-
|
|
368
|
-
|
|
385
|
+
actualUrl: request.url,
|
|
386
|
+
metadata: STATUS_CODE_METADATA[2],
|
|
387
|
+
httpStatusCode: 0,
|
|
369
388
|
});
|
|
370
389
|
}
|
|
371
390
|
}
|
|
372
391
|
},
|
|
373
392
|
failedRequestHandler: async ({ request, response, error }) => {
|
|
374
|
-
|
|
375
|
-
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
|
|
393
|
+
if (isAbortingScan) {
|
|
376
394
|
return;
|
|
377
395
|
}
|
|
378
396
|
|