@govtechsg/oobee 0.10.84 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/cli.js +7 -6
  5. package/dist/combine.js +1 -1
  6. package/dist/constants/common.js +15 -4
  7. package/dist/constants/constants.js +604 -1
  8. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  9. package/dist/crawlers/crawlSitemap.js +98 -80
  10. package/dist/crawlers/custom/utils.js +218 -71
  11. package/dist/crawlers/guards/urlGuard.js +8 -15
  12. package/dist/crawlers/runCustom.js +24 -15
  13. package/dist/generateOobeeClientScanner.js +570 -0
  14. package/dist/mergeAxeResults.js +49 -29
  15. package/dist/npmIndex.js +10 -2
  16. package/dist/proxyService.js +18 -3
  17. package/dist/services/s3Uploader.js +21 -10
  18. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  19. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  20. package/dist/static/ejs/summary.ejs +10 -5
  21. package/oobee-client-scanner.js +34992 -0
  22. package/package.json +3 -3
  23. package/src/cli.ts +20 -15
  24. package/src/combine.ts +3 -1
  25. package/src/constants/common.ts +22 -10
  26. package/src/constants/constants.ts +602 -1
  27. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  28. package/src/crawlers/crawlSitemap.ts +116 -98
  29. package/src/crawlers/custom/utils.ts +244 -84
  30. package/src/crawlers/guards/urlGuard.ts +24 -31
  31. package/src/crawlers/runCustom.ts +38 -15
  32. package/src/generateOobeeClientScanner.ts +591 -0
  33. package/src/mergeAxeResults.ts +48 -29
  34. package/src/npmIndex.ts +12 -2
  35. package/src/proxyService.ts +25 -4
  36. package/src/services/s3Uploader.ts +23 -11
  37. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  38. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  39. package/src/static/ejs/summary.ejs +10 -5
  40. package/testStaticJSScanner.html +534 -0
@@ -196,7 +196,7 @@ export const filterAxeResults = (
   const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
 
   nodes.forEach(node => {
-    const { html } = node;
+    const { html, target } = node;
     if (!(rule in passed.rules)) {
       passed.rules[rule] = {
         description,
@@ -207,9 +207,10 @@ export const filterAxeResults = (
         items: [],
       };
     }
-
+
     const finalHtml = truncateHtml(html);
-    passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: '' });
+    const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
+    passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
 
     passed.totalItems += 1;
     passed.rules[rule].totalItems += 1;
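Note on the two filterAxeResults hunks above (the file list suggests they belong to package/src/mergeAxeResults.ts): passed items now carry a locator in their xpath field, taken from axe-core's node.target when that selector array holds exactly one plain string. A minimal TypeScript sketch of the derivation; NodeResultLike and deriveXpath are illustrative names, not the package's own:

```ts
// Illustrative sketch of the target-to-xpath logic in the diff above.
// axe-core reports each node's `target` as an array of selectors; an entry
// may itself be an array (e.g. for nodes inside frames/shadow DOM), so only
// a single plain-string selector is trusted.
type NodeResultLike = { html: string; target: Array<string | string[]> };

function deriveXpath(node: NodeResultLike): string {
  const { target } = node;
  return target.length === 1 && typeof target[0] === 'string' ? target[0] : '';
}

// deriveXpath({ html: '<a>x</a>', target: ['a'] })             -> 'a'
// deriveXpath({ html: '<a>x</a>', target: [['iframe', 'a']] }) -> ''
```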
@@ -76,6 +76,7 @@ const crawlSitemap = async ({
   let dataset: crawlee.Dataset;
   let urlsCrawled: UrlsCrawled;
   let durationExceeded = false;
+  let isAbortingScan = false;
 
   if (fromCrawlIntelligentSitemap) {
     dataset = datasetFromIntelligent;
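This hunk introduces the isAbortingScan flag used throughout the crawlSitemap changes below: once the page limit or scan duration trips, the request handler sets the flag and stops the crawler's autoscaled pool, so later exceptions and failed requests can be recognised as a deliberate shutdown. A condensed sketch of the pattern, assuming crawlee's AutoscaledPool API (abort() stops the pool from scheduling further tasks); abortScan is a hypothetical helper, not a function in the package:

```ts
import { AutoscaledPool } from 'crawlee';

// Shared across requestHandler and failedRequestHandler so an intentional
// shutdown is distinguishable from a genuine page failure.
let isAbortingScan = false;

// Hypothetical helper: flips the flag once, then halts scheduling.
async function abortScan(pool?: AutoscaledPool): Promise<void> {
  if (isAbortingScan) return; // idempotent across concurrent handler instances
  isAbortingScan = true;
  await pool?.abort(); // stops new requests; in-flight pages may still throw
}
```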
@@ -244,135 +245,152 @@ const crawlSitemap = async ({
         return;
       }
 
-      await waitForPageLoaded(page, 10000);
+      try {
+        await waitForPageLoaded(page, 10000);
 
-      const actualUrl = page.url() || request.loadedUrl || request.url;
+        const actualUrl = page.url() || request.loadedUrl || request.url;
 
-      const hasExceededDuration =
-        scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
+        const hasExceededDuration =
+          scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
 
-      if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
-        if (hasExceededDuration) {
-          console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
-          durationExceeded = true;
+        if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
+          isAbortingScan = true;
+          if (hasExceededDuration) {
+            console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
+            durationExceeded = true;
+          }
+          crawler.autoscaledPool.abort(); // stops new requests
+          return;
         }
-        crawler.autoscaledPool.abort(); // stops new requests
-        return;
-      }
 
-      if (request.skipNavigation && actualUrl === 'about:blank') {
-        if (isScanPdfs) {
-          // pushes download promise into pdfDownloads
-          const { pdfFileName, url } = handlePdfDownload(
-            randomToken,
-            pdfDownloads,
-            request,
-            sendRequest,
-            urlsCrawled,
-          );
+        if (request.skipNavigation && actualUrl === 'about:blank') {
+          if (isScanPdfs) {
+            // pushes download promise into pdfDownloads
+            const { pdfFileName, url } = handlePdfDownload(
+              randomToken,
+              pdfDownloads,
+              request,
+              sendRequest,
+              urlsCrawled,
+            );
+
+            uuidToPdfMapping[pdfFileName] = url;
+            return;
+          }
+
+          guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+            numScanned: urlsCrawled.scanned.length,
+            urlScanned: request.url,
+          });
+          urlsCrawled.userExcluded.push({
+            url: request.url,
+            pageTitle: request.url,
+            actualUrl: request.url, // because about:blank is not useful
+            metadata: STATUS_CODE_METADATA[1],
+            httpStatusCode: 1,
+          });
 
-          uuidToPdfMapping[pdfFileName] = url;
           return;
         }
 
-        guiInfoLog(guiInfoStatusTypes.SKIPPED, {
-          numScanned: urlsCrawled.scanned.length,
-          urlScanned: request.url,
-        });
-        urlsCrawled.userExcluded.push({
-          url: request.url,
-          pageTitle: request.url,
-          actualUrl: request.url, // because about:blank is not useful
-          metadata: STATUS_CODE_METADATA[1],
-          httpStatusCode: 1,
-        });
+        const contentType = response?.headers?.()['content-type'] || '';
+        const status = response ? response.status() : 0;
 
-        return;
-      }
+        if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
+          const isRedirected = !areLinksEqual(page.url(), request.url);
+          const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
+            item => (item.actualUrl || item.url) === page.url(),
+          );
 
-      const contentType = response?.headers?.()['content-type'] || '';
-      const status = response ? response.status() : 0;
+          if (isRedirected && isLoadedUrlInCrawledUrls) {
+            urlsCrawled.notScannedRedirects.push({
+              fromUrl: request.url,
+              toUrl: actualUrl, // i.e. actualUrl
+            });
+            return;
+          }
 
-      if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
-        const isRedirected = !areLinksEqual(page.url(), request.url);
-        const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
-          item => (item.actualUrl || item.url) === page.url(),
-        );
+          // This logic is different from crawlDomain, as it also checks if the pae is redirected before checking if it is excluded using exclusions.txt
+          if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+            urlsCrawled.userExcluded.push({
+              url: request.url,
+              pageTitle: request.url,
+              actualUrl,
+              metadata: STATUS_CODE_METADATA[0],
+              httpStatusCode: 0,
+            });
 
-        if (isRedirected && isLoadedUrlInCrawledUrls) {
-          urlsCrawled.notScannedRedirects.push({
-            fromUrl: request.url,
-            toUrl: actualUrl, // i.e. actualUrl
-          });
-          return;
-        }
+            guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+              numScanned: urlsCrawled.scanned.length,
+              urlScanned: request.url,
+            });
+            return;
+          }
 
-        // This logic is different from crawlDomain, as it also checks if the pae is redirected before checking if it is excluded using exclusions.txt
-        if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
-          urlsCrawled.userExcluded.push({
-            url: request.url,
-            pageTitle: request.url,
-            actualUrl,
-            metadata: STATUS_CODE_METADATA[0],
-            httpStatusCode: 0,
-          });
+          const results = await runAxeScript({ includeScreenshots, page, randomToken });
 
-          guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+          guiInfoLog(guiInfoStatusTypes.SCANNED, {
             numScanned: urlsCrawled.scanned.length,
             urlScanned: request.url,
           });
-          return;
-        }
 
-        const results = await runAxeScript({ includeScreenshots, page, randomToken });
-
-        guiInfoLog(guiInfoStatusTypes.SCANNED, {
-          numScanned: urlsCrawled.scanned.length,
-          urlScanned: request.url,
-        });
-
-        urlsCrawled.scanned.push({
-          url: request.url,
-          pageTitle: results.pageTitle,
-          actualUrl, // i.e. actualUrl
-        });
+          urlsCrawled.scanned.push({
+            url: request.url,
+            pageTitle: results.pageTitle,
+            actualUrl, // i.e. actualUrl
+          });
 
-        urlsCrawled.scannedRedirects.push({
-          fromUrl: request.url,
-          toUrl: actualUrl,
-        });
+          urlsCrawled.scannedRedirects.push({
+            fromUrl: request.url,
+            toUrl: actualUrl,
+          });
 
-        results.url = request.url;
-        results.actualUrl = actualUrl;
+          results.url = request.url;
+          results.actualUrl = actualUrl;
 
-        await dataset.pushData(results);
-      } else {
-        guiInfoLog(guiInfoStatusTypes.SKIPPED, {
-          numScanned: urlsCrawled.scanned.length,
-          urlScanned: request.url,
-        });
+          await dataset.pushData(results);
+        } else {
+          guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+            numScanned: urlsCrawled.scanned.length,
+            urlScanned: request.url,
+          });
 
-        if (isScanHtml) {
-          // carry through the HTTP status metadata
-          const status = response?.status();
-          const metadata =
-            typeof status === 'number'
-              ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
-              : STATUS_CODE_METADATA[2];
+          if (isScanHtml) {
+            // carry through the HTTP status metadata
+            const status = response?.status();
+            const metadata =
+              typeof status === 'number'
+                ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
+                : STATUS_CODE_METADATA[2];
+
+            urlsCrawled.invalid.push({
+              actualUrl,
+              url: request.url,
+              pageTitle: request.url,
+              metadata,
+              httpStatusCode: typeof status === 'number' ? status : 0,
+            });
+          }
+        }
+      } catch (e) {
+        if (!isAbortingScan) {
+          guiInfoLog(guiInfoStatusTypes.ERROR, {
+            numScanned: urlsCrawled.scanned.length,
+            urlScanned: request.url,
+          });
 
-          urlsCrawled.invalid.push({
-            actualUrl,
+          urlsCrawled.error.push({
             url: request.url,
             pageTitle: request.url,
-            metadata,
-            httpStatusCode: typeof status === 'number' ? status : 0,
+            actualUrl: request.url,
+            metadata: STATUS_CODE_METADATA[2],
+            httpStatusCode: 0,
           });
         }
       }
     },
     failedRequestHandler: async ({ request, response, error }) => {
-      // check if scanned pages have reached limit due to multi-instances of handler running
-      if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
+      if (isAbortingScan) {
        return;
      }
 
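The net effect of this hunk: the whole request-handler body now runs inside one try/catch, so a page torn down by the deliberate abort is silently dropped, while a genuine failure is logged and recorded under urlsCrawled.error with the generic STATUS_CODE_METADATA[2] entry. The failedRequestHandler makes the same distinction by checking isAbortingScan instead of re-comparing the scanned count against maxRequestsPerCrawl, which raced against concurrently running handler instances. A self-contained sketch of the classification, using stand-in types and placeholder metadata text rather than the package's real tables:

```ts
// Stand-in shapes; the real UrlsCrawled entries and STATUS_CODE_METADATA
// table live in the package's constants.
type ErrorEntry = {
  url: string;
  pageTitle: string;
  actualUrl: string;
  metadata: string;
  httpStatusCode: number;
};

const STATUS_CODE_METADATA: Record<number, string> = { 2: 'scan error' }; // placeholder text

let isAbortingScan = false;
const errors: ErrorEntry[] = [];

async function scanWithAbortAwareness(url: string, scan: () => Promise<void>): Promise<void> {
  try {
    await scan();
  } catch {
    // Pages interrupted by the deliberate abort are not counted as errors.
    if (isAbortingScan) return;
    errors.push({
      url,
      pageTitle: url,
      actualUrl: url,
      metadata: STATUS_CODE_METADATA[2],
      httpStatusCode: 0,
    });
  }
}
```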