@govtechsg/oobee 0.10.84 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/cli.js +7 -6
  5. package/dist/combine.js +1 -1
  6. package/dist/constants/common.js +15 -4
  7. package/dist/constants/constants.js +604 -1
  8. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  9. package/dist/crawlers/crawlSitemap.js +98 -80
  10. package/dist/crawlers/custom/utils.js +218 -71
  11. package/dist/crawlers/guards/urlGuard.js +8 -15
  12. package/dist/crawlers/runCustom.js +24 -15
  13. package/dist/generateOobeeClientScanner.js +570 -0
  14. package/dist/mergeAxeResults.js +49 -29
  15. package/dist/npmIndex.js +10 -2
  16. package/dist/proxyService.js +18 -3
  17. package/dist/services/s3Uploader.js +21 -10
  18. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  19. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  20. package/dist/static/ejs/summary.ejs +10 -5
  21. package/oobee-client-scanner.js +34992 -0
  22. package/package.json +3 -3
  23. package/src/cli.ts +20 -15
  24. package/src/combine.ts +3 -1
  25. package/src/constants/common.ts +22 -10
  26. package/src/constants/constants.ts +602 -1
  27. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  28. package/src/crawlers/crawlSitemap.ts +116 -98
  29. package/src/crawlers/custom/utils.ts +244 -84
  30. package/src/crawlers/guards/urlGuard.ts +24 -31
  31. package/src/crawlers/runCustom.ts +38 -15
  32. package/src/generateOobeeClientScanner.ts +591 -0
  33. package/src/mergeAxeResults.ts +48 -29
  34. package/src/npmIndex.ts +12 -2
  35. package/src/proxyService.ts +25 -4
  36. package/src/services/s3Uploader.ts +23 -11
  37. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  38. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  39. package/src/static/ejs/summary.ejs +10 -5
  40. package/testStaticJSScanner.html +534 -0
@@ -116,7 +116,7 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
116
116
  return;
117
117
  const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
118
118
  nodes.forEach(node => {
119
- const { html } = node;
119
+ const { html, target } = node;
120
120
  if (!(rule in passed.rules)) {
121
121
  passed.rules[rule] = {
122
122
  description,
@@ -128,7 +128,8 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
128
128
  };
129
129
  }
130
130
  const finalHtml = truncateHtml(html);
131
- passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: '' });
131
+ const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
132
+ passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
132
133
  passed.totalItems += 1;
133
134
  passed.rules[rule].totalItems += 1;
134
135
  totalItems += 1;
@@ -12,6 +12,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
12
12
  let dataset;
13
13
  let urlsCrawled;
14
14
  let durationExceeded = false;
15
+ let isAbortingScan = false;
15
16
  if (fromCrawlIntelligentSitemap) {
16
17
  dataset = datasetFromIntelligent;
17
18
  urlsCrawled = urlsCrawledFromIntelligent;
@@ -144,106 +145,123 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
144
145
  });
145
146
  return;
146
147
  }
147
- await waitForPageLoaded(page, 10000);
148
- const actualUrl = page.url() || request.loadedUrl || request.url;
149
- const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
150
- if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
151
- if (hasExceededDuration) {
152
- console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
153
- durationExceeded = true;
154
- }
155
- crawler.autoscaledPool.abort(); // stops new requests
156
- return;
157
- }
158
- if (request.skipNavigation && actualUrl === 'about:blank') {
159
- if (isScanPdfs) {
160
- // pushes download promise into pdfDownloads
161
- const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
162
- uuidToPdfMapping[pdfFileName] = url;
148
+ try {
149
+ await waitForPageLoaded(page, 10000);
150
+ const actualUrl = page.url() || request.loadedUrl || request.url;
151
+ const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
152
+ if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
153
+ isAbortingScan = true;
154
+ if (hasExceededDuration) {
155
+ console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
156
+ durationExceeded = true;
157
+ }
158
+ crawler.autoscaledPool.abort(); // stops new requests
163
159
  return;
164
160
  }
165
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
166
- numScanned: urlsCrawled.scanned.length,
167
- urlScanned: request.url,
168
- });
169
- urlsCrawled.userExcluded.push({
170
- url: request.url,
171
- pageTitle: request.url,
172
- actualUrl: request.url, // because about:blank is not useful
173
- metadata: STATUS_CODE_METADATA[1],
174
- httpStatusCode: 1,
175
- });
176
- return;
177
- }
178
- const contentType = response?.headers?.()['content-type'] || '';
179
- const status = response ? response.status() : 0;
180
- if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
181
- const isRedirected = !areLinksEqual(page.url(), request.url);
182
- const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
183
- if (isRedirected && isLoadedUrlInCrawledUrls) {
184
- urlsCrawled.notScannedRedirects.push({
185
- fromUrl: request.url,
186
- toUrl: actualUrl, // i.e. actualUrl
161
+ if (request.skipNavigation && actualUrl === 'about:blank') {
162
+ if (isScanPdfs) {
163
+ // pushes download promise into pdfDownloads
164
+ const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
165
+ uuidToPdfMapping[pdfFileName] = url;
166
+ return;
167
+ }
168
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
169
+ numScanned: urlsCrawled.scanned.length,
170
+ urlScanned: request.url,
187
171
  });
188
- return;
189
- }
190
- // This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
191
- if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
192
172
  urlsCrawled.userExcluded.push({
193
173
  url: request.url,
194
174
  pageTitle: request.url,
195
- actualUrl,
196
- metadata: STATUS_CODE_METADATA[0],
197
- httpStatusCode: 0,
175
+ actualUrl: request.url, // because about:blank is not useful
176
+ metadata: STATUS_CODE_METADATA[1],
177
+ httpStatusCode: 1,
198
178
  });
179
+ return;
180
+ }
181
+ const contentType = response?.headers?.()['content-type'] || '';
182
+ const status = response ? response.status() : 0;
183
+ if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
184
+ const isRedirected = !areLinksEqual(page.url(), request.url);
185
+ const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
186
+ if (isRedirected && isLoadedUrlInCrawledUrls) {
187
+ urlsCrawled.notScannedRedirects.push({
188
+ fromUrl: request.url,
189
+ toUrl: actualUrl, // i.e. actualUrl
190
+ });
191
+ return;
192
+ }
193
+ // This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
194
+ if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
195
+ urlsCrawled.userExcluded.push({
196
+ url: request.url,
197
+ pageTitle: request.url,
198
+ actualUrl,
199
+ metadata: STATUS_CODE_METADATA[0],
200
+ httpStatusCode: 0,
201
+ });
202
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
203
+ numScanned: urlsCrawled.scanned.length,
204
+ urlScanned: request.url,
205
+ });
206
+ return;
207
+ }
208
+ const results = await runAxeScript({ includeScreenshots, page, randomToken });
209
+ guiInfoLog(guiInfoStatusTypes.SCANNED, {
210
+ numScanned: urlsCrawled.scanned.length,
211
+ urlScanned: request.url,
212
+ });
213
+ urlsCrawled.scanned.push({
214
+ url: request.url,
215
+ pageTitle: results.pageTitle,
216
+ actualUrl, // i.e. actualUrl
217
+ });
218
+ urlsCrawled.scannedRedirects.push({
219
+ fromUrl: request.url,
220
+ toUrl: actualUrl,
221
+ });
222
+ results.url = request.url;
223
+ results.actualUrl = actualUrl;
224
+ await dataset.pushData(results);
225
+ }
226
+ else {
199
227
  guiInfoLog(guiInfoStatusTypes.SKIPPED, {
200
228
  numScanned: urlsCrawled.scanned.length,
201
229
  urlScanned: request.url,
202
230
  });
203
- return;
231
+ if (isScanHtml) {
232
+ // carry through the HTTP status metadata
233
+ const status = response?.status();
234
+ const metadata = typeof status === 'number'
235
+ ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
236
+ : STATUS_CODE_METADATA[2];
237
+ urlsCrawled.invalid.push({
238
+ actualUrl,
239
+ url: request.url,
240
+ pageTitle: request.url,
241
+ metadata,
242
+ httpStatusCode: typeof status === 'number' ? status : 0,
243
+ });
244
+ }
204
245
  }
205
- const results = await runAxeScript({ includeScreenshots, page, randomToken });
206
- guiInfoLog(guiInfoStatusTypes.SCANNED, {
207
- numScanned: urlsCrawled.scanned.length,
208
- urlScanned: request.url,
209
- });
210
- urlsCrawled.scanned.push({
211
- url: request.url,
212
- pageTitle: results.pageTitle,
213
- actualUrl, // i.e. actualUrl
214
- });
215
- urlsCrawled.scannedRedirects.push({
216
- fromUrl: request.url,
217
- toUrl: actualUrl,
218
- });
219
- results.url = request.url;
220
- results.actualUrl = actualUrl;
221
- await dataset.pushData(results);
222
246
  }
223
- else {
224
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
225
- numScanned: urlsCrawled.scanned.length,
226
- urlScanned: request.url,
227
- });
228
- if (isScanHtml) {
229
- // carry through the HTTP status metadata
230
- const status = response?.status();
231
- const metadata = typeof status === 'number'
232
- ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
233
- : STATUS_CODE_METADATA[2];
234
- urlsCrawled.invalid.push({
235
- actualUrl,
247
+ catch (e) {
248
+ if (!isAbortingScan) {
249
+ guiInfoLog(guiInfoStatusTypes.ERROR, {
250
+ numScanned: urlsCrawled.scanned.length,
251
+ urlScanned: request.url,
252
+ });
253
+ urlsCrawled.error.push({
236
254
  url: request.url,
237
255
  pageTitle: request.url,
238
- metadata,
239
- httpStatusCode: typeof status === 'number' ? status : 0,
256
+ actualUrl: request.url,
257
+ metadata: STATUS_CODE_METADATA[2],
258
+ httpStatusCode: 0,
240
259
  });
241
260
  }
242
261
  }
243
262
  },
244
263
  failedRequestHandler: async ({ request, response, error }) => {
245
- // check if scanned pages have reached limit due to multi-instances of handler running
246
- if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
264
+ if (isAbortingScan) {
247
265
  return;
248
266
  }
249
267
  guiInfoLog(guiInfoStatusTypes.ERROR, {