@govtechsg/oobee 0.10.42 → 0.10.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import crawlee, { Request, RequestList } from 'crawlee';
1
+ import crawlee, { LaunchContext, Request, RequestList } from 'crawlee';
2
2
  import printMessage from 'print-message';
3
3
  import fs from 'fs';
4
4
  import {
@@ -8,7 +8,7 @@ import {
8
8
  isUrlPdf,
9
9
  } from './commonCrawlerFunc.js';
10
10
 
11
- import constants, { guiInfoStatusTypes } from '../constants/constants.js';
11
+ import constants, { STATUS_CODE_METADATA, guiInfoStatusTypes, UrlsCrawled } from '../constants/constants.js';
12
12
  import {
13
13
  getLinksFromSitemap,
14
14
  getPlaywrightLaunchOptions,
@@ -22,31 +22,32 @@ import {
22
22
  import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
23
23
  import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
24
24
  import { guiInfoLog } from '../logs.js';
25
+ import { ViewportSettingsClass } from '../combine.js';
25
26
 
26
27
  const crawlSitemap = async (
27
- sitemapUrl,
28
- randomToken,
29
- host,
30
- viewportSettings,
31
- maxRequestsPerCrawl,
32
- browser,
33
- userDataDirectory,
34
- specifiedMaxConcurrency,
35
- fileTypes,
36
- blacklistedPatterns,
37
- includeScreenshots,
38
- extraHTTPHeaders,
28
+ sitemapUrl: string,
29
+ randomToken: string,
30
+ _host: string,
31
+ viewportSettings: ViewportSettingsClass,
32
+ maxRequestsPerCrawl: number,
33
+ browser: string,
34
+ userDataDirectory: string,
35
+ specifiedMaxConcurrency: number,
36
+ fileTypes: string,
37
+ blacklistedPatterns: string[],
38
+ includeScreenshots: boolean,
39
+ extraHTTPHeaders: Record<string, string>,
39
40
  fromCrawlIntelligentSitemap = false, // optional
40
- userUrlInputFromIntelligent = null, // optional
41
- datasetFromIntelligent = null, // optional
42
- urlsCrawledFromIntelligent = null, // optional
41
+ userUrlInputFromIntelligent: string = null, // optional
42
+ datasetFromIntelligent: crawlee.Dataset = null, // optional
43
+ urlsCrawledFromIntelligent: UrlsCrawled = null, // optional
43
44
  crawledFromLocalFile = false, // optional
44
45
  ) => {
45
- let dataset;
46
- let urlsCrawled;
46
+ let dataset: crawlee.Dataset;
47
+ let urlsCrawled: UrlsCrawled;
47
48
 
48
49
  // Boolean to omit axe scan for basic auth URL
49
- let isBasicAuth;
50
+ let isBasicAuth: boolean;
50
51
  let basicAuthPage = 0;
51
52
  let finalLinks = [];
52
53
  let authHeader = '';
@@ -119,8 +120,8 @@ const crawlSitemap = async (
119
120
  basicAuthPage = -2;
120
121
  }
121
122
 
122
- const pdfDownloads = [];
123
- const uuidToPdfMapping = {};
123
+ const pdfDownloads: Promise<void>[] = [];
124
+ const uuidToPdfMapping: Record<string, string> = {};
124
125
  const isScanHtml = ['all', 'html-only'].includes(fileTypes);
125
126
  const isScanPdfs = ['all', 'pdf-only'].includes(fileTypes);
126
127
  const { playwrightDeviceDetailsObject } = viewportSettings;
@@ -152,7 +153,7 @@ const crawlSitemap = async (
152
153
  browserPoolOptions: {
153
154
  useFingerprints: false,
154
155
  preLaunchHooks: [
155
- async (pageId, launchContext) => {
156
+ async (_pageId: string, launchContext: LaunchContext) => {
156
157
  launchContext.launchOptions = {
157
158
  ...launchContext.launchOptions,
158
159
  bypassCSP: true,
@@ -164,39 +165,43 @@ const crawlSitemap = async (
164
165
  },
165
166
  requestList,
166
167
  postNavigationHooks: [
167
- async ({ page, request }) => {
168
+
169
+ async ({ page }) => {
168
170
  try {
169
171
  // Wait for a quiet period in the DOM, but with safeguards
170
172
  await page.evaluate(() => {
171
- return new Promise((resolve) => {
173
+ return new Promise(resolve => {
172
174
  let timeout;
173
175
  let mutationCount = 0;
174
- const MAX_MUTATIONS = 250; // Prevent infinite mutations
175
- const OBSERVER_TIMEOUT = 5000; // Hard timeout to exit
176
-
176
+ const MAX_MUTATIONS = 250; // stop if things never quiet down
177
+ const OBSERVER_TIMEOUT = 5000; // hard cap on total wait
178
+
177
179
  const observer = new MutationObserver(() => {
178
180
  clearTimeout(timeout);
179
-
181
+
180
182
  mutationCount++;
181
183
  if (mutationCount > MAX_MUTATIONS) {
182
184
  observer.disconnect();
183
- resolve('Too many mutations detected, exiting.');
185
+ resolve('Too many mutations, exiting.');
184
186
  return;
185
187
  }
186
-
188
+
189
+ // restart quiet-period timer
187
190
  timeout = setTimeout(() => {
188
191
  observer.disconnect();
189
- resolve('DOM stabilized after mutations.');
192
+ resolve('DOM stabilized.');
190
193
  }, 1000);
191
194
  });
192
-
195
+
196
+ // overall timeout in case the page never settles
193
197
  timeout = setTimeout(() => {
194
198
  observer.disconnect();
195
- resolve('Observer timeout reached, exiting.');
196
- }, OBSERVER_TIMEOUT); // Ensure the observer stops after X seconds
197
-
198
- observer.observe(document.documentElement, { childList: true, subtree: true });
199
-
199
+ resolve('Observer timeout reached.');
200
+ }, OBSERVER_TIMEOUT);
201
+
202
+ // **HERE**: select the real DOM node inside evaluate
203
+ const root = document.documentElement;
204
+ observer.observe(root, { childList: true, subtree: true });
200
205
  });
201
206
  });
202
207
  } catch (err) {
@@ -207,6 +212,7 @@ const crawlSitemap = async (
207
212
  throw err; // Rethrow unknown errors
208
213
  }
209
214
  },
215
+
210
216
  ],
211
217
 
212
218
  preNavigationHooks: isBasicAuth
@@ -252,10 +258,12 @@ const crawlSitemap = async (
252
258
  numScanned: urlsCrawled.scanned.length,
253
259
  urlScanned: request.url,
254
260
  });
255
- urlsCrawled.blacklisted.push({
261
+ urlsCrawled.userExcluded.push({
256
262
  url: request.url,
257
263
  pageTitle: request.url,
258
- actualUrl: actualUrl, // i.e. actualUrl
264
+ actualUrl: request.url, // because about:blank is not useful
265
+ metadata: STATUS_CODE_METADATA[1],
266
+ httpStatusCode: 0,
259
267
  });
260
268
 
261
269
  return;
@@ -276,85 +284,64 @@ const crawlSitemap = async (
276
284
  const contentType = response?.headers?.()['content-type'] || '';
277
285
  const status = response ? response.status() : 0;
278
286
 
279
- if (blacklistedPatterns && !isFollowStrategy(actualUrl, request.url, "same-hostname") && isSkippedUrl(actualUrl, blacklistedPatterns)) {
280
- urlsCrawled.userExcluded.push({
281
- url: request.url,
282
- pageTitle: request.url,
283
- actualUrl: actualUrl,
284
- });
285
-
286
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
287
- numScanned: urlsCrawled.scanned.length,
288
- urlScanned: request.url,
289
- });
290
- return;
291
- }
287
+ if (basicAuthPage < 0) {
288
+ basicAuthPage += 1;
289
+ } else if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
290
+ const isRedirected = !areLinksEqual(page.url(), request.url);
291
+ const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
292
+ item => (item.actualUrl || item.url) === page.url(),
293
+ );
292
294
 
293
- if (status === 403) {
294
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
295
- numScanned: urlsCrawled.scanned.length,
296
- urlScanned: request.url,
297
- });
298
- urlsCrawled.forbidden.push({ url: request.url });
299
- return;
300
- }
295
+ if (isRedirected && isLoadedUrlInCrawledUrls) {
296
+ urlsCrawled.notScannedRedirects.push({
297
+ fromUrl: request.url,
298
+ toUrl: actualUrl, // i.e. actualUrl
299
+ });
300
+ return;
301
+ }
301
302
 
302
- if (status !== 200) {
303
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
304
- numScanned: urlsCrawled.scanned.length,
305
- urlScanned: request.url,
306
- });
307
- urlsCrawled.invalid.push({
308
- url: request.url,
309
- pageTitle: request.url,
310
- actualUrl: actualUrl, // i.e. actualUrl
311
- });
303
+ // This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
304
+ if (
305
+ isRedirected &&
306
+ blacklistedPatterns &&
307
+ isSkippedUrl(actualUrl, blacklistedPatterns)
308
+ ) {
309
+ urlsCrawled.userExcluded.push({
310
+ url: request.url,
311
+ pageTitle: request.url,
312
+ actualUrl: actualUrl,
313
+ metadata: STATUS_CODE_METADATA[0],
314
+ httpStatusCode: 0,
315
+ });
312
316
 
313
- return;
314
- }
317
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
318
+ numScanned: urlsCrawled.scanned.length,
319
+ urlScanned: request.url,
320
+ });
321
+ return;
322
+ }
315
323
 
316
- if (basicAuthPage < 0) {
317
- basicAuthPage += 1;
318
- } else if (isScanHtml && status === 200 && isWhitelistedContentType(contentType)) {
319
324
  const results = await runAxeScript({ includeScreenshots, page, randomToken });
325
+
320
326
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
321
327
  numScanned: urlsCrawled.scanned.length,
322
328
  urlScanned: request.url,
323
329
  });
324
330
 
325
- const isRedirected = !areLinksEqual(page.url(), request.url);
326
- if (isRedirected) {
327
- const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
328
- item => (item.actualUrl || item.url.href) === page,
329
- );
330
-
331
- if (isLoadedUrlInCrawledUrls) {
332
- urlsCrawled.notScannedRedirects.push({
333
- fromUrl: request.url,
334
- toUrl: actualUrl, // i.e. actualUrl
335
- });
336
- return;
337
- }
331
+ urlsCrawled.scanned.push({
332
+ url: urlWithoutAuth(request.url),
333
+ pageTitle: results.pageTitle,
334
+ actualUrl: actualUrl, // i.e. actualUrl
335
+ });
338
336
 
339
- urlsCrawled.scanned.push({
340
- url: urlWithoutAuth(request.url),
341
- pageTitle: results.pageTitle,
342
- actualUrl: actualUrl, // i.e. actualUrl
343
- });
337
+ urlsCrawled.scannedRedirects.push({
338
+ fromUrl: urlWithoutAuth(request.url),
339
+ toUrl: actualUrl,
340
+ });
344
341
 
345
- urlsCrawled.scannedRedirects.push({
346
- fromUrl: urlWithoutAuth(request.url),
347
- toUrl: actualUrl,
348
- });
342
+ results.url = request.url;
343
+ results.actualUrl = actualUrl;
349
344
 
350
- results.url = request.url;
351
- results.actualUrl = actualUrl;
352
- } else {
353
- urlsCrawled.scanned.push({
354
- url: urlWithoutAuth(request.url),
355
- pageTitle: results.pageTitle,
356
- });
357
- }
358
345
  await dataset.pushData(results);
359
346
  } else {
360
347
  guiInfoLog(guiInfoStatusTypes.SKIPPED, {
@@ -363,11 +350,23 @@ const crawlSitemap = async (
363
350
  });
364
351
 
365
352
  if (isScanHtml) {
366
- urlsCrawled.invalid.push(actualUrl);
353
+ // carry through the HTTP status metadata
354
+ const status = response?.status();
355
+ const metadata = typeof status === 'number'
356
+ ? (STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599])
357
+ : STATUS_CODE_METADATA[2];
358
+
359
+ urlsCrawled.invalid.push({
360
+ actualUrl,
361
+ url: request.url,
362
+ pageTitle: request.url,
363
+ metadata,
364
+ httpStatusCode: typeof status === 'number' ? status : 0
365
+ });
367
366
  }
368
367
  }
369
368
  },
370
- failedRequestHandler: async ({ request }) => {
369
+ failedRequestHandler: async ({ request, response, error }) => {
371
370
  if (isBasicAuth && request.url) {
372
371
  request.url = `${request.url.split('://')[0]}://${request.url.split('@')[1]}`;
373
372
  }
@@ -381,7 +380,19 @@ const crawlSitemap = async (
381
380
  numScanned: urlsCrawled.scanned.length,
382
381
  urlScanned: request.url,
383
382
  });
384
- urlsCrawled.error.push(request.url);
383
+
384
+ const status = response?.status();
385
+ const metadata = typeof status === 'number'
386
+ ? (STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599])
387
+ : STATUS_CODE_METADATA[2];
388
+
389
+ urlsCrawled.error.push({
390
+ url: request.url,
391
+ pageTitle: request.url,
392
+ actualUrl: request.url,
393
+ metadata,
394
+ httpStatusCode: typeof status === 'number' ? status : 0
395
+ });
385
396
  crawlee.log.error(`Failed Request - ${request.url}: ${request.errorMessages}`);
386
397
  },
387
398
  maxRequestsPerCrawl: Infinity,
@@ -16,7 +16,7 @@ export function findElementByCssSelector(cssSelector: string): string | null {
16
16
 
17
17
  // Handle Shadow DOM if the element is not found
18
18
  if (!element) {
19
- const shadowRoots = [];
19
+ const shadowRoots: ShadowRoot[] = [];
20
20
  const allElements = document.querySelectorAll('*');
21
21
 
22
22
  // Look for elements with shadow roots
@@ -27,9 +27,9 @@ export async function flagUnlabelledClickableElements() {
27
27
  const loggingEnabled = false; // Set to true to enable console warnings
28
28
 
29
29
  let previousFlaggedXPathsByDocument = {}; // Object to hold previous flagged XPaths
30
- const previousAllFlaggedElementsXPaths = []; // Array to store all flagged XPaths
30
+ const previousAllFlaggedElementsXPaths : {xpath: string, code: string }[] = []; // Array to store all flagged XPaths
31
31
 
32
- function getXPath(element: Node) {
32
+ function getXPath(element: Node): string {
33
33
  if (!element) return null;
34
34
  if (element instanceof HTMLElement && element.id) {
35
35
  return `//*[@id="${element.id}"]`;
@@ -297,7 +297,7 @@ function hasPointerCursor(node: Node): boolean {
297
297
  return hasAccessibleChildElement || hasDirectAccessibleText;
298
298
  }
299
299
 
300
- function hasAllChildrenAccessible(element: Element) {
300
+ function hasAllChildrenAccessible(element: Element): boolean {
301
301
  // If the element is aria-hidden, consider it accessible
302
302
  if (element.getAttribute('aria-hidden') === 'true') {
303
303
  return true;
@@ -331,7 +331,7 @@ function hasPointerCursor(node: Node): boolean {
331
331
  function hasChildNotANewInteractWithAccessibleText(element: Element) {
332
332
 
333
333
  // Helper function to check if the element is a link or button
334
- const isBuildInInteractable = (child) => {
334
+ const isBuildInInteractable = (child: Element) => {
335
335
  return child.nodeName.toLowerCase() === "a" || child.nodeName.toLowerCase() === "button" || child.nodeName.toLowerCase() === "input" ||
336
336
  child.getAttribute('role') === 'link' || child.getAttribute('role') === 'button';
337
337
  };
@@ -376,7 +376,7 @@ function hasPointerCursor(node: Node): boolean {
376
376
  }
377
377
 
378
378
  // Recursively check for text content inside child nodes of elements that are not links or buttons
379
- if (node.nodeType === Node.ELEMENT_NODE && !isBuildInInteractable(node)) {
379
+ if (node.nodeType === Node.ELEMENT_NODE && !isBuildInInteractable(node as Element)) {
380
380
  return Array.from(node.childNodes).some(innerNode => {
381
381
  if (innerNode.nodeType === Node.TEXT_NODE) {
382
382
  const innerTextContent = getTextContent(innerNode).trim();
@@ -440,7 +440,7 @@ function hasPointerCursor(node: Node): boolean {
440
440
  const beforeContent = window.getComputedStyle(element, '::before').getPropertyValue('content');
441
441
  const afterContent = window.getComputedStyle(element, '::after').getPropertyValue('content');
442
442
 
443
- function isAccessibleContent(value) {
443
+ function isAccessibleContent(value: string) {
444
444
  if (!value || value === 'none' || value === 'normal') {
445
445
  return false;
446
446
  }
@@ -1126,11 +1126,11 @@ function hasPointerCursor(node: Node): boolean {
1126
1126
  });
1127
1127
 
1128
1128
  // Collect XPaths and outerHTMLs of flagged elements per document
1129
- const flaggedXPathsByDocument = {};
1129
+ const flaggedXPathsByDocument: { [key: string]: { xpath: string; code: string }[] } = {};
1130
1130
 
1131
1131
  for (const docKey in currentFlaggedElementsByDocument) {
1132
1132
  const elements = currentFlaggedElementsByDocument[docKey];
1133
- const flaggedInfo = []; // Array to hold flagged element info
1133
+ const flaggedInfo: { xpath: string; code: string }[] = []; // Array to hold flagged element info
1134
1134
  elements.forEach(flaggedElement => {
1135
1135
  const parentFlagged = flaggedElement.closest('[data-flagged="true"]');
1136
1136
  if (!parentFlagged || parentFlagged === flaggedElement) {
@@ -1,12 +1,12 @@
1
- export function xPathToCss(expr: string) {
2
- const isValidXPath = expr =>
1
+ export default function xPathToCss(expr: string) {
2
+ const isValidXPath = (expr: string) =>
3
3
  typeof expr !== 'undefined' &&
4
4
  expr.replace(/[\s-_=]/g, '') !== '' &&
5
5
  expr.length ===
6
- expr.replace(
7
- /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
8
- '',
9
- ).length;
6
+ expr.replace(
7
+ /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
8
+ '',
9
+ ).length;
10
10
 
11
11
  const getValidationRegex = () => {
12
12
  let regex =
@@ -30,7 +30,7 @@ export function xPathToCss(expr: string) {
30
30
  value: '\\s*[\\w/:][-/\\w\\s,:;.]*',
31
31
  };
32
32
 
33
- Object.keys(subRegexes).forEach(key => {
33
+ Object.keys(subRegexes).forEach((key: keyof typeof subRegexes) => {
34
34
  regex = regex.replace(new RegExp(`%\\(${key}\\)s`, 'gi'), subRegexes[key]);
35
35
  });
36
36
 
@@ -42,14 +42,14 @@ export function xPathToCss(expr: string) {
42
42
  return new RegExp(regex, 'gi');
43
43
  };
44
44
 
45
- const preParseXpath = expr =>
45
+ const preParseXpath = (expr: string) =>
46
46
  expr.replace(
47
47
  /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi,
48
48
  '@class="$1"',
49
49
  );
50
50
 
51
- function escapeCssIdSelectors(cssSelector) {
52
- return cssSelector.replace(/#([^ >]+)/g, (match, id) => {
51
+ function escapeCssIdSelectors(cssSelector: string) {
52
+ return cssSelector.replace(/#([^ >]+)/g, (_match, id) => {
53
53
  // Escape special characters in the id part
54
54
  return `#${id.replace(/[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g, '\\$&')}`;
55
55
  });
@@ -48,7 +48,7 @@ const runCustom = async (
48
48
  includeScreenshots: boolean,
49
49
  ) => {
50
50
  // checks and delete datasets path if it already exists
51
- await cleanUp(randomToken);
51
+ cleanUp(randomToken);
52
52
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
53
53
 
54
54
  const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
package/src/index.ts CHANGED
@@ -1,6 +1,4 @@
1
1
  #!/usr/bin/env node
2
- /* eslint-disable func-names */
3
- /* eslint-disable no-param-reassign */
4
2
  import printMessage from 'print-message';
5
3
  import inquirer from 'inquirer';
6
4
  import { EnqueueStrategy } from 'crawlee';
@@ -22,6 +20,7 @@ import {
22
20
  import questions from './constants/questions.js';
23
21
  import combineRun from './combine.js';
24
22
  import { BrowserTypes, RuleFlags, ScannerTypes } from './constants/constants.js';
23
+ import { DeviceDescriptor } from './types/types.js';
25
24
 
26
25
  export type Answers = {
27
26
  headless: boolean;
@@ -32,7 +31,7 @@ export type Answers = {
32
31
  scanner: ScannerTypes;
33
32
  url: string;
34
33
  clonedBrowserDataDir: string;
35
- playwrightDeviceDetailsObject: object;
34
+ playwrightDeviceDetailsObject: DeviceDescriptor;
36
35
  nameEmail: string;
37
36
  fileTypes: string;
38
37
  metadata: string;
@@ -61,7 +60,7 @@ export type Data = {
61
60
  deviceChosen: string;
62
61
  customDevice: string;
63
62
  viewportWidth: number;
64
- playwrightDeviceDetailsObject: object;
63
+ playwrightDeviceDetailsObject: DeviceDescriptor;
65
64
  maxRequestsPerCrawl: number;
66
65
  strategy: EnqueueStrategy;
67
66
  isLocalFileScan: boolean;
package/src/logs.ts CHANGED
@@ -40,7 +40,7 @@ const silentLogger = createLogger({
40
40
  });
41
41
 
42
42
  // guiInfoLogger feeds the gui information via console log and is mainly used for scanning process
43
- export const guiInfoLog = (status, data) => {
43
+ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScanned?: string }) => {
44
44
  if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) {
45
45
  switch (status) {
46
46
  case guiInfoStatusTypes.COMPLETED:
@@ -45,6 +45,7 @@ export type PageInfo = {
45
45
  pageImagePath?: string;
46
46
  pageIndex?: number;
47
47
  metadata?: string;
48
+ httpStatusCode?: number;
48
49
  };
49
50
 
50
51
  export type RuleInfo = {
@@ -248,7 +249,7 @@ const writeCsv = async (allIssues, storagePath) => {
248
249
  scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
249
250
  severity: 'error',
250
251
  issueId: 'error-pages-skipped',
251
- issueDescription: 'Page was skipped during the scan',
252
+ issueDescription: page.metadata ? page.metadata : 'An unknown error caused the page to be skipped',
252
253
  wcagConformance: '',
253
254
  url: page.url || page || '',
254
255
  pageTitle: 'Error',
@@ -791,25 +792,21 @@ const writeJsonAndBase64Files = async (
791
792
  items.mustFix.rules.forEach(rule => {
792
793
  rule.pagesAffected.forEach(page => {
793
794
  page.itemsCount = page.items.length;
794
- page.items = [];
795
795
  });
796
796
  });
797
797
  items.goodToFix.rules.forEach(rule => {
798
798
  rule.pagesAffected.forEach(page => {
799
799
  page.itemsCount = page.items.length;
800
- page.items = [];
801
800
  });
802
801
  });
803
802
  items.needsReview.rules.forEach(rule => {
804
803
  rule.pagesAffected.forEach(page => {
805
804
  page.itemsCount = page.items.length;
806
- page.items = [];
807
805
  });
808
806
  });
809
807
  items.passed.rules.forEach(rule => {
810
808
  rule.pagesAffected.forEach(page => {
811
809
  page.itemsCount = page.items.length;
812
- page.items = [];
813
810
  });
814
811
  });
815
812
 
@@ -1205,6 +1202,7 @@ const createRuleIdJson = allIssues => {
1205
1202
  });
1206
1203
  });
1207
1204
  snippets = [...snippetsSet];
1205
+ rule.pagesAffected.forEach(p => { delete p.items; });
1208
1206
  }
1209
1207
  compiledRuleJson[ruleId] = {
1210
1208
  snippets,
package/src/npmIndex.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import printMessage from 'print-message';
4
- import axe, { ImpactValue } from 'axe-core';
4
+ import axe, { AxeResults, ImpactValue } from 'axe-core';
5
5
  import { fileURLToPath } from 'url';
6
6
  import { EnqueueStrategy } from 'crawlee';
7
7
  import constants, { BrowserTypes, RuleFlags, ScannerTypes } from './constants/constants.js';
@@ -16,7 +16,7 @@ import { createCrawleeSubFolders, filterAxeResults } from './crawlers/commonCraw
16
16
  import { createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
17
17
  import generateArtifacts from './mergeAxeResults.js';
18
18
  import { takeScreenshotForHTMLElements } from './screenshotFunc/htmlScreenshotFunc.js';
19
- import { silentLogger } from './logs.js';
19
+ import { consoleLogger, silentLogger } from './logs.js';
20
20
  import { alertMessageOptions } from './constants/cliFunctions.js';
21
21
  import { evaluateAltText } from './crawlers/custom/evaluateAltText.js';
22
22
  import { escapeCssSelector } from './crawlers/custom/escapeCssSelector.js';
@@ -24,7 +24,7 @@ import { framesCheck } from './crawlers/custom/framesCheck.js';
24
24
  import { findElementByCssSelector } from './crawlers/custom/findElementByCssSelector.js';
25
25
  import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
26
26
  import { flagUnlabelledClickableElements } from './crawlers/custom/flagUnlabelledClickableElements.js';
27
- import { xPathToCss } from './crawlers/custom/xPathToCss.js';
27
+ import xPathToCss from './crawlers/custom/xPathToCss.js';
28
28
  import { extractText } from './crawlers/custom/extractText.js';
29
29
  import { gradeReadability } from './crawlers/custom/gradeReadability.js';
30
30
 
@@ -65,7 +65,7 @@ export const init = async ({
65
65
  specifiedMaxConcurrency?: number;
66
66
  followRobots?: boolean;
67
67
  }) => {
68
- console.log('Starting Oobee');
68
+ consoleLogger.info('Starting Oobee');
69
69
 
70
70
  const [date, time] = new Date().toLocaleString('sv').replaceAll(/-|:/g, '').split(' ');
71
71
  const domain = new URL(entryUrl).hostname;
@@ -126,7 +126,7 @@ export const init = async ({
126
126
  const cssSelector = xPathToCss(xpath);
127
127
  return cssSelector;
128
128
  } catch (e) {
129
- console.error('Error converting XPath to CSS: ', xpath, e);
129
+ consoleLogger.error(`Error converting XPath to CSS: ${xpath} - ${e}`);
130
130
  return '';
131
131
  }
132
132
  })
@@ -197,7 +197,11 @@ export const init = async ({
197
197
  `;
198
198
  };
199
199
 
200
- const pushScanResults = async (res, metadata, elementsToClick) => {
200
+ const pushScanResults = async (
201
+ res: { pageUrl: string; pageTitle: string; axeScanResults: AxeResults },
202
+ metadata: string,
203
+ elementsToClick: string[],
204
+ ) => {
201
205
  throwErrorIfTerminated();
202
206
  if (includeScreenshots) {
203
207
  // use chrome by default
@@ -211,7 +215,7 @@ export const init = async ({
211
215
  await page.waitForLoadState('networkidle');
212
216
 
213
217
  // click on elements to reveal hidden elements so screenshots can be taken
214
- elementsToClick?.forEach(async elem => {
218
+ elementsToClick?.forEach(async (elem: string) => {
215
219
  try {
216
220
  await page.locator(elem).click();
217
221
  } catch (e) {
@@ -259,7 +263,7 @@ export const init = async ({
259
263
 
260
264
  const terminate = async () => {
261
265
  throwErrorIfTerminated();
262
- console.log('Stopping Oobee');
266
+ consoleLogger.info('Stopping Oobee');
263
267
  isInstanceTerminated = true;
264
268
  scanDetails.endTime = new Date();
265
269
  scanDetails.urlsCrawled = urlsCrawled;