@govtechsg/oobee 0.10.58 → 0.10.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,15 +17,15 @@ import {
  getLinksFromSitemap,
  getPlaywrightLaunchOptions,
  isSkippedUrl,
- urlWithoutAuth,
  waitForPageLoaded,
  isFilePath,
- initModifiedUserAgent,
  } from '../constants/common.js';
- import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
+ import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
  import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
  import { guiInfoLog } from '../logs.js';
  import { ViewportSettingsClass } from '../combine.js';
+ import * as path from 'path';
+ import fsp from 'fs/promises';

  const crawlSitemap = async ({
  sitemapUrl,
@@ -70,50 +70,19 @@ const crawlSitemap = async ({
  let dataset: crawlee.Dataset;
  let urlsCrawled: UrlsCrawled;

- // Boolean to omit axe scan for basic auth URL
- let isBasicAuth: boolean;
- let basicAuthPage = 0;
- let finalLinks = [];
- let authHeader = '';
-
  if (fromCrawlIntelligentSitemap) {
  dataset = datasetFromIntelligent;
  urlsCrawled = urlsCrawledFromIntelligent;
  } else {
  ({ dataset } = await createCrawleeSubFolders(randomToken));
  urlsCrawled = { ...constants.urlsCrawledObj };
-
- if (!fs.existsSync(randomToken)) {
- fs.mkdirSync(randomToken);
- }
  }

- let parsedUrl;
- let username = '';
- let password = '';
-
  if (!crawledFromLocalFile && isFilePath(sitemapUrl)) {
  console.log('Local file crawling not supported for sitemap. Please provide a valid URL.');
  return;
  }

- if (isFilePath(sitemapUrl)) {
- parsedUrl = sitemapUrl;
- } else {
- parsedUrl = new URL(sitemapUrl);
- if (parsedUrl.username !== '' && parsedUrl.password !== '') {
- isBasicAuth = true;
- username = decodeURIComponent(parsedUrl.username);
- password = decodeURIComponent(parsedUrl.password);
-
- // Create auth header
- authHeader = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`;
-
- parsedUrl.username = '';
- parsedUrl.password = '';
- }
- }
-
  const linksFromSitemap = await getLinksFromSitemap(
  sitemapUrl,
  maxRequestsPerCrawl,
@@ -121,29 +90,11 @@ const crawlSitemap = async ({
  userDataDirectory,
  userUrlInputFromIntelligent,
  fromCrawlIntelligentSitemap,
- username,
- password,
+ extraHTTPHeaders,
  );
- /**
- * Regex to match http://username:password@hostname.com
- * utilised in scan strategy to ensure subsequent URLs within the same domain are scanned.
- * First time scan with original `url` containing credentials is strictly to authenticate for browser session
- * subsequent URLs are without credentials.
- * basicAuthPage is set to -1 for basic auth URL to ensure it is not counted towards maxRequestsPerCrawl
- */

  sitemapUrl = encodeURI(sitemapUrl);

- if (isBasicAuth) {
- // request to basic auth URL to authenticate for browser session
- finalLinks.push(new Request({ url: sitemapUrl, uniqueKey: `auth:${sitemapUrl}` }));
- const finalUrl = `${sitemapUrl.split('://')[0]}://${sitemapUrl.split('@')[1]}`;
-
- // obtain base URL without credentials so that subsequent URLs within the same domain can be scanned
- finalLinks.push(new Request({ url: finalUrl }));
- basicAuthPage = -2;
- }
-
  const pdfDownloads: Promise<void>[] = [];
  const uuidToPdfMapping: Record<string, string> = {};
  const isScanHtml = ['all', 'html-only'].includes(fileTypes);
@@ -151,36 +102,43 @@ const crawlSitemap = async ({
  const { playwrightDeviceDetailsObject } = viewportSettings;
  const { maxConcurrency } = constants;

- finalLinks = [...finalLinks, ...linksFromSitemap];
-
  const requestList = await RequestList.open({
- sources: finalLinks,
+ sources: linksFromSitemap,
  });

- let userDataDir = '';
- if (userDataDirectory) {
- userDataDir = process.env.CRAWLEE_HEADLESS !== '0' ? userDataDirectory : '';
- }
-
- await initModifiedUserAgent(browser, playwrightDeviceDetailsObject);
- const crawler = new crawlee.PlaywrightCrawler({
+ const crawler = register(new crawlee.PlaywrightCrawler({
  launchContext: {
  launcher: constants.launcher,
  launchOptions: getPlaywrightLaunchOptions(browser),
  // Bug in Chrome which causes browser pool crash when userDataDirectory is set in non-headless mode
- ...(process.env.CRAWLEE_HEADLESS === '0' && { userDataDir }),
+ ...(process.env.CRAWLEE_HEADLESS === '1' && { userDataDir: userDataDirectory }),
  },
  retryOnBlocked: true,
  browserPoolOptions: {
  useFingerprints: false,
  preLaunchHooks: [
- async (_pageId: string, launchContext: LaunchContext) => {
+ async (_pageId, launchContext) => {
+ const baseDir = userDataDirectory; // e.g., /Users/young/.../Chrome/oobee-...
+
+ // Ensure base exists
+ await fsp.mkdir(baseDir, { recursive: true });
+
+ // Create a unique subdir per browser
+ const subProfileDir = path.join(baseDir, `profile-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
+ await fsp.mkdir(subProfileDir, { recursive: true });
+
+ // Assign to Crawlee's launcher
+ launchContext.userDataDir = subProfileDir;
+
+ // Safely extend launchOptions
  launchContext.launchOptions = {
  ...launchContext.launchOptions,
- bypassCSP: true,
  ignoreHTTPSErrors: true,
  ...playwrightDeviceDetailsObject,
  };
+
+ // Optionally log for debugging
+ // console.log(`[HOOK] Using userDataDir: ${subProfileDir}`);
  },
  ],
  },
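The preLaunchHooks hunk above drops initModifiedUserAgent and the shared userDataDir in favour of a disposable profile directory created for each browser launch, so concurrent launches no longer contend for the same Chromium profile lock. A minimal standalone sketch of that pattern (the helper name is ours, not part of the package):

import * as path from 'path';
import fsp from 'fs/promises';

// Create a unique, throwaway profile directory under baseDir for one browser launch.
export const makeProfileDir = async (baseDir: string): Promise<string> => {
  // Ensure the base directory exists, then add a subfolder named with a timestamp
  // and a short random suffix so parallel launches cannot collide.
  await fsp.mkdir(baseDir, { recursive: true });
  const subProfileDir = path.join(
    baseDir,
    `profile-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
  );
  await fsp.mkdir(subProfileDir, { recursive: true });
  return subProfileDir;
};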
@@ -193,7 +151,7 @@ const crawlSitemap = async ({
  return new Promise(resolve => {
  let timeout;
  let mutationCount = 0;
- const MAX_MUTATIONS = 250; // stop if things never quiet down
+ const MAX_MUTATIONS = 500; // stop if things never quiet down
  const OBSERVER_TIMEOUT = 5000; // hard cap on total wait

  const observer = new MutationObserver(() => {
@@ -252,15 +210,7 @@ const crawlSitemap = async ({
  return;
  }

- // Set headers if basic auth
- if (isBasicAuth) {
- await page.setExtraHTTPHeaders({
- Authorization: authHeader,
- ...extraHTTPHeaders,
- });
- } else {
- preNavigationHooks(extraHTTPHeaders);
- }
+ preNavigationHooks(extraHTTPHeaders);
  },
  ],
  requestHandlerTimeoutSecs: 90,
@@ -282,17 +232,6 @@ const crawlSitemap = async ({
  return;
  }

- // Set basic auth header if needed
- if (isBasicAuth) {
- await page.setExtraHTTPHeaders({
- Authorization: authHeader,
- });
- const currentUrl = new URL(request.url);
- currentUrl.username = username;
- currentUrl.password = password;
- request.url = currentUrl.href;
- }
-
  await waitForPageLoaded(page, 10000);

  const actualUrl = page.url() || request.loadedUrl || request.url;
@@ -341,9 +280,7 @@ const crawlSitemap = async ({
  const contentType = response?.headers?.()['content-type'] || '';
  const status = response ? response.status() : 0;

- if (basicAuthPage < 0) {
- basicAuthPage += 1;
- } else if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
+ if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
  const isRedirected = !areLinksEqual(page.url(), request.url);
  const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
  item => (item.actualUrl || item.url) === page.url(),
@@ -382,13 +319,13 @@ const crawlSitemap = async ({
  });

  urlsCrawled.scanned.push({
- url: urlWithoutAuth(request.url),
+ url: request.url,
  pageTitle: results.pageTitle,
  actualUrl, // i.e. actualUrl
  });

  urlsCrawled.scannedRedirects.push({
- fromUrl: urlWithoutAuth(request.url),
+ fromUrl: request.url,
  toUrl: actualUrl,
  });

@@ -421,9 +358,6 @@ const crawlSitemap = async ({
  }
  },
  failedRequestHandler: async ({ request, response, error }) => {
- if (isBasicAuth && request.url) {
- request.url = `${request.url.split('://')[0]}://${request.url.split('@')[1]}`;
- }

  // check if scanned pages have reached limit due to multi-instances of handler running
  if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
@@ -461,7 +395,7 @@ const crawlSitemap = async ({
  scaleDownStepRatio: 0.1, // Scale down slower
  },
  }),
- });
+ }));

  await crawler.run();

@@ -6,7 +6,7 @@ import path from 'path';
  import { runAxeScript } from '../commonCrawlerFunc.js';
  import { consoleLogger, guiInfoLog, silentLogger } from '../../logs.js';
  import { guiInfoStatusTypes } from '../../constants/constants.js';
- import { isSkippedUrl, urlWithoutAuth } from '../../constants/common.js';
+ import { isSkippedUrl } from '../../constants/common.js';

  //! For Cypress Test
  // env to check if Cypress test is running
@@ -77,8 +77,8 @@ export const screenshotFullPage = async (page, screenshotsDir: string, screensho
  window.scrollTo(0, 0);
  });

- consoleLogger.info(`Screenshot page at: ${urlWithoutAuth(page.url())}`);
- silentLogger.info(`Screenshot page at: ${urlWithoutAuth(page.url())}`);
+ consoleLogger.info(`Screenshot page at: ${page.url()}`);
+ consoleLogger.info(`Screenshot page at: ${page.url()}`);

  await page.screenshot({
  timeout: 5000,
@@ -116,7 +116,7 @@ export const runAxeScan = async (
  await dataset.pushData(result);

  urlsCrawled.scanned.push({
- url: urlWithoutAuth(page.url()),
+ url: page.url(),
  pageTitle: result.pageTitle,
  pageImagePath: customFlowDetails.pageImagePath,
  });
@@ -469,7 +469,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
  consoleLogger.info(`Overlay state: ${existingOverlay}`);
  } catch {
  consoleLogger.info('Error in adding overlay menu to page');
- silentLogger.info('Error in adding overlay menu to page');
+ consoleLogger.info('Error in adding overlay menu to page');
  }
  });

@@ -15,6 +15,7 @@ import constants, {
  STATUS_CODE_METADATA,
  UrlsCrawled,
  } from '../constants/constants.js';
+ import { cleanUpAndExit } from '../utils.js';

  const require = createRequire(import.meta.url);

@@ -233,7 +234,7 @@ const getVeraExecutable = () => {
  const veraPdfExeNotFoundError =
  'Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.';
  consoleLogger.error(veraPdfExeNotFoundError);
- silentLogger.error(veraPdfExeNotFoundError);
+ consoleLogger.error(veraPdfExeNotFoundError);
  }
  return veraPdfExe;
  };
@@ -355,7 +356,7 @@ export const runPdfScan = async (randomToken: string) => {
  'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml',
  )}"`;
  if (!veraPdfExe || !veraPdfProfile) {
- process.exit(1);
+ cleanUpAndExit(1);
  }

  const intermediateFolder = randomToken; // NOTE: assumes this folder is already created for crawlee
@@ -1,7 +1,7 @@
  /* eslint-env browser */
  import { chromium } from 'playwright';
  import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
- import { cleanUp } from '../utils.js';
+ import { cleanUpAndExit, register} from '../utils.js';
  import constants, {
  getIntermediateScreenshotsPath,
  guiInfoStatusTypes,
@@ -48,7 +48,6 @@ const runCustom = async (
  includeScreenshots: boolean,
  ) => {
  // checks and delete datasets path if it already exists
- cleanUp(randomToken);
  process.env.CRAWLEE_STORAGE_DIR = randomToken;

  const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
@@ -83,6 +82,8 @@ const runCustom = async (
  ...viewportSettings.playwrightDeviceDetailsObject,
  });

+ register(context);
+
  // Detection of new page
  context.on('page', async newPage => {
  await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
@@ -107,7 +108,7 @@ const runCustom = async (
  await allPagesClosedPromise(pageClosePromises);
  } catch (error) {
  log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
- process.exit(1);
+ cleanUpAndExit(1, randomToken, true);
  }

  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
package/src/index.ts CHANGED
@@ -7,6 +7,8 @@ import {
  cleanUp,
  getUserDataTxt,
  writeToUserDataTxt,
+ listenForCleanUp,
+ cleanUpAndExit,
  } from './utils.js';
  import {
  prepareData,
@@ -97,10 +99,7 @@ const runScan = async (answers: Answers) => {
  answers.customDevice,
  answers.viewportWidth,
  );
- const { browserToRun } = getBrowserToRun(BrowserTypes.CHROME);
- deleteClonedProfiles(browserToRun);
- answers.browserToRun = browserToRun;
-
+
  if (!answers.nameEmail) {
  answers.nameEmail = `${userData.name}:${userData.email}`;
  }
@@ -109,19 +108,19 @@ const runScan = async (answers: Answers) => {
  answers.metadata = '{}';

  const data: Data = await prepareData(answers);
+
+ // Executes cleanUp script if error encountered
+ listenForCleanUp(data.randomToken);
+
  data.userDataDirectory = getClonedProfilesWithRandomToken(data.browser, data.randomToken);

  printMessage(['Scanning website...'], messageOptions);

  await combineRun(data, screenToScan);

- // Delete cloned directory
- deleteClonedProfiles(data.browser);
-
  // Delete dataset and request queues
- cleanUp(data.randomToken);
+ cleanUpAndExit(0, data.randomToken);

- process.exit(0);
  };

  if (userData) {
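register, listenForCleanUp, and cleanUpAndExit come from utils.ts, which is not included in this diff; the hunks above only show how they are called. A hypothetical sketch of what such a cleanup registry could look like (names and signatures are illustrative, and the real cleanUpAndExit accepts additional arguments, e.g. cleanUpAndExit(1, randomToken, true) in the custom runner):

import fs from 'fs';

type Closable = { close?: () => Promise<unknown>; teardown?: () => Promise<unknown> };

const registered: Closable[] = [];

// Track a crawler or browser context so it can be shut down before the process exits.
export const register = <T extends Closable>(resource: T): T => {
  registered.push(resource);
  return resource;
};

// Close everything that was registered, remove the intermediate scan folder, then exit.
export const cleanUpAndExit = async (exitCode: number, randomToken?: string): Promise<void> => {
  await Promise.allSettled(
    registered.map(r => (r.close ?? r.teardown ?? (async () => {})).call(r)),
  );
  if (randomToken) {
    fs.rmSync(randomToken, { recursive: true, force: true });
  }
  process.exit(exitCode);
};

// Run the cleanup path even when the scan is interrupted or crashes.
export const listenForCleanUp = (randomToken: string): void => {
  process.on('SIGINT', () => void cleanUpAndExit(130, randomToken));
  process.on('uncaughtException', () => void cleanUpAndExit(1, randomToken));
};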
package/src/logs.ts CHANGED
@@ -2,7 +2,8 @@
  /* eslint-disable no-shadow */
  import { createLogger, format, transports } from 'winston';
  import { guiInfoStatusTypes } from './constants/constants.js';
- import { urlWithoutAuth } from './constants/common.js';
+ import path from 'path';
+ import { randomUUID } from 'crypto';

  const { combine, timestamp, printf } = format;

@@ -21,12 +22,32 @@ const logFormat = printf(({ timestamp, level, message }) => {
  // transport: storage device for logs
  // Enabled for console and storing into files; Files are overwritten each time
  // All logs in combined.txt, error in errors.txt
+ const uuid = randomUUID();
+ let basePath: string;
+
+ if (process.env.OOBEE_LOGS_PATH) {
+ basePath = process.env.OOBEE_LOGS_PATH;
+ } else if (process.platform === 'win32') {
+ basePath = path.join(process.env.APPDATA, 'Oobee');
+ } else if (process.platform === 'darwin') {
+ basePath = path.join(process.env.HOME, 'Library', 'Application Support', 'Oobee');
+ } else {
+ basePath = path.join(process.cwd());
+ }
+
+ export const errorsTxtPath = path.join(basePath, `${uuid}.txt`);

  const consoleLogger = createLogger({
  silent: !(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE),
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
- transports:
- process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE ? [new transports.Console()] : [],
+ transports: [
+ new transports.Console({ level: 'info' }),
+ new transports.File({
+ filename: errorsTxtPath,
+ level: 'info',
+ handleExceptions: true,
+ }),
+ ],
  });

  // No display in consoles, this will mostly be used within the interactive script to avoid disrupting the flow
@@ -35,9 +56,10 @@ const consoleLogger = createLogger({
  const silentLogger = createLogger({
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
  transports: [
- process.env.OOBEE_VERBOSE || process.env.RUNNING_FROM_PH_GUI
- ? new transports.Console({ handleExceptions: true })
- : new transports.File({ filename: 'errors.txt', level: 'warn', handleExceptions: true }),
+ new transports.File({
+ filename: errorsTxtPath,
+ level: 'warn',
+ handleExceptions: true }),
  ].filter(Boolean),
  });

@@ -47,16 +69,17 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
  switch (status) {
  case guiInfoStatusTypes.COMPLETED:
  console.log('Scan completed');
+ silentLogger.info('Scan completed');
  break;
  case guiInfoStatusTypes.SCANNED:
  case guiInfoStatusTypes.SKIPPED:
  case guiInfoStatusTypes.ERROR:
  case guiInfoStatusTypes.DUPLICATE:
- console.log(
- `crawling::${data.numScanned || 0}::${status}::${
- urlWithoutAuth(data.urlScanned) || 'no url provided'
- }`,
- );
+ const msg = `crawling::${data.numScanned || 0}::${status}::${
+ data.urlScanned || 'no url provided'
+ }`;
+ console.log(msg);
+ silentLogger.info(msg);
  break;
  default:
  console.log(`Status provided to gui info log not recognized: ${status}`);
@@ -65,4 +88,6 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
  }
  };

+ consoleLogger.info(`Logger writing to: ${errorsTxtPath}`);
+
  export { logFormat, consoleLogger, silentLogger };
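The new file transport writes to a per-run file named with a random UUID, and its directory is resolved from OOBEE_LOGS_PATH, a per-OS application-data folder, or the working directory. A standalone sketch of that resolution rule (the fallbacks to cwd when APPDATA or HOME are unset are our addition; the hunk above assumes those variables exist):

import path from 'path';

// Resolve where Oobee log files live: an explicit OOBEE_LOGS_PATH wins,
// then the platform's application data folder, otherwise the working directory.
export const resolveLogDir = (): string => {
  if (process.env.OOBEE_LOGS_PATH) {
    return process.env.OOBEE_LOGS_PATH;
  }
  if (process.platform === 'win32') {
    return path.join(process.env.APPDATA ?? process.cwd(), 'Oobee');
  }
  if (process.platform === 'darwin') {
    return path.join(process.env.HOME ?? process.cwd(), 'Library', 'Application Support', 'Oobee');
  }
  return process.cwd();
};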
@@ -15,7 +15,7 @@ import { pipeline } from 'stream/promises';
  // @ts-ignore
  import * as Sentry from '@sentry/node';
  import constants, { ScannerTypes, sentryConfig, setSentryUser } from './constants/constants.js';
- import { urlWithoutAuth } from './constants/common.js';
+ import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';

  import {
  createScreenshotsFolder,
@@ -29,6 +29,7 @@ import {
  getWcagCriteriaMap,
  categorizeWcagCriteria,
  getUserDataTxt,
+ register
  } from './utils.js';
  import { consoleLogger, silentLogger } from './logs.js';
  import itemTypeDescription from './constants/itemTypeDescription.js';
@@ -961,29 +962,21 @@ const writeScanDetailsCsv = async (
  });
  };

- let browserChannel = 'chrome';
+ let browserChannel = getBrowserToRun().browserToRun;

- if (os.platform() === 'win32') {
- browserChannel = 'msedge';
- }
-
- if (os.platform() === 'linux') {
- browserChannel = 'chromium';
- }
-
- const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary') => {
+ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary', browser: string, userDataDirectory: string) => {
  const htmlFilePath = `${storagePath}/${filename}.html`;
  const fileDestinationPath = `${storagePath}/${filename}.pdf`;
- const browser = await chromium.launch({
- headless: false,
- channel: browserChannel,
- args: ['--headless=new', '--no-sandbox'],
- });

- const context = await browser.newContext({
- ignoreHTTPSErrors: true,
- serviceWorkers: 'block',
- });
+ const effectiveUserDataDirectory = process.env.CRAWLEE_HEADLESS === '1'
+ ? userDataDirectory
+ : '';
+ const context = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, {
+ headless: process.env.CRAWLEE_HEADLESS === '1',
+ ...getPlaywrightLaunchOptions(browser),
+ });
+
+ register(context);

  const page = await context.newPage();

@@ -1008,8 +1001,7 @@ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filena

  await page.close();

- await context.close();
- await browser.close();
+ await context.close().catch(() => {});

  if (pagesScanned < 2000) {
  fs.unlinkSync(htmlFilePath);
@@ -1721,14 +1713,9 @@ const generateArtifacts = async (
  zip: string = undefined, // optional
  generateJsonFiles = false,
  ) => {
- const intermediateDatasetsPath = `${randomToken}/datasets/${randomToken}`;
- const oobeeAppVersion = getVersion();
  const storagePath = getStoragePath(randomToken);
-
- urlScanned =
- scanType === ScannerTypes.SITEMAP || scanType === ScannerTypes.LOCALFILE
- ? urlScanned
- : urlWithoutAuth(urlScanned);
+ const intermediateDatasetsPath = `${storagePath}/crawlee`;
+ const oobeeAppVersion = getVersion();

  const formatAboutStartTime = (dateString: string) => {
  const utcStartTimeDate = new Date(dateString);
@@ -1851,10 +1838,18 @@ const generateArtifacts = async (

  printMessage([
  'Scan Summary',
+ `Oobee App Version: ${allIssues.oobeeAppVersion}`,
  '',
  `Site Name: ${allIssues.siteName}`,
  `URL: ${allIssues.urlScanned}`,
  `Pages Scanned: ${allIssues.totalPagesScanned}`,
+ `Start Time: ${allIssues.startTime}`,
+ `End Time: ${allIssues.endTime}`,
+ `Elapsed Time: ${(new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000}s`,
+ `Device: ${allIssues.deviceChosen}`,
+ `Viewport: ${allIssues.viewport}`,
+ `Scan Type: ${allIssues.scanType}`,
+ `Label: ${allIssues.customFlowLabel || 'N/A'}`,
  '',
  `Must Fix: ${allIssues.items.mustFix.rules.length} ${Object.keys(allIssues.items.mustFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.mustFix.totalItems} ${allIssues.items.mustFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
  `Good to Fix: ${allIssues.items.goodToFix.rules.length} ${Object.keys(allIssues.items.goodToFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.goodToFix.totalItems} ${allIssues.items.goodToFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
@@ -1892,7 +1887,11 @@ const generateArtifacts = async (
  consoleLogger.info(`End Time: ${allIssues.endTime}`);
  const elapsedSeconds = (new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000;
  consoleLogger.info(`Elapsed Time: ${elapsedSeconds}s`);
-
+ consoleLogger.info(`Device: ${allIssues.deviceChosen}`);
+ consoleLogger.info(`Viewport: ${allIssues.viewport}`);
+ consoleLogger.info(`Scan Type: ${allIssues.scanType}`);
+ consoleLogger.info(`Label: ${allIssues.customFlowLabel || 'N/A'}`);
+
  const getAxeImpactCount = (allIssues: AllIssues) => {
  const impactCount = {
  critical: 0,
@@ -1986,7 +1985,14 @@ const generateArtifacts = async (
  ]);
  }

- await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length), 1);
+ // Should consider refactor constants.userDataDirectory to be a parameter in future
+ await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
+
+ try {
+ fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
+ } catch (error) {
+ consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
+ }

  // Take option if set
  if (typeof zip === 'string') {
package/src/npmIndex.ts CHANGED
@@ -10,7 +10,6 @@ import {
  getBrowserToRun,
  getPlaywrightLaunchOptions,
  submitForm,
- urlWithoutAuth,
  } from './constants/common.js';
  import { createCrawleeSubFolders, filterAxeResults } from './crawlers/commonCrawlerFunc.js';
  import { createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
@@ -201,11 +200,12 @@ export const init = async ({
  res: { pageUrl: string; pageTitle: string; axeScanResults: AxeResults },
  metadata: string,
  elementsToClick: string[],
+ randomToken: string,
  ) => {
  throwErrorIfTerminated();
  if (includeScreenshots) {
  // use chrome by default
- const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME);
+ const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME, false, randomToken);
  const browserContext = await constants.launcher.launchPersistentContext(
  clonedBrowserDataDir,
  { viewport: viewportSettings, ...getPlaywrightLaunchOptions(browserToRun) },
@@ -237,7 +237,7 @@ export const init = async ({
  );

  await browserContext.close();
- deleteClonedProfiles(browserToRun);
+ deleteClonedProfiles(browserToRun, randomToken);
  }
  const pageIndex = urlsCrawled.scanned.length + 1;
  const filteredResults = filterAxeResults(res.axeScanResults, res.pageTitle, {
@@ -245,7 +245,7 @@ export const init = async ({
  metadata,
  });
  urlsCrawled.scanned.push({
- url: urlWithoutAuth(res.pageUrl).toString(),
+ url: res.pageUrl.toString(),
  actualUrl: 'tbd',
  pageTitle: `${pageIndex}: ${res.pageTitle}`,
  });
@@ -22,7 +22,7 @@ export const takeScreenshotForHTMLElements = async (
  for (const violation of violations) {
  if (screenshotCount >= maxScreenshots) {
  /*
- silentLogger.warn(
+ consoleLogger.warn(
  `Skipping screenshots for ${violation.id} as maxScreenshots (${maxScreenshots}) exceeded. You can increase it by specifying a higher value when calling takeScreenshotForHTMLElements.`,
  );
  */
@@ -34,7 +34,7 @@ export const takeScreenshotForHTMLElements = async (

  // Check if rule ID is 'oobee-grading-text-contents' and skip screenshot logic
  if (rule === 'oobee-grading-text-contents') {
- // silentLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
+ // consoleLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
  newViolations.push(violation); // Make sure it gets added
  continue;
  }
@@ -59,13 +59,13 @@ export const takeScreenshotForHTMLElements = async (
  nodeWithScreenshotPath.screenshotPath = screenshotPath;
  screenshotCount++;
  } else {
- // silentLogger.info(`Element at ${currLocator} is not visible`);
+ // consoleLogger.info(`Element at ${currLocator} is not visible`);
  }

  break; // Stop looping after finding the first visible locator
  }
  } catch (e) {
- // silentLogger.info(`Unable to take element screenshot at ${selector}`);
+ // consoleLogger.info(`Unable to take element screenshot at ${selector}`);
  }
  }
  newViolationNodes.push(nodeWithScreenshotPath);