@govtechsg/oobee 0.10.58 → 0.10.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DETAILS.md CHANGED
@@ -41,7 +41,7 @@ Note: Level AAA are disabled by default. Please specify `enable-wcag-aaa` in ru
41
41
  | WCAG 1.4.2 | A | Yes | | |
42
42
  | WCAG 1.4.3 | AA | Yes | | |
43
43
  | WCAG 1.4.4 | AA | Yes | | |
44
- | WCAG 1.4.6 | AAA | Yes | | |
44
+ | WCAG 1.4.6 | AAA | | Yes | |
45
45
  | WCAG 1.4.12 | AA | Yes | | |
46
46
  | WCAG 2.1.1 | A | Yes | | |
47
47
  | WCAG 2.1.3 | AAA | Yes * | | |
package/README.md CHANGED
@@ -86,6 +86,7 @@ verapdf --version
86
86
  | OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
87
87
  | OOBEE_FAST_CRAWLER| When set to `true`, increases scan concurrency at a rapid rate. Experimental, may cause system stability issues on low-powered devices. | `false`|
88
88
  | OOBEE_VALIDATE_URL| When set to `true`, validates if URLs are valid and exits. | `false` |
89
+ | OOBEE_LOGS_PATH | When set, logs are written to this path. | |
89
90
  | WARN_LEVEL | Only used in tests. | |
90
91
 
91
92
  #### Environment variables used internally (Do not set)
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.58",
4
+ "version": "0.10.62",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "dependencies": {
@@ -69,7 +69,8 @@
69
69
  "ansi-regex": "^5.0.1",
70
70
  "tough-cookie": "^5.0.0-rc.2",
71
71
  "micromatch": "github:micromatch/micromatch.git#4.0.8",
72
- "brace-expansion": "^1.1.12"
72
+ "brace-expansion": "^1.1.12",
73
+ "tmp": "0.2.4"
73
74
  },
74
75
  "optionalDependencies": {
75
76
  "@napi-rs/canvas-darwin-arm64": "^0.1.53",
package/src/cli.ts CHANGED
@@ -5,18 +5,14 @@ import printMessage from 'print-message';
5
5
  import { devices } from 'playwright';
6
6
  import { fileURLToPath } from 'url';
7
7
  import path from 'path';
8
- import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
8
+ import { cleanUp, setHeadlessMode, getVersion, getStoragePath, listenForCleanUp, cleanUpAndExit } from './utils.js';
9
9
  import {
10
10
  checkUrl,
11
11
  prepareData,
12
12
  getFileSitemap,
13
13
  validEmail,
14
14
  validName,
15
- getBrowserToRun,
16
- getPlaywrightDeviceDetailsObject,
17
- deleteClonedProfiles,
18
15
  getScreenToScan,
19
- getClonedProfilesWithRandomToken,
20
16
  validateDirPath,
21
17
  validateFilePath,
22
18
  validateCustomFlowLabel,
@@ -57,20 +53,20 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
57
53
  [`Invalid device. Please provide an existing device to start the scan.`],
58
54
  messageOptions,
59
55
  );
60
- process.exit(1);
56
+ cleanUpAndExit(1);
61
57
  }
62
58
  return option;
63
59
  })
64
60
  .coerce('w', option => {
65
61
  if (!option || Number.isNaN(option)) {
66
62
  printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
67
- process.exit(1);
63
+ cleanUpAndExit(1);
68
64
  } else if (option < 320 || option > 1080) {
69
65
  printMessage(
70
66
  ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
71
67
  messageOptions,
72
68
  );
73
- process.exit(1);
69
+ cleanUpAndExit(1);
74
70
  }
75
71
  return option;
76
72
  })
@@ -80,7 +76,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
80
76
  [`Invalid maximum number of pages. Please provide a positive integer.`],
81
77
  messageOptions,
82
78
  );
83
- process.exit(1);
79
+ cleanUpAndExit(1);
84
80
  }
85
81
  return option;
86
82
  })
@@ -90,7 +86,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
90
86
  [`Invalid number for max concurrency. Please provide a positive integer.`],
91
87
  messageOptions,
92
88
  );
93
- process.exit(1);
89
+ cleanUpAndExit(1);
94
90
  }
95
91
  return option;
96
92
  })
@@ -100,23 +96,23 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
100
96
  [`Invalid format. Please provide your name and email address separated by ":"`],
101
97
  messageOptions,
102
98
  );
103
- process.exit(1);
99
+ cleanUpAndExit(1);
104
100
  }
105
101
  const [name, email] = nameEmail.split(':');
106
102
  if (name === '' || name === undefined || name === null) {
107
103
  printMessage([`Please provide your name.`], messageOptions);
108
- process.exit(1);
104
+ cleanUpAndExit(1);
109
105
  }
110
106
  if (!validName(name)) {
111
107
  printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
112
- process.exit(1);
108
+ cleanUpAndExit(1);
113
109
  }
114
110
  if (!validEmail(email)) {
115
111
  printMessage(
116
112
  [`Invalid email address. Please provide a valid email address.`],
117
113
  messageOptions,
118
114
  );
119
- process.exit(1);
115
+ cleanUpAndExit(1);
120
116
  }
121
117
  return nameEmail;
122
118
  })
@@ -124,7 +120,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
124
120
  const validationErrors = validateDirPath(option);
125
121
  if (validationErrors) {
126
122
  printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
127
- process.exit(1);
123
+ cleanUpAndExit(1);
128
124
  }
129
125
  return option;
130
126
  })
@@ -136,7 +132,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
136
132
  return validateFilePath(option, dirname);
137
133
  } catch (err) {
138
134
  printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
139
- process.exit(1);
135
+ cleanUpAndExit(1);
140
136
  }
141
137
  })
142
138
  .coerce('i', option => {
@@ -146,7 +142,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
146
142
  [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
147
143
  messageOptions,
148
144
  );
149
- process.exit(1);
145
+ cleanUpAndExit(1);
150
146
  }
151
147
  return option;
152
148
  })
@@ -154,7 +150,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
154
150
  const { isValid, errorMessage } = validateCustomFlowLabel(option);
155
151
  if (!isValid) {
156
152
  printMessage([errorMessage], messageOptions);
157
- process.exit(1);
153
+ cleanUpAndExit(1);
158
154
  }
159
155
  return option;
160
156
  })
@@ -165,7 +161,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
165
161
  [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
166
162
  messageOptions,
167
163
  );
168
- process.exit(1);
164
+ cleanUpAndExit(1);
169
165
  }
170
166
  return option;
171
167
  })
@@ -202,12 +198,12 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
202
198
  ['Invalid scan duration. Please provide a positive number of seconds.'],
203
199
  messageOptions,
204
200
  );
205
- process.exit(1);
201
+ cleanUpAndExit(1);
206
202
  }
207
203
  return duration;
208
204
  })
209
205
  .check(argvs => {
210
- if (argvs.scanner === ScannerTypes.CUSTOM && argvs.scanDuration > 0) {
206
+ if (argvs.scanner === ScannerTypes.CUSTOM && typeof argvs.scanDuration === 'number' && argvs.scanDuration > 0) {
211
207
  throw new Error('-l or --scanDuration is not allowed for custom flow scans.');
212
208
  }
213
209
  return true;
@@ -225,48 +221,30 @@ const scanInit = async (argvs: Answers): Promise<string> => {
225
221
 
226
222
  // Cannot use data.browser and data.isHeadless here because prepareData has not been called yet
227
223
  setHeadlessMode(updatedArgvs.browserToRun, updatedArgvs.headless);
228
-
229
- // let chromeDataDir = null;
230
- // let edgeDataDir = null;
231
- // Empty string for profile directory will use incognito mode in playwright
232
- let clonedDataDir = '';
233
224
  const statuses = constants.urlCheckStatuses;
234
225
 
235
- const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(updatedArgvs.browserToRun, true);
236
- updatedArgvs.browserToRun = browserToRun;
237
- clonedDataDir = clonedBrowserDataDir;
238
-
239
- if (updatedArgvs.customDevice === 'Desktop' || updatedArgvs.customDevice === 'Mobile') {
240
- updatedArgvs.deviceChosen = argvs.customDevice;
241
- delete updatedArgvs.customDevice;
242
- }
226
+ const data = await prepareData(updatedArgvs);
243
227
 
244
- // Creating the playwrightDeviceDetailObject
245
- // for use in crawlDomain & crawlSitemap's preLaunchHook
246
- updatedArgvs.playwrightDeviceDetailsObject = getPlaywrightDeviceDetailsObject(
247
- updatedArgvs.deviceChosen,
248
- updatedArgvs.customDevice,
249
- updatedArgvs.viewportWidth,
250
- );
228
+ // Executes cleanUp script if error encountered
229
+ listenForCleanUp(data.randomToken);
251
230
 
252
231
  const res = await checkUrl(
253
- updatedArgvs.scanner,
254
- updatedArgvs.url,
255
- updatedArgvs.browserToRun,
256
- clonedDataDir,
257
- updatedArgvs.playwrightDeviceDetailsObject,
258
- isCustomFlow,
259
- parseHeaders(updatedArgvs.header),
232
+ data.type,
233
+ data.entryUrl,
234
+ data.browser,
235
+ data.userDataDirectory,
236
+ data.playwrightDeviceDetailsObject,
237
+ data.extraHTTPHeaders
260
238
  );
261
239
 
262
240
  if (res.httpStatus) consoleLogger.info(`Connectivity Check HTTP Response Code: ${res.httpStatus}`);
263
241
 
264
242
  switch (res.status) {
265
243
  case statuses.success.code: {
266
- updatedArgvs.finalUrl = res.url;
244
+ data.url = res.url;
267
245
  if (process.env.OOBEE_VALIDATE_URL) {
268
246
  console.log('Url is valid');
269
- process.exit(0);
247
+ cleanUpAndExit(0, data.randomToken);
270
248
  }
271
249
 
272
250
  break;
@@ -274,17 +252,17 @@ const scanInit = async (argvs: Answers): Promise<string> => {
274
252
  case statuses.unauthorised.code: {
275
253
  printMessage([statuses.unauthorised.message], messageOptions);
276
254
  consoleLogger.info(statuses.unauthorised.message);
277
- process.exit(res.status);
255
+ cleanUpAndExit(res.status);
278
256
  }
279
257
  case statuses.cannotBeResolved.code: {
280
258
  printMessage([statuses.cannotBeResolved.message], messageOptions);
281
259
  consoleLogger.info(statuses.cannotBeResolved.message);
282
- process.exit(res.status);
260
+ cleanUpAndExit(res.status);
283
261
  }
284
262
  case statuses.systemError.code: {
285
263
  printMessage([statuses.systemError.message], messageOptions);
286
264
  consoleLogger.info(statuses.systemError.message);
287
- process.exit(res.status);
265
+ cleanUpAndExit(res.status);
288
266
  }
289
267
  case statuses.invalidUrl.code: {
290
268
  if (
@@ -293,67 +271,49 @@ const scanInit = async (argvs: Answers): Promise<string> => {
293
271
  ) {
294
272
  printMessage([statuses.invalidUrl.message], messageOptions);
295
273
  consoleLogger.info(statuses.invalidUrl.message);
296
- process.exit(res.status);
274
+ cleanUpAndExit(res.status);
297
275
  }
298
276
 
299
277
  const finalFilePath = getFileSitemap(updatedArgvs.url);
300
278
  if (finalFilePath) {
301
- updatedArgvs.isLocalFileScan = true;
302
- updatedArgvs.finalUrl = finalFilePath;
279
+ data.isLocalFileScan = true;
280
+ data.url = finalFilePath;
303
281
 
304
282
  if (process.env.OOBEE_VALIDATE_URL) {
305
283
  console.log('Url is valid');
306
- process.exit(0);
284
+ cleanUpAndExit(0);
307
285
  }
308
286
  } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
309
287
  printMessage([statuses.notALocalFile.message], messageOptions);
310
288
  consoleLogger.info(statuses.notALocalFile.message);
311
- process.exit(statuses.notALocalFile.code);
289
+ cleanUpAndExit(statuses.notALocalFile.code);
312
290
  } else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
313
291
  printMessage([statuses.notASitemap.message], messageOptions);
314
292
  consoleLogger.info(statuses.notASitemap.message);
315
- process.exit(statuses.notASitemap.code);
293
+ cleanUpAndExit(statuses.notASitemap.code);
316
294
  }
317
295
  break;
318
296
  }
319
297
  case statuses.notASitemap.code: {
320
298
  printMessage([statuses.notASitemap.message], messageOptions);
321
299
  consoleLogger.info(statuses.notASitemap.message);
322
- process.exit(res.status);
300
+ cleanUpAndExit(res.status);
323
301
  }
324
302
  case statuses.notALocalFile.code: {
325
303
  printMessage([statuses.notALocalFile.message], messageOptions);
326
304
  consoleLogger.info(statuses.notALocalFile.message);
327
- process.exit(res.status);
305
+ cleanUpAndExit(res.status);
328
306
  }
329
307
  case statuses.browserError.code: {
330
308
  printMessage([statuses.browserError.message], messageOptions);
331
309
  consoleLogger.info(statuses.browserError.message);
332
- process.exit(res.status);
310
+ cleanUpAndExit(res.status);
333
311
  }
334
312
  default:
335
313
  break;
336
314
  }
337
315
 
338
- if (updatedArgvs.scanner === ScannerTypes.WEBSITE && !updatedArgvs.strategy) {
339
- updatedArgvs.strategy = 'same-domain';
340
- }
341
-
342
- const data = await prepareData(updatedArgvs);
343
-
344
- // File clean up after url check
345
- // files will clone a second time below if url check passes
346
316
  if (process.env.OOBEE_VERBOSE) {
347
- deleteClonedProfiles(data.browser, data.randomToken);
348
- } else {
349
- deleteClonedProfiles(data.browser); // first deletion
350
- }
351
-
352
- if (updatedArgvs.exportDirectory) {
353
- constants.exportDirectory = updatedArgvs.exportDirectory;
354
- }
355
-
356
- if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) {
357
317
  const randomTokenMessage = {
358
318
  type: 'randomToken',
359
319
  payload: `${data.randomToken}`,
@@ -364,29 +324,16 @@ const scanInit = async (argvs: Answers): Promise<string> => {
364
324
  }
365
325
 
366
326
  const screenToScan = getScreenToScan(
367
- updatedArgvs.deviceChosen,
368
- updatedArgvs.customDevice,
369
- updatedArgvs.viewportWidth,
327
+ data.deviceChosen,
328
+ data.customDevice,
329
+ data.viewportWidth,
370
330
  );
371
331
 
372
- // Clone profiles a second time
373
- clonedDataDir = getClonedProfilesWithRandomToken(data.browser, data.randomToken);
374
- data.userDataDirectory = clonedDataDir;
375
-
376
332
  printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
377
-
333
+ consoleLogger.info(`Oobee version: ${appVersion}`);
334
+
378
335
  await combineRun(data, screenToScan);
379
336
 
380
- // Delete cloned directory
381
- if (process.env.OOBEE_VERBOSE) {
382
- deleteClonedProfiles(data.browser, data.randomToken);
383
- } else {
384
- deleteClonedProfiles(data.browser); // second deletion
385
- }
386
-
387
- // Delete dataset and request queues
388
- cleanUp(data.randomToken);
389
-
390
337
  return getStoragePath(data.randomToken);
391
338
  };
392
339
 
@@ -422,6 +369,6 @@ const optionsAnswer: Answers = {
422
369
  };
423
370
 
424
371
  await scanInit(optionsAnswer);
425
- process.exit(0);
372
+ cleanUpAndExit(0);
426
373
 
427
374
  export default options;
package/src/combine.ts CHANGED
@@ -5,9 +5,9 @@ import crawlDomain from './crawlers/crawlDomain.js';
5
5
  import crawlLocalFile from './crawlers/crawlLocalFile.js';
6
6
  import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
7
7
  import generateArtifacts from './mergeAxeResults.js';
8
- import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
8
+ import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp, cleanUpAndExit } from './utils.js';
9
9
  import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
10
- import { getBlackListedPatterns, submitForm, urlWithoutAuth } from './constants/common.js';
10
+ import { getBlackListedPatterns, submitForm } from './constants/common.js';
11
11
  import { consoleLogger, silentLogger } from './logs.js';
12
12
  import runCustom from './crawlers/runCustom.js';
13
13
  import { alertMessageOptions } from './constants/cliFunctions.js';
@@ -55,7 +55,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
55
55
  includeScreenshots, // Include screenshots: if checked, = 'true'
56
56
  followRobots, // Adhere to robots.txt: if checked, = 'true'
57
57
  metadata,
58
- customFlowLabel = 'Custom Flow',
58
+ customFlowLabel = 'None',
59
59
  extraHTTPHeaders,
60
60
  safeMode,
61
61
  zip,
@@ -67,6 +67,12 @@ const combineRun = async (details: Data, deviceToScan: string) => {
67
67
  process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
68
68
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
69
69
 
70
+ if (process.env.CRAWLEE_SYSTEM_INFO_V2 === undefined) {
71
+ // Set the environment variable to enable system info v2
72
+ // Resolves an issue that occurs when wmic is not installed on Windows
73
+ process.env.CRAWLEE_SYSTEM_INFO_V2 = '1';
74
+ }
75
+
70
76
  const host = type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE ? '' : getHost(url);
71
77
 
72
78
  let blacklistedPatterns: string[] | null = null;
@@ -74,12 +80,12 @@ const combineRun = async (details: Data, deviceToScan: string) => {
74
80
  blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
75
81
  } catch (error) {
76
82
  consoleLogger.error(error);
77
- process.exit(1);
83
+ cleanUpAndExit(1);
78
84
  }
79
85
 
80
86
  // remove basic-auth credentials from URL
81
87
  const finalUrl = !(type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE)
82
- ? urlWithoutAuth(url)
88
+ ? new URL(url)
83
89
  : new URL(pathToFileURL(url));
84
90
 
85
91
  // Use the string version of finalUrl to reduce logic at submitForm
@@ -207,7 +213,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
207
213
 
208
214
  default:
209
215
  consoleLogger.error(`type: ${type} not defined`);
210
- process.exit(1);
216
+ cleanUpAndExit(1);
211
217
  }
212
218
 
213
219
  scanDetails.endTime = new Date();
@@ -252,10 +258,16 @@ const combineRun = async (details: Data, deviceToScan: string) => {
252
258
  metadata,
253
259
  );
254
260
  } else {
261
+
262
+ // No pages were scanned because the loaded URL does not meet the crawler requirements
255
263
  printMessage([`No pages were scanned.`], alertMessageOptions);
264
+ cleanUpAndExit(1, randomToken, true);
256
265
  }
257
266
  } else {
267
+
268
+ // No pages were scanned because the loaded URL does not meet the crawler requirements
258
269
  printMessage([`No pages were scanned.`], alertMessageOptions);
270
+ cleanUpAndExit(1, randomToken, true);
259
271
  }
260
272
  };
261
273
 
@@ -1,6 +1,7 @@
1
1
  import { Options } from 'yargs';
2
2
  import printMessage from 'print-message';
3
3
  import { BrowserTypes, RuleFlags, ScannerTypes } from './constants.js';
4
+ import { cleanUpAndExit } from '../utils.js';
4
5
 
5
6
  export const messageOptions = {
6
7
  border: false,
@@ -53,7 +54,7 @@ export const cliOptions: { [key: string]: Options } = {
53
54
  ],
54
55
  messageOptions,
55
56
  );
56
- process.exit(1);
57
+ cleanUpAndExit(1);
57
58
  return null;
58
59
  }
59
60
  },
@@ -158,7 +159,7 @@ export const cliOptions: { [key: string]: Options } = {
158
159
  ],
159
160
  messageOptions,
160
161
  );
161
- process.exit(1);
162
+ cleanUpAndExit(1);
162
163
  return null;
163
164
  }
164
165
  },
@@ -282,7 +283,7 @@ export const cliOptions: { [key: string]: Options } = {
282
283
  ],
283
284
  messageOptions,
284
285
  );
285
- process.exit(1);
286
+ cleanUpAndExit(1);
286
287
  }
287
288
  if (userChoices.length > 1 && userChoices.includes('default')) {
288
289
  printMessage(
@@ -291,7 +292,7 @@ export const cliOptions: { [key: string]: Options } = {
291
292
  ],
292
293
  messageOptions,
293
294
  );
294
- process.exit(1);
295
+ cleanUpAndExit(1);
295
296
  }
296
297
  return userChoices;
297
298
  },