@govtechsg/oobee 0.10.61 → 0.10.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -86,6 +86,7 @@ verapdf --version
 | OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
 | OOBEE_FAST_CRAWLER| When set to `true`, increases scan concurrency at a rapid rate. Experimental, may cause system stability issues on low-powered devices. | `false`|
 | OOBEE_VALIDATE_URL| When set to `true`, validates if URLs are valid and exits. | `false` |
+| OOBEE_LOGS_PATH | When set, logs are written to this path. | |
 | WARN_LEVEL | Only used in tests. | |
 
 #### Environment variables used internally (Do not set)
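The new `OOBEE_LOGS_PATH` variable feeds the log-path resolution added in `package/src/logs.ts` (see that diff below). A minimal sketch of the resolution order, assuming the per-OS defaults shown in that diff:

```ts
import path from 'path';

// Sketch mirrored from the logs.ts diff below: OOBEE_LOGS_PATH wins;
// otherwise a per-OS application-support folder, falling back to the
// current working directory.
const basePath =
  process.env.OOBEE_LOGS_PATH ??
  (process.platform === 'win32'
    ? path.join(process.env.APPDATA ?? '', 'Oobee')
    : process.platform === 'darwin'
      ? path.join(process.env.HOME ?? '', 'Library', 'Application Support', 'Oobee')
      : process.cwd());
```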
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@govtechsg/oobee",
   "main": "dist/npmIndex.js",
-  "version": "0.10.61",
+  "version": "0.10.62",
   "type": "module",
   "author": "Government Technology Agency <info@tech.gov.sg>",
   "dependencies": {
@@ -69,7 +69,8 @@
     "ansi-regex": "^5.0.1",
     "tough-cookie": "^5.0.0-rc.2",
     "micromatch": "github:micromatch/micromatch.git#4.0.8",
-    "brace-expansion": "^1.1.12"
+    "brace-expansion": "^1.1.12",
+    "tmp": "0.2.4"
   },
   "optionalDependencies": {
     "@napi-rs/canvas-darwin-arm64": "^0.1.53",
package/src/cli.ts CHANGED
@@ -5,14 +5,13 @@ import printMessage from 'print-message';
 import { devices } from 'playwright';
 import { fileURLToPath } from 'url';
 import path from 'path';
-import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
+import { cleanUp, setHeadlessMode, getVersion, getStoragePath, listenForCleanUp, cleanUpAndExit } from './utils.js';
 import {
   checkUrl,
   prepareData,
   getFileSitemap,
   validEmail,
   validName,
-  deleteClonedProfiles,
   getScreenToScan,
   validateDirPath,
   validateFilePath,
@@ -54,20 +53,20 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid device. Please provide an existing device to start the scan.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
 .coerce('w', option => {
   if (!option || Number.isNaN(option)) {
     printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   } else if (option < 320 || option > 1080) {
     printMessage(
       ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -77,7 +76,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid maximum number of pages. Please provide a positive integer.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -87,7 +86,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid number for max concurrency. Please provide a positive integer.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -97,23 +96,23 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid format. Please provide your name and email address separated by ":"`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   const [name, email] = nameEmail.split(':');
   if (name === '' || name === undefined || name === null) {
     printMessage([`Please provide your name.`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (!validName(name)) {
     printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (!validEmail(email)) {
     printMessage(
       [`Invalid email address. Please provide a valid email address.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return nameEmail;
 })
@@ -121,7 +120,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
   const validationErrors = validateDirPath(option);
   if (validationErrors) {
     printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -133,7 +132,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
     return validateFilePath(option, dirname);
   } catch (err) {
     printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
 })
 .coerce('i', option => {
@@ -143,7 +142,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -151,7 +150,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
   const { isValid, errorMessage } = validateCustomFlowLabel(option);
   if (!isValid) {
     printMessage([errorMessage], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -162,7 +161,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -199,7 +198,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       ['Invalid scan duration. Please provide a positive number of seconds.'],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return duration;
 })
@@ -226,7 +225,8 @@ const scanInit = async (argvs: Answers): Promise<string> => {

   const data = await prepareData(updatedArgvs);

-  constants.userDataDirectory = data.userDataDirectory;
+  // Executes cleanUp script if error encountered
+  listenForCleanUp(data.randomToken);

   const res = await checkUrl(
     data.type,
@@ -244,7 +244,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
       data.url = res.url;
       if (process.env.OOBEE_VALIDATE_URL) {
         console.log('Url is valid');
-        process.exit(0);
+        cleanUpAndExit(0, data.randomToken);
       }

       break;
@@ -252,17 +252,17 @@ const scanInit = async (argvs: Answers): Promise<string> => {
     case statuses.unauthorised.code: {
       printMessage([statuses.unauthorised.message], messageOptions);
       consoleLogger.info(statuses.unauthorised.message);
-      process.exit(res.status);
+      cleanUpAndExit(res.status);
     }
     case statuses.cannotBeResolved.code: {
       printMessage([statuses.cannotBeResolved.message], messageOptions);
       consoleLogger.info(statuses.cannotBeResolved.message);
-      process.exit(res.status);
+      cleanUpAndExit(res.status);
     }
     case statuses.systemError.code: {
       printMessage([statuses.systemError.message], messageOptions);
       consoleLogger.info(statuses.systemError.message);
-      process.exit(res.status);
+      cleanUpAndExit(res.status);
     }
     case statuses.invalidUrl.code: {
       if (
@@ -271,7 +271,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
       ) {
         printMessage([statuses.invalidUrl.message], messageOptions);
         consoleLogger.info(statuses.invalidUrl.message);
-        process.exit(res.status);
+        cleanUpAndExit(res.status);
       }

       const finalFilePath = getFileSitemap(updatedArgvs.url);
@@ -281,40 +281,38 @@ const scanInit = async (argvs: Answers): Promise<string> => {

       if (process.env.OOBEE_VALIDATE_URL) {
         console.log('Url is valid');
-        process.exit(0);
+        cleanUpAndExit(0);
       }
     } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
       printMessage([statuses.notALocalFile.message], messageOptions);
       consoleLogger.info(statuses.notALocalFile.message);
-      process.exit(statuses.notALocalFile.code);
+      cleanUpAndExit(statuses.notALocalFile.code);
     } else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
       printMessage([statuses.notASitemap.message], messageOptions);
       consoleLogger.info(statuses.notASitemap.message);
-      process.exit(statuses.notASitemap.code);
+      cleanUpAndExit(statuses.notASitemap.code);
     }
     break;
   }
   case statuses.notASitemap.code: {
     printMessage([statuses.notASitemap.message], messageOptions);
     consoleLogger.info(statuses.notASitemap.message);
-    process.exit(res.status);
+    cleanUpAndExit(res.status);
   }
   case statuses.notALocalFile.code: {
     printMessage([statuses.notALocalFile.message], messageOptions);
     consoleLogger.info(statuses.notALocalFile.message);
-    process.exit(res.status);
+    cleanUpAndExit(res.status);
   }
   case statuses.browserError.code: {
     printMessage([statuses.browserError.message], messageOptions);
     consoleLogger.info(statuses.browserError.message);
-    process.exit(res.status);
+    cleanUpAndExit(res.status);
   }
   default:
     break;
 }

-  deleteClonedProfiles(data.browser, data.randomToken);
-
   if (process.env.OOBEE_VERBOSE) {
     const randomTokenMessage = {
       type: 'randomToken',
@@ -332,14 +330,10 @@ const scanInit = async (argvs: Answers): Promise<string> => {
   );

   printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
-
+  consoleLogger.info(`Oobee version: ${appVersion}`);
+
   await combineRun(data, screenToScan);

-  deleteClonedProfiles(data.browser, data.randomToken);
-
-  // Delete dataset and request queues
-  cleanUp(data.randomToken);
-
   return getStoragePath(data.randomToken);
 };

@@ -375,6 +369,6 @@ const optionsAnswer: Answers = {
 };

 await scanInit(optionsAnswer);
-process.exit(0);
+cleanUpAndExit(0);

 export default options;
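The recurring change in this file is mechanical: every `process.exit(n)` becomes `cleanUpAndExit(n)`. A minimal sketch of the call-site contract, under the signature defined in the `package/src/utils.ts` diff further down:

```ts
import { cleanUpAndExit } from './utils.js';

const randomToken = 'scan-1234'; // hypothetical scan identifier

// Before (0.10.61): exiting directly could strand cloned browser profiles
// and crawlee datasets on disk.
// process.exit(1);

// After (0.10.62): cleanup runs before exit; the optional token selects
// which scan's artifacts to remove (defaulting to constants.randomToken),
// and the trailing isError flag preserves the error log file.
await cleanUpAndExit(1, randomToken, true);
```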
package/src/combine.ts CHANGED
@@ -5,7 +5,7 @@ import crawlDomain from './crawlers/crawlDomain.js';
 import crawlLocalFile from './crawlers/crawlLocalFile.js';
 import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
 import generateArtifacts from './mergeAxeResults.js';
-import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp } from './utils.js';
+import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp, cleanUpAndExit } from './utils.js';
 import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
 import { getBlackListedPatterns, submitForm } from './constants/common.js';
 import { consoleLogger, silentLogger } from './logs.js';
@@ -80,7 +80,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
     blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
   } catch (error) {
     consoleLogger.error(error);
-    process.exit(1);
+    cleanUpAndExit(1);
   }

   // remove basic-auth credentials from URL
@@ -213,7 +213,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {

     default:
       consoleLogger.error(`type: ${type} not defined`);
-      process.exit(1);
+      cleanUpAndExit(1);
   }

   scanDetails.endTime = new Date();
@@ -261,15 +261,13 @@ const combineRun = async (details: Data, deviceToScan: string) => {

       // No page were scanned because the URL loaded does not meet the crawler requirements
       printMessage([`No pages were scanned.`], alertMessageOptions);
-      cleanUp(randomToken);
-      process.exit(1);
+      cleanUpAndExit(1, randomToken, true);
     }
   } else {

     // No page were scanned because the URL loaded does not meet the crawler requirements
     printMessage([`No pages were scanned.`], alertMessageOptions);
-    cleanUp(randomToken);
-    process.exit(1);
+    cleanUpAndExit(1, randomToken, true);
   }
 };

@@ -1,6 +1,7 @@
 import { Options } from 'yargs';
 import printMessage from 'print-message';
 import { BrowserTypes, RuleFlags, ScannerTypes } from './constants.js';
+import { cleanUpAndExit } from '../utils.js';

 export const messageOptions = {
   border: false,
@@ -53,7 +54,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
     return null;
   }
 },
@@ -158,7 +159,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
     return null;
   }
 },
@@ -282,7 +283,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (userChoices.length > 1 && userChoices.includes('default')) {
     printMessage(
@@ -291,7 +292,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return userChoices;
 },
@@ -31,7 +31,7 @@ import constants, {
 } from './constants.js';
 import { consoleLogger, silentLogger } from '../logs.js';
 import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
-import { randomThreeDigitNumberString } from '../utils.js';
+import { cleanUpAndExit, randomThreeDigitNumberString, register } from '../utils.js';
 import { Answers, Data } from '../index.js';
 import { DeviceDescriptor } from '../types/types.js';

@@ -305,6 +305,8 @@ const checkUrlConnectivityWithBrowser = async (
       ...getPlaywrightLaunchOptions(browserToRun),
       ...playwrightDeviceDetailsObject,
     });
+
+    register(browserContext);
   } catch (err) {
     printMessage([`Unable to launch browser\n${err}`], messageOptions);
     res.status = constants.urlCheckStatuses.browserError.code;
@@ -463,7 +465,7 @@ export const parseHeaders = (header?: string): Record<string, string> => {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   allHeaders[headerValuePair[0]] = headerValuePair[1]; // {"header": "value", "header2": "value2", ...}
 });
@@ -558,6 +560,9 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
     await getUrlsFromRobotsTxt(url, browserToRun, resolvedUserDataDirectory, extraHTTPHeaders);
   }

+  constants.userDataDirectory = resolvedUserDataDirectory;
+  constants.randomToken = resultFilename;
+
   return {
     type: scanner,
     url: url,
@@ -676,6 +681,8 @@ const getRobotsTxtViaPlaywright = async (robotsUrl: string, browser: string, use
     ...(extraHTTPHeaders && { extraHTTPHeaders }),
   });

+  register(browserContext);
+
   const page = await browserContext.newPage();

   await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });
@@ -848,6 +855,7 @@ export const getLinksFromSitemap = async (
   },
 );

+register(browserContext);
 const page = await browserContext.newPage();

 await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
@@ -1603,6 +1611,8 @@ export const submitFormViaPlaywright = async (
   },
 );

+register(browserContext);
+
 const page = await browserContext.newPage();

 try {
@@ -1706,6 +1716,8 @@ export async function initModifiedUserAgent(
     : '';

   const browserContext = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, launchOptions);
+  register(browserContext);
+
   const page = await browserContext.newPage();

   // Retrieve the default user agent.
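Each of these hunks applies the same pattern: a Playwright `BrowserContext` is passed to `register()` right after creation, so the new `stopAll()` in `package/src/utils.ts` (below) can close it during cleanup. A minimal sketch of the pattern, assuming a persistent-context launch like the ones above:

```ts
import { chromium } from 'playwright';
import { register } from '../utils.js';

// register() returns its argument, so it can either wrap the creation
// expression or be called on a separate line, as the hunks above do.
const browserContext = register(
  await chromium.launchPersistentContext('/tmp/oobee-profile', { headless: true }),
);
const page = await browserContext.newPage();
// ...scan work...
await browserContext.close(); // stopAll() also closes it if the run aborts
```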
@@ -5,10 +5,11 @@ import { globSync } from 'glob';
 import which from 'which';
 import os from 'os';
 import { spawnSync, execSync } from 'child_process';
-import { chromium } from 'playwright';
+import { Browser, BrowserContext, chromium } from 'playwright';
 import * as Sentry from '@sentry/node';
 import { consoleLogger, silentLogger } from '../logs.js';
 import { PageInfo } from '../mergeAxeResults.js';
+import { PlaywrightCrawler } from 'crawlee';

 const filename = fileURLToPath(import.meta.url);
 const dirname = path.dirname(filename);
@@ -136,7 +137,7 @@ export const getDefaultChromiumDataDir = () => {
   }
   return null;
 } catch (error) {
-  silentLogger.error(`Error in getDefaultChromiumDataDir(): ${error}`);
+  consoleLogger.error(`Error in getDefaultChromiumDataDir(): ${error}`);
 }
};

@@ -227,45 +228,68 @@ if (fs.existsSync('/.dockerenv')) {
   launchOptionsArgs = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'];
 }

-export const getProxy = (): { type: string; url: string } | null => {
-  if (os.platform() === 'win32') {
-    let internetSettings: string[];
-    try {
-      internetSettings = execSync(
-        'Get-ItemProperty -Path "Registry::HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings"',
-        { shell: 'powershell.exe' },
-      )
-        .toString()
-        .split('\n');
-    } catch (e) {
-      console.log(e.toString());
-      silentLogger.error(e.toString());
+type ProxyInfo = { type: 'autoConfig' | 'manualProxy'; url: string } | null;
+
+function queryRegKey(key: string): Record<string, string> {
+  try {
+    const out = execSync(`reg query "${key}"`, { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
+    const values: Record<string, string> = {};
+    for (const line of out.split(/\r?\n/)) {
+      const parts = line.trim().split(/\s{2,}/);
+      if (parts.length >= 3) {
+        const [name, _type, ...rest] = parts;
+        values[name] = rest.join(' ');
+      }
     }
+    return values;
+  } catch {
+    return {};
+  }
+}

-    const getSettingValue = (settingName: string) =>
-      internetSettings
-        .find(s => s.startsWith(settingName))
-        // split only once at with ':' as the delimiter
-        ?.split(/:(.*)/s)[1]
-        ?.trim();
+function parseDwordFlag(v: unknown): number {
+  if (v == null) return 0;
+  const s = String(v).trim();
+  // Handles "1", "0", "0x1", "0x0"
+  if (/^0x[0-9a-f]+$/i.test(s)) return parseInt(s, 16);
+  if (/^\d+$/.test(s)) return parseInt(s, 10);
+  return 0;
+}

-    if (getSettingValue('AutoConfigURL')) {
-      return { type: 'autoConfig', url: getSettingValue('AutoConfigURL') };
-    }
-    if (getSettingValue('ProxyEnable') === '1') {
-      return { type: 'manualProxy', url: getSettingValue('ProxyServer') };
-    }
-    return null;
+function normalizePacUrl(u: string): string {
+  const s = u.trim();
+  // If it lacks a scheme, assume http:// (Chrome requires a full URL)
+  return /^(https?|file):/i.test(s) ? s : `http://${s}`;
+}
+
+export const getProxy = (): ProxyInfo => {
+  if (os.platform() !== 'win32') return null;
+
+  const values = queryRegKey('HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings');
+  const pacUrlRaw = (values['AutoConfigURL'] || '').trim();
+  const proxyEnableRaw = (values['ProxyEnable'] || '').trim();
+  const proxyServerRaw = (values['ProxyServer'] || '').trim();
+
+  // 1) PAC beats manual proxy if present
+  if (pacUrlRaw) {
+    return { type: 'autoConfig', url: normalizePacUrl(pacUrlRaw) };
+  }
+
+  // 2) Manual proxy only if enabled
+  const enabled = parseDwordFlag(proxyEnableRaw) === 1;
+  if (enabled && proxyServerRaw) {
+    return { type: 'manualProxy', url: proxyServerRaw };
   }
-  // develop for mac
+
   return null;
 };

+// Usage
 export const proxy = getProxy();

-if (proxy && proxy.type === 'autoConfig') {
+if (proxy?.type === 'autoConfig') {
   launchOptionsArgs.push(`--proxy-pac-url=${proxy.url}`);
-} else if (proxy && proxy.type === 'manualProxy') {
+} else if (proxy?.type === 'manualProxy') {
   launchOptionsArgs.push(`--proxy-server=${proxy.url}`);
 }

@@ -405,6 +429,7 @@ const urlCheckStatuses = {
   },
   axiosTimeout: { code: 18, message: 'Axios timeout exceeded. Falling back on browser checks.' },
   notALocalFile: { code: 19, message: 'Provided filepath is not a local html or sitemap file.' },
+  terminationRequested: { code: 15, message: 'Termination requested.' }
 };

 /* eslint-disable no-unused-vars */
@@ -467,6 +492,13 @@ export default {
   wcagLinks,
   robotsTxtUrls: null,
   userDataDirectory: null, // This will be set later in the code
+  randomToken: null, // This will be set later in the code
+  // Track all active Crawlee / Playwright resources for cleanup
+  resources: {
+    crawlers: new Set<PlaywrightCrawler>(),
+    browserContexts: new Set<BrowserContext>(),
+    browsers: new Set<Browser>(),
+  },
 };

 export const rootPath = dirname;
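A worked example of what the rewritten `getProxy()` path above parses. Under the assumption that `reg query` prints value lines in the usual `name  type  data` layout, `queryRegKey` splits each trimmed line on runs of two or more spaces:

```ts
// Hypothetical output line from:
//   reg query "HKCU\Software\Microsoft\Windows\CurrentVersion\Internet Settings"
const line = '    AutoConfigURL    REG_SZ    http://proxy.example/proxy.pac';

const parts = line.trim().split(/\s{2,}/);
console.log(parts);
// => ['AutoConfigURL', 'REG_SZ', 'http://proxy.example/proxy.pac']

// With that value present, getProxy() takes the PAC branch and returns
// { type: 'autoConfig', url: 'http://proxy.example/proxy.pac' }.
// ProxyEnable REG_DWORD values arrive as '0x1'/'0x0' strings, which
// parseDwordFlag handles explicitly.
```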
@@ -105,8 +105,6 @@ const startScanQuestions = [
       playwrightDeviceDetailsObject,
       parseHeaders(answers.header),
     );
-
-    deleteClonedProfiles(browserToRun, resultFilename);

     switch (res.status) {
       case statuses.success.code:
@@ -318,9 +318,9 @@ export const runAxeScript = async ({
   page.on('console', msg => {
     const type = msg.type();
     if (type === 'error') {
-      silentLogger.log({ level: 'error', message: msg.text() });
+      consoleLogger.log({ level: 'error', message: msg.text() });
     } else {
-      silentLogger.log({ level: 'info', message: msg.text() });
+      consoleLogger.log({ level: 'info', message: msg.text() });
     }
   });
   */
@@ -29,7 +29,7 @@ import {
   getUrlsFromRobotsTxt,
   waitForPageLoaded,
 } from '../constants/common.js';
-import { areLinksEqual, isFollowStrategy } from '../utils.js';
+import { areLinksEqual, isFollowStrategy, register } from '../utils.js';
 import {
   handlePdfDownload,
   runPdfScan,
@@ -339,7 +339,7 @@ const crawlDomain = async ({

   let isAbortingScanNow = false;

-  const crawler = new crawlee.PlaywrightCrawler({
+  const crawler = register(new crawlee.PlaywrightCrawler({
     launchContext: {
       launcher: constants.launcher,
       launchOptions: getPlaywrightLaunchOptions(browser),
@@ -723,7 +723,7 @@ const crawlDomain = async ({
       scaleDownStepRatio: 0.1, // Scale down slower
     },
   }),
-  });
+  }));

   await crawler.run();

@@ -8,6 +8,7 @@ import crawlSitemap from './crawlSitemap.js';
 import { EnqueueStrategy } from 'crawlee';
 import { ViewportSettingsClass } from '../combine.js';
 import { getPlaywrightLaunchOptions } from '../constants/common.js';
+import { register } from '../utils.js';

 const crawlIntelligentSitemap = async (
   url: string,
@@ -58,6 +59,7 @@ const crawlIntelligentSitemap = async (
     ...getPlaywrightLaunchOptions(browser),
     ...(extraHTTPHeaders && { extraHTTPHeaders }),
   });
+  register(context);

   const page = await context.newPage();

@@ -17,6 +17,7 @@ import {
 import { runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
 import { guiInfoLog } from '../logs.js';
 import crawlSitemap from './crawlSitemap.js';
+import { register } from '../utils.js';

 export const crawlLocalFile = async ({
   url,
@@ -161,6 +162,8 @@ export const crawlLocalFile = async ({
     ...playwrightDeviceDetailsObject,
   });

+  register(browserContext);
+
   const timeoutId = scanDuration > 0
     ? setTimeout(() => {
         console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting local file scan.`);
@@ -20,7 +20,7 @@ import {
   waitForPageLoaded,
   isFilePath,
 } from '../constants/common.js';
-import { areLinksEqual, isWhitelistedContentType, isFollowStrategy } from '../utils.js';
+import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
 import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
 import { guiInfoLog } from '../logs.js';
 import { ViewportSettingsClass } from '../combine.js';
@@ -106,7 +106,7 @@ const crawlSitemap = async ({
   sources: linksFromSitemap,
 });

-const crawler = new crawlee.PlaywrightCrawler({
+const crawler = register(new crawlee.PlaywrightCrawler({
   launchContext: {
     launcher: constants.launcher,
     launchOptions: getPlaywrightLaunchOptions(browser),
@@ -395,7 +395,7 @@ const crawlSitemap = async ({
     scaleDownStepRatio: 0.1, // Scale down slower
   },
 }),
-});
+}));

 await crawler.run();

@@ -78,7 +78,7 @@ export const screenshotFullPage = async (page, screenshotsDir: string, screensho
   });

   consoleLogger.info(`Screenshot page at: ${page.url()}`);
-  silentLogger.info(`Screenshot page at: ${page.url()}`);
+  consoleLogger.info(`Screenshot page at: ${page.url()}`);

   await page.screenshot({
     timeout: 5000,
@@ -469,7 +469,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
     consoleLogger.info(`Overlay state: ${existingOverlay}`);
   } catch {
     consoleLogger.info('Error in adding overlay menu to page');
-    silentLogger.info('Error in adding overlay menu to page');
+    consoleLogger.info('Error in adding overlay menu to page');
   }
 });

@@ -15,6 +15,7 @@ import constants, {
   STATUS_CODE_METADATA,
   UrlsCrawled,
 } from '../constants/constants.js';
+import { cleanUpAndExit } from '../utils.js';

 const require = createRequire(import.meta.url);

@@ -233,7 +234,7 @@ const getVeraExecutable = () => {
     const veraPdfExeNotFoundError =
       'Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.';
     consoleLogger.error(veraPdfExeNotFoundError);
-    silentLogger.error(veraPdfExeNotFoundError);
+    consoleLogger.error(veraPdfExeNotFoundError);
   }
   return veraPdfExe;
 };
@@ -355,7 +356,7 @@ export const runPdfScan = async (randomToken: string) => {
     'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml',
   )}"`;
   if (!veraPdfExe || !veraPdfProfile) {
-    process.exit(1);
+    cleanUpAndExit(1);
   }

   const intermediateFolder = randomToken; // NOTE: assumes this folder is already created for crawlee
@@ -1,7 +1,7 @@
 /* eslint-env browser */
 import { chromium } from 'playwright';
 import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
-import { cleanUp } from '../utils.js';
+import { cleanUpAndExit, register } from '../utils.js';
 import constants, {
   getIntermediateScreenshotsPath,
   guiInfoStatusTypes,
@@ -48,7 +48,6 @@ const runCustom = async (
   includeScreenshots: boolean,
 ) => {
   // checks and delete datasets path if it already exists
-  cleanUp(randomToken);
   process.env.CRAWLEE_STORAGE_DIR = randomToken;

   const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
@@ -83,6 +82,8 @@ const runCustom = async (
     ...viewportSettings.playwrightDeviceDetailsObject,
   });

+  register(context);
+
   // Detection of new page
   context.on('page', async newPage => {
     await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
@@ -107,7 +108,7 @@ const runCustom = async (
     await allPagesClosedPromise(pageClosePromises);
   } catch (error) {
     log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
-    process.exit(1);
+    cleanUpAndExit(1, randomToken, true);
   }

   guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
package/src/index.ts CHANGED
@@ -7,6 +7,8 @@ import {
   cleanUp,
   getUserDataTxt,
   writeToUserDataTxt,
+  listenForCleanUp,
+  cleanUpAndExit,
 } from './utils.js';
 import {
   prepareData,
@@ -106,19 +108,19 @@ const runScan = async (answers: Answers) => {
   answers.metadata = '{}';

   const data: Data = await prepareData(answers);
+
+  // Executes cleanUp script if error encountered
+  listenForCleanUp(data.randomToken);
+
   data.userDataDirectory = getClonedProfilesWithRandomToken(data.browser, data.randomToken);

   printMessage(['Scanning website...'], messageOptions);

   await combineRun(data, screenToScan);

-  // Delete cloned directory
-  deleteClonedProfiles(data.browser, data.randomToken);
-
   // Delete dataset and request queues
-  cleanUp(data.randomToken);
+  cleanUpAndExit(0, data.randomToken);

-  process.exit(0);
 };

 if (userData) {
package/src/logs.ts CHANGED
@@ -2,6 +2,8 @@
 /* eslint-disable no-shadow */
 import { createLogger, format, transports } from 'winston';
 import { guiInfoStatusTypes } from './constants/constants.js';
+import path from 'path';
+import { randomUUID } from 'crypto';

 const { combine, timestamp, printf } = format;

@@ -20,12 +22,32 @@ const logFormat = printf(({ timestamp, level, message }) => {
 // transport: storage device for logs
 // Enabled for console and storing into files; Files are overwritten each time
 // All logs in combined.txt, error in errors.txt
+const uuid = randomUUID();
+let basePath: string;
+
+if (process.env.OOBEE_LOGS_PATH) {
+  basePath = process.env.OOBEE_LOGS_PATH;
+} else if (process.platform === 'win32') {
+  basePath = path.join(process.env.APPDATA, 'Oobee');
+} else if (process.platform === 'darwin') {
+  basePath = path.join(process.env.HOME, 'Library', 'Application Support', 'Oobee');
+} else {
+  basePath = path.join(process.cwd());
+}
+
+export const errorsTxtPath = path.join(basePath, `${uuid}.txt`);

 const consoleLogger = createLogger({
   silent: !(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE),
   format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
-  transports:
-    process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE ? [new transports.Console()] : [],
+  transports: [
+    new transports.Console({ level: 'info' }),
+    new transports.File({
+      filename: errorsTxtPath,
+      level: 'info',
+      handleExceptions: true,
+    }),
+  ],
 });

 // No display in consoles, this will mostly be used within the interactive script to avoid disrupting the flow
@@ -34,9 +56,10 @@ const consoleLogger = createLogger({
 const silentLogger = createLogger({
   format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
   transports: [
-    process.env.OOBEE_VERBOSE || process.env.RUNNING_FROM_PH_GUI
-      ? new transports.Console({ handleExceptions: true })
-      : new transports.File({ filename: 'errors.txt', level: 'warn', handleExceptions: true }),
+    new transports.File({
+      filename: errorsTxtPath,
+      level: 'warn',
+      handleExceptions: true }),
   ].filter(Boolean),
 });

@@ -46,16 +69,17 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
   switch (status) {
     case guiInfoStatusTypes.COMPLETED:
       console.log('Scan completed');
+      silentLogger.info('Scan completed');
       break;
     case guiInfoStatusTypes.SCANNED:
     case guiInfoStatusTypes.SKIPPED:
    case guiInfoStatusTypes.ERROR:
    case guiInfoStatusTypes.DUPLICATE:
-      console.log(
-        `crawling::${data.numScanned || 0}::${status}::${
+      const msg = `crawling::${data.numScanned || 0}::${status}::${
         data.urlScanned || 'no url provided'
-        }`,
-      );
+      }`;
+      console.log(msg);
+      silentLogger.info(msg);
       break;
     default:
       console.log(`Status provided to gui info log not recognized: ${status}`);
@@ -64,4 +88,6 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
   }
 };

+consoleLogger.info(`Logger writing to: ${errorsTxtPath}`);
+
 export { logFormat, consoleLogger, silentLogger };
@@ -29,6 +29,7 @@ import {
   getWcagCriteriaMap,
   categorizeWcagCriteria,
   getUserDataTxt,
+  register
 } from './utils.js';
 import { consoleLogger, silentLogger } from './logs.js';
 import itemTypeDescription from './constants/itemTypeDescription.js';
@@ -975,6 +976,8 @@ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filena
     ...getPlaywrightLaunchOptions(browser),
   });

+  register(context);
+
   const page = await context.newPage();

   const data = fs.readFileSync(htmlFilePath, { encoding: 'utf-8' });
@@ -1710,9 +1713,9 @@ const generateArtifacts = async (
   zip: string = undefined, // optional
   generateJsonFiles = false,
 ) => {
-  const intermediateDatasetsPath = `${getStoragePath(randomToken)}/crawlee`;
-  const oobeeAppVersion = getVersion();
   const storagePath = getStoragePath(randomToken);
+  const intermediateDatasetsPath = `${storagePath}/crawlee`;
+  const oobeeAppVersion = getVersion();

   const formatAboutStartTime = (dateString: string) => {
     const utcStartTimeDate = new Date(dateString);
@@ -1985,12 +1988,10 @@ const generateArtifacts = async (
   // Should consider refactor constants.userDataDirectory to be a parameter in future
   await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);

-  const foldersToRemove = ['crawlee', 'logs'];
-  for (const folder of foldersToRemove) {
-    const folderPath = path.join(storagePath, folder);
-    if (await fs.pathExists(folderPath)) {
-      await fs.remove(folderPath);
-    }
+  try {
+    fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
+  } catch (error) {
+    consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
   }

   // Take option if set
@@ -22,7 +22,7 @@ export const takeScreenshotForHTMLElements = async (
   for (const violation of violations) {
     if (screenshotCount >= maxScreenshots) {
       /*
-      silentLogger.warn(
+      consoleLogger.warn(
         `Skipping screenshots for ${violation.id} as maxScreenshots (${maxScreenshots}) exceeded. You can increase it by specifying a higher value when calling takeScreenshotForHTMLElements.`,
       );
       */
@@ -34,7 +34,7 @@ export const takeScreenshotForHTMLElements = async (

     // Check if rule ID is 'oobee-grading-text-contents' and skip screenshot logic
     if (rule === 'oobee-grading-text-contents') {
-      // silentLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
+      // consoleLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
       newViolations.push(violation); // Make sure it gets added
       continue;
     }
@@ -59,13 +59,13 @@ export const takeScreenshotForHTMLElements = async (
         nodeWithScreenshotPath.screenshotPath = screenshotPath;
         screenshotCount++;
       } else {
-        // silentLogger.info(`Element at ${currLocator} is not visible`);
+        // consoleLogger.info(`Element at ${currLocator} is not visible`);
       }

       break; // Stop looping after finding the first visible locator
     }
   } catch (e) {
-    // silentLogger.info(`Unable to take element screenshot at ${selector}`);
+    // consoleLogger.info(`Unable to take element screenshot at ${selector}`);
   }
 }
 newViolationNodes.push(nodeWithScreenshotPath);
@@ -12,17 +12,14 @@
 // extract tagname and attribute name from html tag
 // e.g. ["input", "type", "value", "role"] from <input type="text" value="..." role="..." />
 const getHtmlTagAndAttributes = (htmlString) => {
-  const regex = /<(\w+)(\s+(\w+)(\s*=\s*"[^"]*")?)*\s*\/?>/;
-  const match = htmlString.match(regex); // check if structure of html tag is valid
-
-  if (match) {
-    const tagName = match[1];
-    const attributes = match[0]
-      .match(/\w+\s*=\s*"[^"]*"/g) // extract attributes e.g. ['type="text"', 'value="..."']
-      .map((attr) => attr.match(/(\w+)\s*=/)[1]); // get the name e.g. "type" from each
-    return [tagName, ...attributes];
-  }
-  return [];
+  const tagMatch = htmlString.match(/^<\s*(\w+)/); // Get tag name
+  if (!tagMatch) return [];
+
+  const tagName = tagMatch[1];
+  const attrMatches = [...htmlString.matchAll(/(\w[\w-]*)\s*=\s*"[^"]*"/g)];
+  const attributes = attrMatches.map(match => match[1]);
+
+  return [tagName, ...attributes];
 };

 const rulesUsingRoles = [
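The rewrite drops the single brittle whole-tag regex in favour of a tag-name match plus `matchAll` over `attr="value"` pairs, which also picks up hyphenated attributes such as `aria-label`. Expected behaviour, derived from the code above:

```ts
getHtmlTagAndAttributes('<input type="text" value="hello" role="textbox" />');
// => ['input', 'type', 'value', 'role']

getHtmlTagAndAttributes('<button aria-label="Close" data-id="x1">');
// => ['button', 'aria-label', 'data-id']  (the old \w+ pattern saw only 'label' and 'id')

getHtmlTagAndAttributes('not an html tag');
// => []
```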
package/src/utils.ts CHANGED
@@ -9,9 +9,10 @@ import constants, {
   destinationPath,
   getIntermediateScreenshotsPath,
 } from './constants/constants.js';
-import { consoleLogger, silentLogger } from './logs.js';
+import { consoleLogger, errorsTxtPath, silentLogger } from './logs.js';
 import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
 import { constant } from 'lodash';
+import { errors } from 'playwright';

 export const getVersion = () => {
   const loadJSON = (filePath: string): { version: string } =>
@@ -84,7 +85,7 @@ export const getStoragePath = (randomToken: string): string => {

 export const createDetailsAndLogs = async (randomToken: string): Promise<void> => {
   const storagePath = getStoragePath(randomToken);
-  const logPath = `${getStoragePath(randomToken)}/logs`;
+  const logPath = `${storagePath}/logs`;
   try {
     await fs.ensureDir(storagePath);
@@ -230,11 +231,257 @@ export const createScreenshotsFolder = (randomToken: string): void => {
   }
 };

-export const cleanUp = (randomToken: string): void => {
-  fs.removeSync(randomToken);
-  fs.removeSync(path.join(process.env.APPDATA || '/tmp', randomToken));
-  fs.removeSync(path.join(getStoragePath(randomToken), 'crawlee'));
-  fs.removeSync(path.join(getStoragePath(randomToken), 'logs'));
+
+let __shuttingDown = false;
+let __stopAllLock: Promise<void> | null = null;
+
+/**
+ * Register a resource so it can be stopped later.
+ * Supports Crawlee crawlers, Playwright BrowserContexts, and Browsers.
+ */
+export function register(resource: any) {
+  const name = resource?.constructor?.name;
+
+  if (name?.endsWith('Crawler')) {
+    constants.resources.crawlers.add(resource);
+  } else if (name === 'BrowserContext') {
+    constants.resources.browserContexts.add(resource);
+  } else if (name === 'Browser') {
+    constants.resources.browsers.add(resource);
+  }
+
+  return resource;
+}
+
+/**
+ * Stops or tears down all tracked resources.
+ * @param mode "graceful" (finish in-flight), "abort" (drop in-flight), or "teardown" (close immediately)
+ * @param timeoutMs Max time to wait before forcing shutdown
+ */
+export async function stopAll({ mode = 'graceful', timeoutMs = 10_000 } = {}) {
+  if (__stopAllLock) return __stopAllLock; // prevent overlap
+  __stopAllLock = (async () => {
+    const timeout = (ms: number) => new Promise(res => setTimeout(res, ms));
+    consoleLogger.info(`Stop browsers starting, mode=${mode}, timeoutMs=${timeoutMs}`);
+
+    // --- Crawlers ---
+    for (const c of [...constants.resources.crawlers]) {
+      try {
+        const pool = (c as any).autoscaledPool;
+        if (pool && typeof pool.isRunning !== 'undefined' && !pool.isRunning) {
+          consoleLogger.info('Skipping crawler (already stopped)');
+          continue;
+        }
+
+        consoleLogger.info(`Closing crawler (${mode})...`);
+        if (mode === 'graceful') {
+          if (typeof c.stop === 'function') {
+            await Promise.race([c.stop(), timeout(timeoutMs)]);
+          }
+        } else if (mode === 'abort') {
+          pool?.abort?.();
+        } else {
+          if (typeof c.teardown === 'function') {
+            await Promise.race([c.teardown(), timeout(timeoutMs)]);
+          }
+        }
+        consoleLogger.info(`Crawler closed (${mode})`);
+      } catch (err) {
+        consoleLogger.warn(`Error stopping crawler: ${(err as Error).message}`);
+      } finally {
+        constants.resources.crawlers.delete(c);
+      }
+    }
+
+    // --- BrowserContexts ---
+    for (const ctx of [...constants.resources.browserContexts]) {
+      // compute once so we can also use in finally
+      const pagesArr = typeof ctx.pages === 'function' ? ctx.pages() : [];
+      const hasOpenPages = Array.isArray(pagesArr) && pagesArr.length > 0;
+
+      try {
+        const browser = typeof ctx.browser === 'function' ? ctx.browser() : null;
+        if (browser && (browser as any).isClosed?.()) {
+          consoleLogger.info('Skipping BrowserContext (browser already closed)');
+          continue;
+        }
+
+        // ➜ Graceful: don't kill contexts that are still doing work
+        if (mode === 'graceful' && hasOpenPages) {
+          consoleLogger.info(`Skipping BrowserContext in graceful (has ${pagesArr.length} open page(s))`);
+          continue; // leave it for the teardown pass
+        }
+
+        // (Optional speed-up) close pages first if any
+        if (hasOpenPages) {
+          consoleLogger.info(`Closing ${pagesArr.length} page(s) before context close...`);
+          for (const p of pagesArr) {
+            try { await Promise.race([p.close(), timeout(1500)]); } catch {}
+          }
+        }
+
+        consoleLogger.info('Closing BrowserContext...');
+        if (typeof ctx.close === 'function') {
+          await Promise.race([ctx.close(), timeout(timeoutMs)]);
+        }
+        consoleLogger.info('BrowserContext closed');
+
+        // also close its browser (persistent contexts)
+        const b = browser;
+        if (b && !(b as any).isClosed?.()) {
+          consoleLogger.info('Closing Browser (from context.browser())...');
+          if (typeof b.close === 'function') {
+            await Promise.race([b.close(), timeout(timeoutMs)]);
+          }
+          consoleLogger.info('Browser closed (from context.browser())');
+        }
+      } catch (err) {
+        consoleLogger.warn(`Error closing BrowserContext: ${(err as Error).message}`);
+      } finally {
+        // only delete from the set if we actually closed it (or tried to)
+        if (!(mode === 'graceful' && hasOpenPages)) {
+          constants.resources.browserContexts.delete(ctx);
+        }
+      }
+    }
+
+    // --- Browsers ---
+    for (const b of [...constants.resources.browsers]) {
+      try {
+        if ((b as any).isClosed?.()) {
+          consoleLogger.info('Skipping Browser (already closed)');
+          continue;
+        }
+
+        consoleLogger.info('Closing Browser...');
+        if (typeof b.close === 'function') {
+          await Promise.race([b.close(), timeout(timeoutMs)]);
+        }
+        consoleLogger.info('Browser closed');
+      } catch (err) {
+        consoleLogger.warn(`Error closing Browser: ${(err as Error).message}`);
+      } finally {
+        constants.resources.browsers.delete(b);
+      }
+    }
+
+    consoleLogger.info(`Stop browsers finished for mode=${mode}`);
+  })();
+
+  try {
+    await __stopAllLock;
+  } finally {
+    __stopAllLock = null;
+  }
+}
+
+export const cleanUp = async (randomToken?: string, isError: boolean = false): Promise<void> => {
+
+  if (isError) {
+    await stopAll({ mode: 'graceful', timeoutMs: 8000 });
+    await stopAll({ mode: 'teardown', timeoutMs: 4000 });
+  }
+
+  if (randomToken === undefined && constants.randomToken) {
+    randomToken = constants.randomToken;
+  }
+
+  if (constants.userDataDirectory) try {
+    fs.rmSync(constants.userDataDirectory, { recursive: true, force: true });
+  } catch (error) {
+    consoleLogger.warn(`Unable to force remove userDataDirectory: ${error.message}`);
+  }
+
+  if (randomToken !== undefined) {
+    const storagePath = getStoragePath(randomToken);
+
+    try {
+      fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
+    } catch (error) {
+      consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
+    }
+
+    let deleteErrorLogFile = true;
+
+    if (isError) {
+      let logsPath = storagePath;
+
+      if (process.env.OOBEE_LOGS_PATH) {
+        logsPath = process.env.OOBEE_LOGS_PATH;
+      }
+
+      if (fs.existsSync(errorsTxtPath)) {
+        try {
+          const logFilePath = path.join(logsPath, `logs-${randomToken}.txt`);
+          fs.copyFileSync(errorsTxtPath, logFilePath);
+          console.log(`An error occured. Log file is located at: ${logFilePath}`);

+        } catch (copyError) {
+          consoleLogger.error(`Error copying errors file during cleanup: ${copyError.message}`);
+          console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
+          deleteErrorLogFile = false; // Do not delete the log file if copy failed
+        }
+
+        if (deleteErrorLogFile && fs.existsSync(errorsTxtPath)) {
+          try {
+            fs.unlinkSync(errorsTxtPath);
+          } catch (error) {
+            consoleLogger.warn(`Unable to delete log file ${errorsTxtPath}: ${error.message}`);
+          }
+        }
+
+      }
+
+    }
+
+    if (fs.existsSync(storagePath) && fs.readdirSync(storagePath).length === 0) {
+      try {
+        fs.rmdirSync(storagePath);
+        consoleLogger.info(`Deleted empty storage path: ${storagePath}`);
+
+      } catch (error) {
+        consoleLogger.warn(`Error deleting empty storage path ${storagePath}: ${error.message}`);
+      }
+    }
+
+    consoleLogger.info(`Clean up completed for: ${randomToken}`);
+  }
+
+};
+
+export const cleanUpAndExit = async (
+  exitCode: number,
+  randomToken?: string,
+  isError: boolean = false,
+): Promise<void> => {
+  if (__shuttingDown) {
+    consoleLogger.info('Cleanup already in progress; ignoring duplicate exit request.');
+    return;
+  }
+  __shuttingDown = true;
+
+  try {
+    await cleanUp(randomToken, isError); // runs stopAll inside cleanUp
+  } catch (e: any) {
+    consoleLogger.warn(`Cleanup error: ${e?.message || e}`);
+  }
+
+  consoleLogger.info(`Exiting with code: ${exitCode}`);
+  process.exit(exitCode); // explicit exit after cleanup completes
+};
+
+export const listenForCleanUp = (randomToken: string): void => {
+  consoleLogger.info(`PID: ${process.pid}`);
+
+  process.on('SIGINT', async () => { // ← keep handler installed
+    consoleLogger.info('SIGINT received. Cleaning up and exiting.');
+    await cleanUpAndExit(130, randomToken, true);
+  });
+
+  process.on('SIGTERM', async () => { // ← keep handler installed
+    consoleLogger.info('SIGTERM received. Cleaning up and exiting.');
+    await cleanUpAndExit(143, randomToken, true);
+  });
 };

 export const getWcagPassPercentage = (
@@ -757,8 +1004,8 @@ export const zipResults = (zipName: string, resultsPath: string): void => {

   if (os.platform() === 'win32') {
     execSync(
-      `Get-ChildItem -Path "*.*" -Recurse | Compress-Archive -DestinationPath "${zipFilePath}"`,
-      { shell: 'powershell.exe', cwd: resultsPath },
+      `tar.exe -a -c -f "${zipFilePath}" *`,
+      { cwd: resultsPath },
     );
   } else {
     // Get zip command in Mac and Linux
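The Windows branch of `zipResults` now shells out to `tar.exe` instead of PowerShell's `Compress-Archive`. A sketch of why this still produces a zip, assuming the bsdtar bundled with recent Windows 10/11: its `-a` (auto-compress) flag picks the archive format from the destination file's extension:

```ts
import { execSync } from 'child_process';

// bsdtar infers the zip format from the ".zip" suffix when -a is given,
// so no PowerShell session is needed. (Sketch; paths are illustrative.)
const zipFilePath = 'C:\\results\\a11y-scan-results.zip'; // hypothetical
execSync(`tar.exe -a -c -f "${zipFilePath}" *`, { cwd: 'C:\\results\\report' });
```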