@govtechsg/oobee 0.10.61 → 0.10.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Dockerfile CHANGED
@@ -2,9 +2,14 @@
2
2
  # Node version is v22
3
3
  FROM mcr.microsoft.com/playwright:v1.50.1-noble
4
4
 
5
- # Installation of packages for oobee and runner
6
- RUN apt-get update && apt-get install -y zip git
7
-
5
+ # Installation of packages for oobee and runner (locked versions from build log)
6
+ RUN apt-get update && apt-get install -y \
7
+ git=1:2.43.0-1ubuntu7.3 \
8
+ git-man=1:2.43.0-1ubuntu7.3 \
9
+ unzip=6.0-28ubuntu4.1 \
10
+ zip=3.0-13ubuntu0.2 \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
8
13
  WORKDIR /app/oobee
9
14
 
10
15
  # Clone oobee repository
package/README.md CHANGED
@@ -86,7 +86,10 @@ verapdf --version
86
86
  | OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
87
87
  | OOBEE_FAST_CRAWLER| When set to `true`, increases scan concurrency at a rapid rate. Experimental, may cause system stability issues on low-powered devices. | `false`|
88
88
  | OOBEE_VALIDATE_URL| When set to `true`, validates if URLs are valid and exits. | `false` |
89
+ | OOBEE_LOGS_PATH | When set, logs are written to this path. | |
89
90
  | WARN_LEVEL | Only used in tests. | |
91
+ | OOBEE_DISABLE_BROWSER_DOWNLOAD | Experimental flag to disable file downloads on Chrome/Chromium/Edge. Does not affect Local File scans. | |
92
+ | OOBEE_SLOWMO | Experimental flag to slow down web browser behaviour by the specified duration (in milliseconds) | |
90
93
 
91
94
  #### Environment variables used internally (Do not set)
92
95
  Do not set these environment variables or behaviour might change unexpectedly.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.61",
4
+ "version": "0.10.65",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "dependencies": {
@@ -20,6 +20,7 @@
20
20
  "https": "^1.0.0",
21
21
  "inquirer": "^9.2.12",
22
22
  "jsdom": "^21.1.2",
23
+ "jszip": "^3.10.1",
23
24
  "lodash": "^4.17.21",
24
25
  "mime-types": "^2.1.35",
25
26
  "minimatch": "^9.0.3",
@@ -69,7 +70,8 @@
69
70
  "ansi-regex": "^5.0.1",
70
71
  "tough-cookie": "^5.0.0-rc.2",
71
72
  "micromatch": "github:micromatch/micromatch.git#4.0.8",
72
- "brace-expansion": "^1.1.12"
73
+ "brace-expansion": "^1.1.12",
74
+ "tmp": "0.2.4"
73
75
  },
74
76
  "optionalDependencies": {
75
77
  "@napi-rs/canvas-darwin-arm64": "^0.1.53",
package/src/cli.ts CHANGED
@@ -5,19 +5,17 @@ import printMessage from 'print-message';
5
5
  import { devices } from 'playwright';
6
6
  import { fileURLToPath } from 'url';
7
7
  import path from 'path';
8
- import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
8
+ import { setHeadlessMode, getVersion, getStoragePath, listenForCleanUp, cleanUpAndExit } from './utils.js';
9
9
  import {
10
10
  checkUrl,
11
11
  prepareData,
12
12
  getFileSitemap,
13
13
  validEmail,
14
14
  validName,
15
- deleteClonedProfiles,
16
15
  getScreenToScan,
17
16
  validateDirPath,
18
17
  validateFilePath,
19
18
  validateCustomFlowLabel,
20
- parseHeaders,
21
19
  } from './constants/common.js';
22
20
  import constants, { ScannerTypes } from './constants/constants.js';
23
21
  import { cliOptions, messageOptions } from './constants/cliFunctions.js';
@@ -54,20 +52,20 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
54
52
  [`Invalid device. Please provide an existing device to start the scan.`],
55
53
  messageOptions,
56
54
  );
57
- process.exit(1);
55
+ cleanUpAndExit(1);
58
56
  }
59
57
  return option;
60
58
  })
61
59
  .coerce('w', option => {
62
60
  if (!option || Number.isNaN(option)) {
63
61
  printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
64
- process.exit(1);
62
+ cleanUpAndExit(1);
65
63
  } else if (option < 320 || option > 1080) {
66
64
  printMessage(
67
65
  ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
68
66
  messageOptions,
69
67
  );
70
- process.exit(1);
68
+ cleanUpAndExit(1);
71
69
  }
72
70
  return option;
73
71
  })
@@ -77,7 +75,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
77
75
  [`Invalid maximum number of pages. Please provide a positive integer.`],
78
76
  messageOptions,
79
77
  );
80
- process.exit(1);
78
+ cleanUpAndExit(1);
81
79
  }
82
80
  return option;
83
81
  })
@@ -87,7 +85,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
87
85
  [`Invalid number for max concurrency. Please provide a positive integer.`],
88
86
  messageOptions,
89
87
  );
90
- process.exit(1);
88
+ cleanUpAndExit(1);
91
89
  }
92
90
  return option;
93
91
  })
@@ -97,23 +95,23 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
97
95
  [`Invalid format. Please provide your name and email address separated by ":"`],
98
96
  messageOptions,
99
97
  );
100
- process.exit(1);
98
+ cleanUpAndExit(1);
101
99
  }
102
100
  const [name, email] = nameEmail.split(':');
103
101
  if (name === '' || name === undefined || name === null) {
104
102
  printMessage([`Please provide your name.`], messageOptions);
105
- process.exit(1);
103
+ cleanUpAndExit(1);
106
104
  }
107
105
  if (!validName(name)) {
108
106
  printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
109
- process.exit(1);
107
+ cleanUpAndExit(1);
110
108
  }
111
109
  if (!validEmail(email)) {
112
110
  printMessage(
113
111
  [`Invalid email address. Please provide a valid email address.`],
114
112
  messageOptions,
115
113
  );
116
- process.exit(1);
114
+ cleanUpAndExit(1);
117
115
  }
118
116
  return nameEmail;
119
117
  })
@@ -121,7 +119,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
121
119
  const validationErrors = validateDirPath(option);
122
120
  if (validationErrors) {
123
121
  printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
124
- process.exit(1);
122
+ cleanUpAndExit(1);
125
123
  }
126
124
  return option;
127
125
  })
@@ -133,7 +131,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
133
131
  return validateFilePath(option, dirname);
134
132
  } catch (err) {
135
133
  printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
136
- process.exit(1);
134
+ cleanUpAndExit(1);
137
135
  }
138
136
  })
139
137
  .coerce('i', option => {
@@ -143,7 +141,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
143
141
  [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
144
142
  messageOptions,
145
143
  );
146
- process.exit(1);
144
+ cleanUpAndExit(1);
147
145
  }
148
146
  return option;
149
147
  })
@@ -151,7 +149,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
151
149
  const { isValid, errorMessage } = validateCustomFlowLabel(option);
152
150
  if (!isValid) {
153
151
  printMessage([errorMessage], messageOptions);
154
- process.exit(1);
152
+ cleanUpAndExit(1);
155
153
  }
156
154
  return option;
157
155
  })
@@ -162,7 +160,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
162
160
  [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
163
161
  messageOptions,
164
162
  );
165
- process.exit(1);
163
+ cleanUpAndExit(1);
166
164
  }
167
165
  return option;
168
166
  })
@@ -199,7 +197,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
199
197
  ['Invalid scan duration. Please provide a positive number of seconds.'],
200
198
  messageOptions,
201
199
  );
202
- process.exit(1);
200
+ cleanUpAndExit(1);
203
201
  }
204
202
  return duration;
205
203
  })
@@ -226,7 +224,8 @@ const scanInit = async (argvs: Answers): Promise<string> => {
226
224
 
227
225
  const data = await prepareData(updatedArgvs);
228
226
 
229
- constants.userDataDirectory = data.userDataDirectory;
227
+ // Executes cleanUp script if error encountered
228
+ listenForCleanUp(data.randomToken);
230
229
 
231
230
  const res = await checkUrl(
232
231
  data.type,
@@ -244,7 +243,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
244
243
  data.url = res.url;
245
244
  if (process.env.OOBEE_VALIDATE_URL) {
246
245
  console.log('Url is valid');
247
- process.exit(0);
246
+ cleanUpAndExit(0, data.randomToken);
248
247
  }
249
248
 
250
249
  break;
@@ -252,17 +251,17 @@ const scanInit = async (argvs: Answers): Promise<string> => {
252
251
  case statuses.unauthorised.code: {
253
252
  printMessage([statuses.unauthorised.message], messageOptions);
254
253
  consoleLogger.info(statuses.unauthorised.message);
255
- process.exit(res.status);
254
+ cleanUpAndExit(res.status);
256
255
  }
257
256
  case statuses.cannotBeResolved.code: {
258
257
  printMessage([statuses.cannotBeResolved.message], messageOptions);
259
258
  consoleLogger.info(statuses.cannotBeResolved.message);
260
- process.exit(res.status);
259
+ cleanUpAndExit(res.status);
261
260
  }
262
261
  case statuses.systemError.code: {
263
262
  printMessage([statuses.systemError.message], messageOptions);
264
263
  consoleLogger.info(statuses.systemError.message);
265
- process.exit(res.status);
264
+ cleanUpAndExit(res.status);
266
265
  }
267
266
  case statuses.invalidUrl.code: {
268
267
  if (
@@ -271,7 +270,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
271
270
  ) {
272
271
  printMessage([statuses.invalidUrl.message], messageOptions);
273
272
  consoleLogger.info(statuses.invalidUrl.message);
274
- process.exit(res.status);
273
+ cleanUpAndExit(res.status);
275
274
  }
276
275
 
277
276
  const finalFilePath = getFileSitemap(updatedArgvs.url);
@@ -281,40 +280,38 @@ const scanInit = async (argvs: Answers): Promise<string> => {
281
280
 
282
281
  if (process.env.OOBEE_VALIDATE_URL) {
283
282
  console.log('Url is valid');
284
- process.exit(0);
283
+ cleanUpAndExit(0);
285
284
  }
286
285
  } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
287
286
  printMessage([statuses.notALocalFile.message], messageOptions);
288
287
  consoleLogger.info(statuses.notALocalFile.message);
289
- process.exit(statuses.notALocalFile.code);
288
+ cleanUpAndExit(statuses.notALocalFile.code);
290
289
  } else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
291
290
  printMessage([statuses.notASitemap.message], messageOptions);
292
291
  consoleLogger.info(statuses.notASitemap.message);
293
- process.exit(statuses.notASitemap.code);
292
+ cleanUpAndExit(statuses.notASitemap.code);
294
293
  }
295
294
  break;
296
295
  }
297
296
  case statuses.notASitemap.code: {
298
297
  printMessage([statuses.notASitemap.message], messageOptions);
299
298
  consoleLogger.info(statuses.notASitemap.message);
300
- process.exit(res.status);
299
+ cleanUpAndExit(res.status);
301
300
  }
302
301
  case statuses.notALocalFile.code: {
303
302
  printMessage([statuses.notALocalFile.message], messageOptions);
304
303
  consoleLogger.info(statuses.notALocalFile.message);
305
- process.exit(res.status);
304
+ cleanUpAndExit(res.status);
306
305
  }
307
306
  case statuses.browserError.code: {
308
307
  printMessage([statuses.browserError.message], messageOptions);
309
308
  consoleLogger.info(statuses.browserError.message);
310
- process.exit(res.status);
309
+ cleanUpAndExit(res.status);
311
310
  }
312
311
  default:
313
312
  break;
314
313
  }
315
314
 
316
- deleteClonedProfiles(data.browser, data.randomToken);
317
-
318
315
  if (process.env.OOBEE_VERBOSE) {
319
316
  const randomTokenMessage = {
320
317
  type: 'randomToken',
@@ -332,14 +329,10 @@ const scanInit = async (argvs: Answers): Promise<string> => {
332
329
  );
333
330
 
334
331
  printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
335
-
332
+ consoleLogger.info(`Oobee version: ${appVersion}`);
333
+
336
334
  await combineRun(data, screenToScan);
337
335
 
338
- deleteClonedProfiles(data.browser, data.randomToken);
339
-
340
- // Delete dataset and request queues
341
- cleanUp(data.randomToken);
342
-
343
336
  return getStoragePath(data.randomToken);
344
337
  };
345
338
 
@@ -375,6 +368,6 @@ const optionsAnswer: Answers = {
375
368
  };
376
369
 
377
370
  await scanInit(optionsAnswer);
378
- process.exit(0);
371
+ cleanUpAndExit(0);
379
372
 
380
373
  export default options;
package/src/combine.ts CHANGED
@@ -5,7 +5,7 @@ import crawlDomain from './crawlers/crawlDomain.js';
5
5
  import crawlLocalFile from './crawlers/crawlLocalFile.js';
6
6
  import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
7
7
  import generateArtifacts from './mergeAxeResults.js';
8
- import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp } from './utils.js';
8
+ import { getHost, createAndUpdateResultsFolders, cleanUpAndExit } from './utils.js';
9
9
  import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
10
10
  import { getBlackListedPatterns, submitForm } from './constants/common.js';
11
11
  import { consoleLogger, silentLogger } from './logs.js';
@@ -80,7 +80,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
80
80
  blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
81
81
  } catch (error) {
82
82
  consoleLogger.error(error);
83
- process.exit(1);
83
+ cleanUpAndExit(1);
84
84
  }
85
85
 
86
86
  // remove basic-auth credentials from URL
@@ -213,12 +213,12 @@ const combineRun = async (details: Data, deviceToScan: string) => {
213
213
 
214
214
  default:
215
215
  consoleLogger.error(`type: ${type} not defined`);
216
- process.exit(1);
216
+ cleanUpAndExit(1);
217
217
  }
218
218
 
219
219
  scanDetails.endTime = new Date();
220
220
  scanDetails.urlsCrawled = urlsCrawledObj;
221
- await createDetailsAndLogs(randomToken);
221
+
222
222
  if (scanDetails.urlsCrawled) {
223
223
  if (scanDetails.urlsCrawled.scanned.length > 0) {
224
224
  await createAndUpdateResultsFolders(randomToken);
@@ -261,15 +261,13 @@ const combineRun = async (details: Data, deviceToScan: string) => {
261
261
 
262
262
  // No page were scanned because the URL loaded does not meet the crawler requirements
263
263
  printMessage([`No pages were scanned.`], alertMessageOptions);
264
- cleanUp(randomToken);
265
- process.exit(1);
264
+ cleanUpAndExit(1, randomToken, true);
266
265
  }
267
266
  } else {
268
267
 
269
268
  // No page were scanned because the URL loaded does not meet the crawler requirements
270
269
  printMessage([`No pages were scanned.`], alertMessageOptions);
271
- cleanUp(randomToken);
272
- process.exit(1);
270
+ cleanUpAndExit(1, randomToken, true);
273
271
  }
274
272
  };
275
273
 
@@ -1,6 +1,7 @@
1
1
  import { Options } from 'yargs';
2
2
  import printMessage from 'print-message';
3
3
  import { BrowserTypes, RuleFlags, ScannerTypes } from './constants.js';
4
+ import { cleanUpAndExit } from '../utils.js';
4
5
 
5
6
  export const messageOptions = {
6
7
  border: false,
@@ -53,7 +54,7 @@ export const cliOptions: { [key: string]: Options } = {
53
54
  ],
54
55
  messageOptions,
55
56
  );
56
- process.exit(1);
57
+ cleanUpAndExit(1);
57
58
  return null;
58
59
  }
59
60
  },
@@ -158,7 +159,7 @@ export const cliOptions: { [key: string]: Options } = {
158
159
  ],
159
160
  messageOptions,
160
161
  );
161
- process.exit(1);
162
+ cleanUpAndExit(1);
162
163
  return null;
163
164
  }
164
165
  },
@@ -282,7 +283,7 @@ export const cliOptions: { [key: string]: Options } = {
282
283
  ],
283
284
  messageOptions,
284
285
  );
285
- process.exit(1);
286
+ cleanUpAndExit(1);
286
287
  }
287
288
  if (userChoices.length > 1 && userChoices.includes('default')) {
288
289
  printMessage(
@@ -291,7 +292,7 @@ export const cliOptions: { [key: string]: Options } = {
291
292
  ],
292
293
  messageOptions,
293
294
  );
294
- process.exit(1);
295
+ cleanUpAndExit(1);
295
296
  }
296
297
  return userChoices;
297
298
  },
@@ -22,18 +22,18 @@ import constants, {
22
22
  getDefaultChromeDataDir,
23
23
  getDefaultEdgeDataDir,
24
24
  getDefaultChromiumDataDir,
25
- proxy,
26
25
  // Legacy code start - Google Sheets submission
27
26
  formDataFields,
28
27
  // Legacy code end - Google Sheets submission
29
28
  ScannerTypes,
30
29
  BrowserTypes,
31
30
  } from './constants.js';
32
- import { consoleLogger, silentLogger } from '../logs.js';
31
+ import { consoleLogger } from '../logs.js';
33
32
  import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
34
- import { randomThreeDigitNumberString } from '../utils.js';
33
+ import { cleanUpAndExit, randomThreeDigitNumberString, register } from '../utils.js';
35
34
  import { Answers, Data } from '../index.js';
36
35
  import { DeviceDescriptor } from '../types/types.js';
36
+ import { getProxyInfo, proxyInfoToResolution, ProxySettings } from '../proxyService.js';
37
37
 
38
38
  // validateDirPath validates a provided directory path
39
39
  // returns null if no error
@@ -304,7 +304,10 @@ const checkUrlConnectivityWithBrowser = async (
304
304
  ignoreHTTPSErrors: true,
305
305
  ...getPlaywrightLaunchOptions(browserToRun),
306
306
  ...playwrightDeviceDetailsObject,
307
+ ...(process.env.OOBEE_DISABLE_BROWSER_DOWNLOAD && { acceptDownloads: false }),
307
308
  });
309
+
310
+ register(browserContext);
308
311
  } catch (err) {
309
312
  printMessage([`Unable to launch browser\n${err}`], messageOptions);
310
313
  res.status = constants.urlCheckStatuses.browserError.code;
@@ -463,7 +466,7 @@ export const parseHeaders = (header?: string): Record<string, string> => {
463
466
  ],
464
467
  messageOptions,
465
468
  );
466
- process.exit(1);
469
+ cleanUpAndExit(1);
467
470
  }
468
471
  allHeaders[headerValuePair[0]] = headerValuePair[1]; // {"header": "value", "header2": "value2", ...}
469
472
  });
@@ -483,7 +486,7 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
483
486
  viewportWidth,
484
487
  maxpages,
485
488
  strategy,
486
- isLocalFileScan,
489
+ isLocalFileScan = false,
487
490
  browserToRun,
488
491
  nameEmail,
489
492
  customFlowLabel,
@@ -508,6 +511,10 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
508
511
  let username = '';
509
512
  let password = '';
510
513
 
514
+ if (isFilePath(url)) {
515
+ argv.isLocalFileScan = true;
516
+ }
517
+
511
518
  // Remove credentials from URL if not a local file scan
512
519
  url = argv.isLocalFileScan
513
520
  ? url
@@ -548,7 +555,7 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
548
555
  viewportWidth,
549
556
  );
550
557
 
551
- const { browserToRun: resolvedBrowser, clonedBrowserDataDir } = getBrowserToRun(browserToRun, true, resultFilename);
558
+ const { browserToRun: resolvedBrowser, clonedBrowserDataDir } = getBrowserToRun(resultFilename, browserToRun, true);
552
559
  browserToRun = resolvedBrowser;
553
560
 
554
561
  const resolvedUserDataDirectory = getClonedProfilesWithRandomToken(browserToRun, resultFilename);
@@ -558,6 +565,9 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
558
565
  await getUrlsFromRobotsTxt(url, browserToRun, resolvedUserDataDirectory, extraHTTPHeaders);
559
566
  }
560
567
 
568
+ constants.userDataDirectory = resolvedUserDataDirectory;
569
+ constants.randomToken = resultFilename;
570
+
561
571
  return {
562
572
  type: scanner,
563
573
  url: url,
@@ -676,6 +686,8 @@ const getRobotsTxtViaPlaywright = async (robotsUrl: string, browser: string, use
676
686
  ...(extraHTTPHeaders && { extraHTTPHeaders }),
677
687
  });
678
688
 
689
+ register(browserContext);
690
+
679
691
  const page = await browserContext.newPage();
680
692
 
681
693
  await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });
@@ -848,6 +860,7 @@ export const getLinksFromSitemap = async (
848
860
  },
849
861
  );
850
862
 
863
+ register(browserContext);
851
864
  const page = await browserContext.newPage();
852
865
 
853
866
  await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
@@ -997,14 +1010,10 @@ export const validName = (name: string) => {
997
1010
  * @returns object consisting of browser to run and cloned data directory
998
1011
  */
999
1012
  export const getBrowserToRun = (
1013
+ randomToken: string,
1000
1014
  preferredBrowser?: BrowserTypes,
1001
1015
  isCli = false,
1002
- randomToken?: string
1003
1016
  ): { browserToRun: BrowserTypes; clonedBrowserDataDir: string } => {
1004
-
1005
- if (!randomToken) {
1006
- randomToken = '';
1007
- }
1008
1017
 
1009
1018
  const platform = os.platform();
1010
1019
 
@@ -1589,26 +1598,21 @@ export const submitFormViaPlaywright = async (
1589
1598
  userDataDirectory: string,
1590
1599
  finalUrl: string,
1591
1600
  ) => {
1592
- const dirName = `clone-${Date.now()}`;
1593
- let clonedDir = null;
1594
- if (proxy && browserToRun === BrowserTypes.EDGE) {
1595
- clonedDir = cloneEdgeProfiles(dirName);
1596
- } else if (proxy && browserToRun === BrowserTypes.CHROME) {
1597
- clonedDir = cloneChromeProfiles(dirName);
1598
- }
1599
1601
  const browserContext = await constants.launcher.launchPersistentContext(
1600
- clonedDir || userDataDirectory,
1602
+ userDataDirectory,
1601
1603
  {
1602
1604
  ...getPlaywrightLaunchOptions(browserToRun),
1603
1605
  },
1604
1606
  );
1605
1607
 
1608
+ register(browserContext);
1609
+
1606
1610
  const page = await browserContext.newPage();
1607
1611
 
1608
1612
  try {
1609
1613
  await page.goto(finalUrl, {
1610
1614
  timeout: 30000,
1611
- ...(proxy && { waitUntil: 'commit' }),
1615
+ waitUntil: 'commit',
1612
1616
  });
1613
1617
 
1614
1618
  try {
@@ -1620,11 +1624,6 @@ export const submitFormViaPlaywright = async (
1620
1624
  consoleLogger.error(error);
1621
1625
  } finally {
1622
1626
  await browserContext.close();
1623
- if (proxy && browserToRun === BrowserTypes.EDGE) {
1624
- deleteClonedEdgeProfiles(clonedDir);
1625
- } else if (proxy && browserToRun === BrowserTypes.CHROME) {
1626
- deleteClonedChromeProfiles(clonedDir);
1627
- }
1628
1627
  }
1629
1628
  };
1630
1629
 
@@ -1663,19 +1662,17 @@ export const submitForm = async (
1663
1662
  finalUrl += `&${formDataFields.redirectUrlField}=${scannedUrl}`;
1664
1663
  }
1665
1664
 
1666
- if (proxy) {
1667
- await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
1668
- } else {
1669
- try {
1670
- await axios.get(finalUrl, { timeout: 2000 });
1671
- } catch (error) {
1672
- if (error.code === 'ECONNABORTED') {
1673
- if (browserToRun || constants.launcher === webkit) {
1674
- await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
1675
- }
1665
+
1666
+ try {
1667
+ await axios.get(finalUrl, { timeout: 2000 });
1668
+ } catch (error) {
1669
+ if (error.code === 'ECONNABORTED') {
1670
+ if (browserToRun || constants.launcher === webkit) {
1671
+ await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
1676
1672
  }
1677
1673
  }
1678
1674
  }
1675
+
1679
1676
  };
1680
1677
  // Legacy code end - Google Sheets submission
1681
1678
 
@@ -1706,6 +1703,8 @@ export async function initModifiedUserAgent(
1706
1703
  : '';
1707
1704
 
1708
1705
  const browserContext = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, launchOptions);
1706
+ register(browserContext);
1707
+
1709
1708
  const page = await browserContext.newPage();
1710
1709
 
1711
1710
  // Retrieve the default user agent.
@@ -1724,42 +1723,61 @@ export async function initModifiedUserAgent(
1724
1723
  // console.log('Modified User Agent:', modifiedUA);
1725
1724
  }
1726
1725
 
1726
+ const cacheProxyInfo = getProxyInfo();
1727
+
1727
1728
  /**
1728
1729
  * @param {string} browser browser name ("chrome" or "edge", null for chromium, the default Playwright browser)
1729
1730
  * @returns playwright launch options object. For more details: https://playwright.dev/docs/api/class-browsertype#browser-type-launch
1730
1731
  */
1731
1732
  export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
1732
- let channel: string;
1733
- if (browser) {
1734
- channel = browser;
1735
- }
1733
+ const channel = browser || undefined;
1734
+
1735
+ const resolution = proxyInfoToResolution(cacheProxyInfo);
1736
1736
 
1737
- // Set new headless mode as Chrome 132 does not support headless=old
1738
- // Also mute audio
1737
+ // Start with your base args
1738
+ const finalArgs = [...constants.launchOptionsArgs];
1739
+
1740
+ // Headless flags (unchanged)
1739
1741
  if (process.env.CRAWLEE_HEADLESS === '1') {
1740
- constants.launchOptionsArgs.push('--headless=new');
1741
- constants.launchOptionsArgs.push('--mute-audio');
1742
+ if (!finalArgs.includes('--headless=new')) finalArgs.push('--headless=new');
1743
+ if (!finalArgs.includes('--mute-audio')) finalArgs.push('--mute-audio');
1744
+ }
1745
+
1746
+ // Map resolution to Playwright options
1747
+ let proxyOpt: ProxySettings | undefined;
1748
+ switch (resolution.kind) {
1749
+ case 'manual':
1750
+ proxyOpt = resolution.settings;
1751
+ break;
1752
+ case 'pac': {
1753
+ finalArgs.push(`--proxy-pac-url=${resolution.pacUrl}`);
1754
+ if (resolution.bypass) finalArgs.push(`--proxy-bypass-list=${resolution.bypass}`);
1755
+ break;
1756
+ }
1757
+ case 'none':
1758
+ // nothing
1759
+ break;
1742
1760
  }
1743
1761
 
1744
1762
  const options: LaunchOptions = {
1745
- // Drop the --use-mock-keychain flag to allow MacOS devices
1746
- // to use the cloned cookies.
1747
1763
  ignoreDefaultArgs: ['--use-mock-keychain', '--headless'],
1748
- // necessary from Chrome 132 to use our own headless=new flag
1749
- args: constants.launchOptionsArgs,
1764
+ args: finalArgs,
1750
1765
  headless: false,
1751
- ...(channel && { channel }), // Having no channel is equivalent to "chromium"
1766
+ ...(channel && { channel }),
1767
+ ...(proxyOpt ? { proxy: proxyOpt } : {}),
1752
1768
  };
1753
1769
 
1754
- // Necessary as Chrome 132 does not support headless=old
1755
- options.headless = false;
1770
+ // SlowMo (unchanged)
1771
+ if (!options.slowMo && process.env.OOBEE_SLOWMO && Number(process.env.OOBEE_SLOWMO) >= 1) {
1772
+ options.slowMo = Number(process.env.OOBEE_SLOWMO);
1773
+ consoleLogger.info(`Enabled browser slowMo with value: ${process.env.OOBEE_SLOWMO}ms`);
1774
+ }
1756
1775
 
1757
- if (proxy) {
1758
- options.slowMo = 1000; // To ensure server-side rendered proxy page is loaded
1759
- } else if (browser === BrowserTypes.EDGE && os.platform() === 'win32') {
1760
- // edge should be in non-headless mode
1776
+ // Edge on Windows should not be headless (unchanged)
1777
+ if (browser === BrowserTypes.EDGE && os.platform() === 'win32') {
1761
1778
  options.headless = false;
1762
1779
  }
1780
+
1763
1781
  return options;
1764
1782
  };
1765
1783