@govtechsg/oobee 0.10.61 → 0.10.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +8 -3
- package/README.md +3 -0
- package/package.json +4 -2
- package/src/cli.ts +32 -39
- package/src/combine.ts +6 -8
- package/src/constants/cliFunctions.ts +5 -4
- package/src/constants/common.ts +72 -54
- package/src/constants/constants.ts +56 -50
- package/src/constants/questions.ts +15 -4
- package/src/crawlers/commonCrawlerFunc.ts +2 -2
- package/src/crawlers/crawlDomain.ts +4 -3
- package/src/crawlers/crawlIntelligentSitemap.ts +2 -3
- package/src/crawlers/crawlLocalFile.ts +31 -31
- package/src/crawlers/crawlSitemap.ts +10 -9
- package/src/crawlers/custom/utils.ts +2 -2
- package/src/crawlers/pdfScanFunc.ts +24 -51
- package/src/crawlers/runCustom.ts +4 -3
- package/src/index.ts +7 -5
- package/src/logs.ts +35 -9
- package/src/mergeAxeResults.ts +23 -11
- package/src/npmIndex.ts +2 -3
- package/src/proxyService.ts +405 -0
- package/src/screenshotFunc/htmlScreenshotFunc.ts +4 -4
- package/src/screenshotFunc/pdfScreenshotFunc.ts +2 -5
- package/src/static/ejs/partials/scripts/utils.ejs +8 -11
- package/src/utils.ts +310 -65
package/Dockerfile
CHANGED
|
@@ -2,9 +2,14 @@
|
|
|
2
2
|
# Node version is v22
|
|
3
3
|
FROM mcr.microsoft.com/playwright:v1.50.1-noble
|
|
4
4
|
|
|
5
|
-
# Installation of packages for oobee and runner
|
|
6
|
-
RUN apt-get update && apt-get install -y
|
|
7
|
-
|
|
5
|
+
# Installation of packages for oobee and runner (locked versions from build log)
|
|
6
|
+
RUN apt-get update && apt-get install -y \
|
|
7
|
+
git=1:2.43.0-1ubuntu7.3 \
|
|
8
|
+
git-man=1:2.43.0-1ubuntu7.3 \
|
|
9
|
+
unzip=6.0-28ubuntu4.1 \
|
|
10
|
+
zip=3.0-13ubuntu0.2 \
|
|
11
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
12
|
+
|
|
8
13
|
WORKDIR /app/oobee
|
|
9
14
|
|
|
10
15
|
# Clone oobee repository
|
package/README.md
CHANGED
|
@@ -86,7 +86,10 @@ verapdf --version
|
|
|
86
86
|
| OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
|
|
87
87
|
| OOBEE_FAST_CRAWLER| When set to `true`, increases scan concurrency at a rapid rate. Experimental, may cause system stability issues on low-powered devices. | `false`|
|
|
88
88
|
| OOBEE_VALIDATE_URL| When set to `true`, validates if URLs are valid and exits. | `false` |
|
|
89
|
+
| OOBEE_LOGS_PATH | When set, logs are written to this path. | |
|
|
89
90
|
| WARN_LEVEL | Only used in tests. | |
|
|
91
|
+
| OOBEE_DISABLE_BROWSER_DOWNLOAD | Experimental flag to disable file downloads on Chrome/Chromium/Edge. Does not affect Local File scans. | |
|
|
92
|
+
| OOBEE_SLOWMO | Experimental flag to slow down web browser behaviour by a specified duration (in milliseconds) | |
|
|
90
93
|
|
|
91
94
|
#### Environment variables used internally (Do not set)
|
|
92
95
|
Do not set these environment variables or behaviour might change unexpectedly.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@govtechsg/oobee",
|
|
3
3
|
"main": "dist/npmIndex.js",
|
|
4
|
-
"version": "0.10.
|
|
4
|
+
"version": "0.10.65",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
|
7
7
|
"dependencies": {
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
"https": "^1.0.0",
|
|
21
21
|
"inquirer": "^9.2.12",
|
|
22
22
|
"jsdom": "^21.1.2",
|
|
23
|
+
"jszip": "^3.10.1",
|
|
23
24
|
"lodash": "^4.17.21",
|
|
24
25
|
"mime-types": "^2.1.35",
|
|
25
26
|
"minimatch": "^9.0.3",
|
|
@@ -69,7 +70,8 @@
|
|
|
69
70
|
"ansi-regex": "^5.0.1",
|
|
70
71
|
"tough-cookie": "^5.0.0-rc.2",
|
|
71
72
|
"micromatch": "github:micromatch/micromatch.git#4.0.8",
|
|
72
|
-
"brace-expansion": "^1.1.12"
|
|
73
|
+
"brace-expansion": "^1.1.12",
|
|
74
|
+
"tmp": "0.2.4"
|
|
73
75
|
},
|
|
74
76
|
"optionalDependencies": {
|
|
75
77
|
"@napi-rs/canvas-darwin-arm64": "^0.1.53",
|
package/src/cli.ts
CHANGED
|
@@ -5,19 +5,17 @@ import printMessage from 'print-message';
|
|
|
5
5
|
import { devices } from 'playwright';
|
|
6
6
|
import { fileURLToPath } from 'url';
|
|
7
7
|
import path from 'path';
|
|
8
|
-
import {
|
|
8
|
+
import { setHeadlessMode, getVersion, getStoragePath, listenForCleanUp, cleanUpAndExit } from './utils.js';
|
|
9
9
|
import {
|
|
10
10
|
checkUrl,
|
|
11
11
|
prepareData,
|
|
12
12
|
getFileSitemap,
|
|
13
13
|
validEmail,
|
|
14
14
|
validName,
|
|
15
|
-
deleteClonedProfiles,
|
|
16
15
|
getScreenToScan,
|
|
17
16
|
validateDirPath,
|
|
18
17
|
validateFilePath,
|
|
19
18
|
validateCustomFlowLabel,
|
|
20
|
-
parseHeaders,
|
|
21
19
|
} from './constants/common.js';
|
|
22
20
|
import constants, { ScannerTypes } from './constants/constants.js';
|
|
23
21
|
import { cliOptions, messageOptions } from './constants/cliFunctions.js';
|
|
@@ -54,20 +52,20 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
54
52
|
[`Invalid device. Please provide an existing device to start the scan.`],
|
|
55
53
|
messageOptions,
|
|
56
54
|
);
|
|
57
|
-
|
|
55
|
+
cleanUpAndExit(1);
|
|
58
56
|
}
|
|
59
57
|
return option;
|
|
60
58
|
})
|
|
61
59
|
.coerce('w', option => {
|
|
62
60
|
if (!option || Number.isNaN(option)) {
|
|
63
61
|
printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
|
|
64
|
-
|
|
62
|
+
cleanUpAndExit(1);
|
|
65
63
|
} else if (option < 320 || option > 1080) {
|
|
66
64
|
printMessage(
|
|
67
65
|
['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
|
|
68
66
|
messageOptions,
|
|
69
67
|
);
|
|
70
|
-
|
|
68
|
+
cleanUpAndExit(1);
|
|
71
69
|
}
|
|
72
70
|
return option;
|
|
73
71
|
})
|
|
@@ -77,7 +75,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
77
75
|
[`Invalid maximum number of pages. Please provide a positive integer.`],
|
|
78
76
|
messageOptions,
|
|
79
77
|
);
|
|
80
|
-
|
|
78
|
+
cleanUpAndExit(1);
|
|
81
79
|
}
|
|
82
80
|
return option;
|
|
83
81
|
})
|
|
@@ -87,7 +85,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
87
85
|
[`Invalid number for max concurrency. Please provide a positive integer.`],
|
|
88
86
|
messageOptions,
|
|
89
87
|
);
|
|
90
|
-
|
|
88
|
+
cleanUpAndExit(1);
|
|
91
89
|
}
|
|
92
90
|
return option;
|
|
93
91
|
})
|
|
@@ -97,23 +95,23 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
97
95
|
[`Invalid format. Please provide your name and email address separated by ":"`],
|
|
98
96
|
messageOptions,
|
|
99
97
|
);
|
|
100
|
-
|
|
98
|
+
cleanUpAndExit(1);
|
|
101
99
|
}
|
|
102
100
|
const [name, email] = nameEmail.split(':');
|
|
103
101
|
if (name === '' || name === undefined || name === null) {
|
|
104
102
|
printMessage([`Please provide your name.`], messageOptions);
|
|
105
|
-
|
|
103
|
+
cleanUpAndExit(1);
|
|
106
104
|
}
|
|
107
105
|
if (!validName(name)) {
|
|
108
106
|
printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
|
|
109
|
-
|
|
107
|
+
cleanUpAndExit(1);
|
|
110
108
|
}
|
|
111
109
|
if (!validEmail(email)) {
|
|
112
110
|
printMessage(
|
|
113
111
|
[`Invalid email address. Please provide a valid email address.`],
|
|
114
112
|
messageOptions,
|
|
115
113
|
);
|
|
116
|
-
|
|
114
|
+
cleanUpAndExit(1);
|
|
117
115
|
}
|
|
118
116
|
return nameEmail;
|
|
119
117
|
})
|
|
@@ -121,7 +119,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
121
119
|
const validationErrors = validateDirPath(option);
|
|
122
120
|
if (validationErrors) {
|
|
123
121
|
printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
|
|
124
|
-
|
|
122
|
+
cleanUpAndExit(1);
|
|
125
123
|
}
|
|
126
124
|
return option;
|
|
127
125
|
})
|
|
@@ -133,7 +131,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
133
131
|
return validateFilePath(option, dirname);
|
|
134
132
|
} catch (err) {
|
|
135
133
|
printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
|
|
136
|
-
|
|
134
|
+
cleanUpAndExit(1);
|
|
137
135
|
}
|
|
138
136
|
})
|
|
139
137
|
.coerce('i', option => {
|
|
@@ -143,7 +141,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
143
141
|
[`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
|
|
144
142
|
messageOptions,
|
|
145
143
|
);
|
|
146
|
-
|
|
144
|
+
cleanUpAndExit(1);
|
|
147
145
|
}
|
|
148
146
|
return option;
|
|
149
147
|
})
|
|
@@ -151,7 +149,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
151
149
|
const { isValid, errorMessage } = validateCustomFlowLabel(option);
|
|
152
150
|
if (!isValid) {
|
|
153
151
|
printMessage([errorMessage], messageOptions);
|
|
154
|
-
|
|
152
|
+
cleanUpAndExit(1);
|
|
155
153
|
}
|
|
156
154
|
return option;
|
|
157
155
|
})
|
|
@@ -162,7 +160,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
162
160
|
[`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
|
|
163
161
|
messageOptions,
|
|
164
162
|
);
|
|
165
|
-
|
|
163
|
+
cleanUpAndExit(1);
|
|
166
164
|
}
|
|
167
165
|
return option;
|
|
168
166
|
})
|
|
@@ -199,7 +197,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
199
197
|
['Invalid scan duration. Please provide a positive number of seconds.'],
|
|
200
198
|
messageOptions,
|
|
201
199
|
);
|
|
202
|
-
|
|
200
|
+
cleanUpAndExit(1);
|
|
203
201
|
}
|
|
204
202
|
return duration;
|
|
205
203
|
})
|
|
@@ -226,7 +224,8 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
226
224
|
|
|
227
225
|
const data = await prepareData(updatedArgvs);
|
|
228
226
|
|
|
229
|
-
|
|
227
|
+
// Executes cleanUp script if error encountered
|
|
228
|
+
listenForCleanUp(data.randomToken);
|
|
230
229
|
|
|
231
230
|
const res = await checkUrl(
|
|
232
231
|
data.type,
|
|
@@ -244,7 +243,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
244
243
|
data.url = res.url;
|
|
245
244
|
if (process.env.OOBEE_VALIDATE_URL) {
|
|
246
245
|
console.log('Url is valid');
|
|
247
|
-
|
|
246
|
+
cleanUpAndExit(0, data.randomToken);
|
|
248
247
|
}
|
|
249
248
|
|
|
250
249
|
break;
|
|
@@ -252,17 +251,17 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
252
251
|
case statuses.unauthorised.code: {
|
|
253
252
|
printMessage([statuses.unauthorised.message], messageOptions);
|
|
254
253
|
consoleLogger.info(statuses.unauthorised.message);
|
|
255
|
-
|
|
254
|
+
cleanUpAndExit(res.status);
|
|
256
255
|
}
|
|
257
256
|
case statuses.cannotBeResolved.code: {
|
|
258
257
|
printMessage([statuses.cannotBeResolved.message], messageOptions);
|
|
259
258
|
consoleLogger.info(statuses.cannotBeResolved.message);
|
|
260
|
-
|
|
259
|
+
cleanUpAndExit(res.status);
|
|
261
260
|
}
|
|
262
261
|
case statuses.systemError.code: {
|
|
263
262
|
printMessage([statuses.systemError.message], messageOptions);
|
|
264
263
|
consoleLogger.info(statuses.systemError.message);
|
|
265
|
-
|
|
264
|
+
cleanUpAndExit(res.status);
|
|
266
265
|
}
|
|
267
266
|
case statuses.invalidUrl.code: {
|
|
268
267
|
if (
|
|
@@ -271,7 +270,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
271
270
|
) {
|
|
272
271
|
printMessage([statuses.invalidUrl.message], messageOptions);
|
|
273
272
|
consoleLogger.info(statuses.invalidUrl.message);
|
|
274
|
-
|
|
273
|
+
cleanUpAndExit(res.status);
|
|
275
274
|
}
|
|
276
275
|
|
|
277
276
|
const finalFilePath = getFileSitemap(updatedArgvs.url);
|
|
@@ -281,40 +280,38 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
281
280
|
|
|
282
281
|
if (process.env.OOBEE_VALIDATE_URL) {
|
|
283
282
|
console.log('Url is valid');
|
|
284
|
-
|
|
283
|
+
cleanUpAndExit(0);
|
|
285
284
|
}
|
|
286
285
|
} else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
|
|
287
286
|
printMessage([statuses.notALocalFile.message], messageOptions);
|
|
288
287
|
consoleLogger.info(statuses.notALocalFile.message);
|
|
289
|
-
|
|
288
|
+
cleanUpAndExit(statuses.notALocalFile.code);
|
|
290
289
|
} else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
|
|
291
290
|
printMessage([statuses.notASitemap.message], messageOptions);
|
|
292
291
|
consoleLogger.info(statuses.notASitemap.message);
|
|
293
|
-
|
|
292
|
+
cleanUpAndExit(statuses.notASitemap.code);
|
|
294
293
|
}
|
|
295
294
|
break;
|
|
296
295
|
}
|
|
297
296
|
case statuses.notASitemap.code: {
|
|
298
297
|
printMessage([statuses.notASitemap.message], messageOptions);
|
|
299
298
|
consoleLogger.info(statuses.notASitemap.message);
|
|
300
|
-
|
|
299
|
+
cleanUpAndExit(res.status);
|
|
301
300
|
}
|
|
302
301
|
case statuses.notALocalFile.code: {
|
|
303
302
|
printMessage([statuses.notALocalFile.message], messageOptions);
|
|
304
303
|
consoleLogger.info(statuses.notALocalFile.message);
|
|
305
|
-
|
|
304
|
+
cleanUpAndExit(res.status);
|
|
306
305
|
}
|
|
307
306
|
case statuses.browserError.code: {
|
|
308
307
|
printMessage([statuses.browserError.message], messageOptions);
|
|
309
308
|
consoleLogger.info(statuses.browserError.message);
|
|
310
|
-
|
|
309
|
+
cleanUpAndExit(res.status);
|
|
311
310
|
}
|
|
312
311
|
default:
|
|
313
312
|
break;
|
|
314
313
|
}
|
|
315
314
|
|
|
316
|
-
deleteClonedProfiles(data.browser, data.randomToken);
|
|
317
|
-
|
|
318
315
|
if (process.env.OOBEE_VERBOSE) {
|
|
319
316
|
const randomTokenMessage = {
|
|
320
317
|
type: 'randomToken',
|
|
@@ -332,14 +329,10 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
332
329
|
);
|
|
333
330
|
|
|
334
331
|
printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
|
|
335
|
-
|
|
332
|
+
consoleLogger.info(`Oobee version: ${appVersion}`);
|
|
333
|
+
|
|
336
334
|
await combineRun(data, screenToScan);
|
|
337
335
|
|
|
338
|
-
deleteClonedProfiles(data.browser, data.randomToken);
|
|
339
|
-
|
|
340
|
-
// Delete dataset and request queues
|
|
341
|
-
cleanUp(data.randomToken);
|
|
342
|
-
|
|
343
336
|
return getStoragePath(data.randomToken);
|
|
344
337
|
};
|
|
345
338
|
|
|
@@ -375,6 +368,6 @@ const optionsAnswer: Answers = {
|
|
|
375
368
|
};
|
|
376
369
|
|
|
377
370
|
await scanInit(optionsAnswer);
|
|
378
|
-
|
|
371
|
+
cleanUpAndExit(0);
|
|
379
372
|
|
|
380
373
|
export default options;
|
package/src/combine.ts
CHANGED
|
@@ -5,7 +5,7 @@ import crawlDomain from './crawlers/crawlDomain.js';
|
|
|
5
5
|
import crawlLocalFile from './crawlers/crawlLocalFile.js';
|
|
6
6
|
import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
|
|
7
7
|
import generateArtifacts from './mergeAxeResults.js';
|
|
8
|
-
import { getHost, createAndUpdateResultsFolders,
|
|
8
|
+
import { getHost, createAndUpdateResultsFolders, cleanUpAndExit } from './utils.js';
|
|
9
9
|
import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
|
|
10
10
|
import { getBlackListedPatterns, submitForm } from './constants/common.js';
|
|
11
11
|
import { consoleLogger, silentLogger } from './logs.js';
|
|
@@ -80,7 +80,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
80
80
|
blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
|
|
81
81
|
} catch (error) {
|
|
82
82
|
consoleLogger.error(error);
|
|
83
|
-
|
|
83
|
+
cleanUpAndExit(1);
|
|
84
84
|
}
|
|
85
85
|
|
|
86
86
|
// remove basic-auth credentials from URL
|
|
@@ -213,12 +213,12 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
213
213
|
|
|
214
214
|
default:
|
|
215
215
|
consoleLogger.error(`type: ${type} not defined`);
|
|
216
|
-
|
|
216
|
+
cleanUpAndExit(1);
|
|
217
217
|
}
|
|
218
218
|
|
|
219
219
|
scanDetails.endTime = new Date();
|
|
220
220
|
scanDetails.urlsCrawled = urlsCrawledObj;
|
|
221
|
-
|
|
221
|
+
|
|
222
222
|
if (scanDetails.urlsCrawled) {
|
|
223
223
|
if (scanDetails.urlsCrawled.scanned.length > 0) {
|
|
224
224
|
await createAndUpdateResultsFolders(randomToken);
|
|
@@ -261,15 +261,13 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
261
261
|
|
|
262
262
|
// No page were scanned because the URL loaded does not meet the crawler requirements
|
|
263
263
|
printMessage([`No pages were scanned.`], alertMessageOptions);
|
|
264
|
-
|
|
265
|
-
process.exit(1);
|
|
264
|
+
cleanUpAndExit(1, randomToken, true);
|
|
266
265
|
}
|
|
267
266
|
} else {
|
|
268
267
|
|
|
269
268
|
// No page were scanned because the URL loaded does not meet the crawler requirements
|
|
270
269
|
printMessage([`No pages were scanned.`], alertMessageOptions);
|
|
271
|
-
|
|
272
|
-
process.exit(1);
|
|
270
|
+
cleanUpAndExit(1, randomToken, true);
|
|
273
271
|
}
|
|
274
272
|
};
|
|
275
273
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Options } from 'yargs';
|
|
2
2
|
import printMessage from 'print-message';
|
|
3
3
|
import { BrowserTypes, RuleFlags, ScannerTypes } from './constants.js';
|
|
4
|
+
import { cleanUpAndExit } from '../utils.js';
|
|
4
5
|
|
|
5
6
|
export const messageOptions = {
|
|
6
7
|
border: false,
|
|
@@ -53,7 +54,7 @@ export const cliOptions: { [key: string]: Options } = {
|
|
|
53
54
|
],
|
|
54
55
|
messageOptions,
|
|
55
56
|
);
|
|
56
|
-
|
|
57
|
+
cleanUpAndExit(1);
|
|
57
58
|
return null;
|
|
58
59
|
}
|
|
59
60
|
},
|
|
@@ -158,7 +159,7 @@ export const cliOptions: { [key: string]: Options } = {
|
|
|
158
159
|
],
|
|
159
160
|
messageOptions,
|
|
160
161
|
);
|
|
161
|
-
|
|
162
|
+
cleanUpAndExit(1);
|
|
162
163
|
return null;
|
|
163
164
|
}
|
|
164
165
|
},
|
|
@@ -282,7 +283,7 @@ export const cliOptions: { [key: string]: Options } = {
|
|
|
282
283
|
],
|
|
283
284
|
messageOptions,
|
|
284
285
|
);
|
|
285
|
-
|
|
286
|
+
cleanUpAndExit(1);
|
|
286
287
|
}
|
|
287
288
|
if (userChoices.length > 1 && userChoices.includes('default')) {
|
|
288
289
|
printMessage(
|
|
@@ -291,7 +292,7 @@ export const cliOptions: { [key: string]: Options } = {
|
|
|
291
292
|
],
|
|
292
293
|
messageOptions,
|
|
293
294
|
);
|
|
294
|
-
|
|
295
|
+
cleanUpAndExit(1);
|
|
295
296
|
}
|
|
296
297
|
return userChoices;
|
|
297
298
|
},
|
package/src/constants/common.ts
CHANGED
|
@@ -22,18 +22,18 @@ import constants, {
|
|
|
22
22
|
getDefaultChromeDataDir,
|
|
23
23
|
getDefaultEdgeDataDir,
|
|
24
24
|
getDefaultChromiumDataDir,
|
|
25
|
-
proxy,
|
|
26
25
|
// Legacy code start - Google Sheets submission
|
|
27
26
|
formDataFields,
|
|
28
27
|
// Legacy code end - Google Sheets submission
|
|
29
28
|
ScannerTypes,
|
|
30
29
|
BrowserTypes,
|
|
31
30
|
} from './constants.js';
|
|
32
|
-
import { consoleLogger
|
|
31
|
+
import { consoleLogger } from '../logs.js';
|
|
33
32
|
import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
|
|
34
|
-
import { randomThreeDigitNumberString } from '../utils.js';
|
|
33
|
+
import { cleanUpAndExit, randomThreeDigitNumberString, register } from '../utils.js';
|
|
35
34
|
import { Answers, Data } from '../index.js';
|
|
36
35
|
import { DeviceDescriptor } from '../types/types.js';
|
|
36
|
+
import { getProxyInfo, proxyInfoToResolution, ProxySettings } from '../proxyService.js';
|
|
37
37
|
|
|
38
38
|
// validateDirPath validates a provided directory path
|
|
39
39
|
// returns null if no error
|
|
@@ -304,7 +304,10 @@ const checkUrlConnectivityWithBrowser = async (
|
|
|
304
304
|
ignoreHTTPSErrors: true,
|
|
305
305
|
...getPlaywrightLaunchOptions(browserToRun),
|
|
306
306
|
...playwrightDeviceDetailsObject,
|
|
307
|
+
...(process.env.OOBEE_DISABLE_BROWSER_DOWNLOAD && { acceptDownloads: false }),
|
|
307
308
|
});
|
|
309
|
+
|
|
310
|
+
register(browserContext);
|
|
308
311
|
} catch (err) {
|
|
309
312
|
printMessage([`Unable to launch browser\n${err}`], messageOptions);
|
|
310
313
|
res.status = constants.urlCheckStatuses.browserError.code;
|
|
@@ -463,7 +466,7 @@ export const parseHeaders = (header?: string): Record<string, string> => {
|
|
|
463
466
|
],
|
|
464
467
|
messageOptions,
|
|
465
468
|
);
|
|
466
|
-
|
|
469
|
+
cleanUpAndExit(1);
|
|
467
470
|
}
|
|
468
471
|
allHeaders[headerValuePair[0]] = headerValuePair[1]; // {"header": "value", "header2": "value2", ...}
|
|
469
472
|
});
|
|
@@ -483,7 +486,7 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
|
|
|
483
486
|
viewportWidth,
|
|
484
487
|
maxpages,
|
|
485
488
|
strategy,
|
|
486
|
-
isLocalFileScan,
|
|
489
|
+
isLocalFileScan = false,
|
|
487
490
|
browserToRun,
|
|
488
491
|
nameEmail,
|
|
489
492
|
customFlowLabel,
|
|
@@ -508,6 +511,10 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
|
|
|
508
511
|
let username = '';
|
|
509
512
|
let password = '';
|
|
510
513
|
|
|
514
|
+
if (isFilePath(url)) {
|
|
515
|
+
argv.isLocalFileScan = true;
|
|
516
|
+
}
|
|
517
|
+
|
|
511
518
|
// Remove credentials from URL if not a local file scan
|
|
512
519
|
url = argv.isLocalFileScan
|
|
513
520
|
? url
|
|
@@ -548,7 +555,7 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
|
|
|
548
555
|
viewportWidth,
|
|
549
556
|
);
|
|
550
557
|
|
|
551
|
-
const { browserToRun: resolvedBrowser, clonedBrowserDataDir } = getBrowserToRun(browserToRun, true
|
|
558
|
+
const { browserToRun: resolvedBrowser, clonedBrowserDataDir } = getBrowserToRun(resultFilename, browserToRun, true);
|
|
552
559
|
browserToRun = resolvedBrowser;
|
|
553
560
|
|
|
554
561
|
const resolvedUserDataDirectory = getClonedProfilesWithRandomToken(browserToRun, resultFilename);
|
|
@@ -558,6 +565,9 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
|
|
|
558
565
|
await getUrlsFromRobotsTxt(url, browserToRun, resolvedUserDataDirectory, extraHTTPHeaders);
|
|
559
566
|
}
|
|
560
567
|
|
|
568
|
+
constants.userDataDirectory = resolvedUserDataDirectory;
|
|
569
|
+
constants.randomToken = resultFilename;
|
|
570
|
+
|
|
561
571
|
return {
|
|
562
572
|
type: scanner,
|
|
563
573
|
url: url,
|
|
@@ -676,6 +686,8 @@ const getRobotsTxtViaPlaywright = async (robotsUrl: string, browser: string, use
|
|
|
676
686
|
...(extraHTTPHeaders && { extraHTTPHeaders }),
|
|
677
687
|
});
|
|
678
688
|
|
|
689
|
+
register(browserContext);
|
|
690
|
+
|
|
679
691
|
const page = await browserContext.newPage();
|
|
680
692
|
|
|
681
693
|
await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });
|
|
@@ -848,6 +860,7 @@ export const getLinksFromSitemap = async (
|
|
|
848
860
|
},
|
|
849
861
|
);
|
|
850
862
|
|
|
863
|
+
register(browserContext);
|
|
851
864
|
const page = await browserContext.newPage();
|
|
852
865
|
|
|
853
866
|
await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
|
|
@@ -997,14 +1010,10 @@ export const validName = (name: string) => {
|
|
|
997
1010
|
* @returns object consisting of browser to run and cloned data directory
|
|
998
1011
|
*/
|
|
999
1012
|
export const getBrowserToRun = (
|
|
1013
|
+
randomToken: string,
|
|
1000
1014
|
preferredBrowser?: BrowserTypes,
|
|
1001
1015
|
isCli = false,
|
|
1002
|
-
randomToken?: string
|
|
1003
1016
|
): { browserToRun: BrowserTypes; clonedBrowserDataDir: string } => {
|
|
1004
|
-
|
|
1005
|
-
if (!randomToken) {
|
|
1006
|
-
randomToken = '';
|
|
1007
|
-
}
|
|
1008
1017
|
|
|
1009
1018
|
const platform = os.platform();
|
|
1010
1019
|
|
|
@@ -1589,26 +1598,21 @@ export const submitFormViaPlaywright = async (
|
|
|
1589
1598
|
userDataDirectory: string,
|
|
1590
1599
|
finalUrl: string,
|
|
1591
1600
|
) => {
|
|
1592
|
-
const dirName = `clone-${Date.now()}`;
|
|
1593
|
-
let clonedDir = null;
|
|
1594
|
-
if (proxy && browserToRun === BrowserTypes.EDGE) {
|
|
1595
|
-
clonedDir = cloneEdgeProfiles(dirName);
|
|
1596
|
-
} else if (proxy && browserToRun === BrowserTypes.CHROME) {
|
|
1597
|
-
clonedDir = cloneChromeProfiles(dirName);
|
|
1598
|
-
}
|
|
1599
1601
|
const browserContext = await constants.launcher.launchPersistentContext(
|
|
1600
|
-
|
|
1602
|
+
userDataDirectory,
|
|
1601
1603
|
{
|
|
1602
1604
|
...getPlaywrightLaunchOptions(browserToRun),
|
|
1603
1605
|
},
|
|
1604
1606
|
);
|
|
1605
1607
|
|
|
1608
|
+
register(browserContext);
|
|
1609
|
+
|
|
1606
1610
|
const page = await browserContext.newPage();
|
|
1607
1611
|
|
|
1608
1612
|
try {
|
|
1609
1613
|
await page.goto(finalUrl, {
|
|
1610
1614
|
timeout: 30000,
|
|
1611
|
-
|
|
1615
|
+
waitUntil: 'commit',
|
|
1612
1616
|
});
|
|
1613
1617
|
|
|
1614
1618
|
try {
|
|
@@ -1620,11 +1624,6 @@ export const submitFormViaPlaywright = async (
|
|
|
1620
1624
|
consoleLogger.error(error);
|
|
1621
1625
|
} finally {
|
|
1622
1626
|
await browserContext.close();
|
|
1623
|
-
if (proxy && browserToRun === BrowserTypes.EDGE) {
|
|
1624
|
-
deleteClonedEdgeProfiles(clonedDir);
|
|
1625
|
-
} else if (proxy && browserToRun === BrowserTypes.CHROME) {
|
|
1626
|
-
deleteClonedChromeProfiles(clonedDir);
|
|
1627
|
-
}
|
|
1628
1627
|
}
|
|
1629
1628
|
};
|
|
1630
1629
|
|
|
@@ -1663,19 +1662,17 @@ export const submitForm = async (
|
|
|
1663
1662
|
finalUrl += `&${formDataFields.redirectUrlField}=${scannedUrl}`;
|
|
1664
1663
|
}
|
|
1665
1664
|
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
if (browserToRun || constants.launcher === webkit) {
|
|
1674
|
-
await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
|
|
1675
|
-
}
|
|
1665
|
+
|
|
1666
|
+
try {
|
|
1667
|
+
await axios.get(finalUrl, { timeout: 2000 });
|
|
1668
|
+
} catch (error) {
|
|
1669
|
+
if (error.code === 'ECONNABORTED') {
|
|
1670
|
+
if (browserToRun || constants.launcher === webkit) {
|
|
1671
|
+
await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
|
|
1676
1672
|
}
|
|
1677
1673
|
}
|
|
1678
1674
|
}
|
|
1675
|
+
|
|
1679
1676
|
};
|
|
1680
1677
|
// Legacy code end - Google Sheets submission
|
|
1681
1678
|
|
|
@@ -1706,6 +1703,8 @@ export async function initModifiedUserAgent(
|
|
|
1706
1703
|
: '';
|
|
1707
1704
|
|
|
1708
1705
|
const browserContext = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, launchOptions);
|
|
1706
|
+
register(browserContext);
|
|
1707
|
+
|
|
1709
1708
|
const page = await browserContext.newPage();
|
|
1710
1709
|
|
|
1711
1710
|
// Retrieve the default user agent.
|
|
@@ -1724,42 +1723,61 @@ export async function initModifiedUserAgent(
|
|
|
1724
1723
|
// console.log('Modified User Agent:', modifiedUA);
|
|
1725
1724
|
}
|
|
1726
1725
|
|
|
1726
|
+
const cacheProxyInfo = getProxyInfo();
|
|
1727
|
+
|
|
1727
1728
|
/**
|
|
1728
1729
|
* @param {string} browser browser name ("chrome" or "edge", null for chromium, the default Playwright browser)
|
|
1729
1730
|
* @returns playwright launch options object. For more details: https://playwright.dev/docs/api/class-browsertype#browser-type-launch
|
|
1730
1731
|
*/
|
|
1731
1732
|
export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
}
|
|
1733
|
+
const channel = browser || undefined;
|
|
1734
|
+
|
|
1735
|
+
const resolution = proxyInfoToResolution(cacheProxyInfo);
|
|
1736
1736
|
|
|
1737
|
-
//
|
|
1738
|
-
|
|
1737
|
+
// Start with your base args
|
|
1738
|
+
const finalArgs = [...constants.launchOptionsArgs];
|
|
1739
|
+
|
|
1740
|
+
// Headless flags (unchanged)
|
|
1739
1741
|
if (process.env.CRAWLEE_HEADLESS === '1') {
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
+
if (!finalArgs.includes('--headless=new')) finalArgs.push('--headless=new');
|
|
1743
|
+
if (!finalArgs.includes('--mute-audio')) finalArgs.push('--mute-audio');
|
|
1744
|
+
}
|
|
1745
|
+
|
|
1746
|
+
// Map resolution to Playwright options
|
|
1747
|
+
let proxyOpt: ProxySettings | undefined;
|
|
1748
|
+
switch (resolution.kind) {
|
|
1749
|
+
case 'manual':
|
|
1750
|
+
proxyOpt = resolution.settings;
|
|
1751
|
+
break;
|
|
1752
|
+
case 'pac': {
|
|
1753
|
+
finalArgs.push(`--proxy-pac-url=${resolution.pacUrl}`);
|
|
1754
|
+
if (resolution.bypass) finalArgs.push(`--proxy-bypass-list=${resolution.bypass}`);
|
|
1755
|
+
break;
|
|
1756
|
+
}
|
|
1757
|
+
case 'none':
|
|
1758
|
+
// nothing
|
|
1759
|
+
break;
|
|
1742
1760
|
}
|
|
1743
1761
|
|
|
1744
1762
|
const options: LaunchOptions = {
|
|
1745
|
-
// Drop the --use-mock-keychain flag to allow MacOS devices
|
|
1746
|
-
// to use the cloned cookies.
|
|
1747
1763
|
ignoreDefaultArgs: ['--use-mock-keychain', '--headless'],
|
|
1748
|
-
|
|
1749
|
-
args: constants.launchOptionsArgs,
|
|
1764
|
+
args: finalArgs,
|
|
1750
1765
|
headless: false,
|
|
1751
|
-
...(channel && { channel }),
|
|
1766
|
+
...(channel && { channel }),
|
|
1767
|
+
...(proxyOpt ? { proxy: proxyOpt } : {}),
|
|
1752
1768
|
};
|
|
1753
1769
|
|
|
1754
|
-
//
|
|
1755
|
-
options.
|
|
1770
|
+
// SlowMo (unchanged)
|
|
1771
|
+
if (!options.slowMo && process.env.OOBEE_SLOWMO && Number(process.env.OOBEE_SLOWMO) >= 1) {
|
|
1772
|
+
options.slowMo = Number(process.env.OOBEE_SLOWMO);
|
|
1773
|
+
consoleLogger.info(`Enabled browser slowMo with value: ${process.env.OOBEE_SLOWMO}ms`);
|
|
1774
|
+
}
|
|
1756
1775
|
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
} else if (browser === BrowserTypes.EDGE && os.platform() === 'win32') {
|
|
1760
|
-
// edge should be in non-headless mode
|
|
1776
|
+
// Edge on Windows should not be headless (unchanged)
|
|
1777
|
+
if (browser === BrowserTypes.EDGE && os.platform() === 'win32') {
|
|
1761
1778
|
options.headless = false;
|
|
1762
1779
|
}
|
|
1780
|
+
|
|
1763
1781
|
return options;
|
|
1764
1782
|
};
|
|
1765
1783
|
|