@govtechsg/oobee 0.10.61 → 0.10.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/package.json +3 -2
- package/src/cli.ts +32 -38
- package/src/combine.ts +5 -7
- package/src/constants/cliFunctions.ts +5 -4
- package/src/constants/common.ts +14 -2
- package/src/constants/constants.ts +63 -31
- package/src/constants/questions.ts +0 -2
- package/src/crawlers/commonCrawlerFunc.ts +2 -2
- package/src/crawlers/crawlDomain.ts +3 -3
- package/src/crawlers/crawlIntelligentSitemap.ts +2 -0
- package/src/crawlers/crawlLocalFile.ts +3 -0
- package/src/crawlers/crawlSitemap.ts +3 -3
- package/src/crawlers/custom/utils.ts +2 -2
- package/src/crawlers/pdfScanFunc.ts +3 -2
- package/src/crawlers/runCustom.ts +4 -3
- package/src/index.ts +7 -5
- package/src/logs.ts +35 -9
- package/src/mergeAxeResults.ts +9 -8
- package/src/screenshotFunc/htmlScreenshotFunc.ts +4 -4
- package/src/static/ejs/partials/scripts/utils.ejs +8 -11
- package/src/utils.ts +256 -9
package/README.md
CHANGED
@@ -86,6 +86,7 @@ verapdf --version
 | OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
 | OOBEE_FAST_CRAWLER | When set to `true`, increases scan concurrency at a rapid rate. Experimental, may cause system stability issues on low-powered devices. | `false` |
 | OOBEE_VALIDATE_URL | When set to `true`, validates if URLs are valid and exits. | `false` |
+| OOBEE_LOGS_PATH | When set, logs are written to this path. | |
 | WARN_LEVEL | Only used in tests. | |
 
 #### Environment variables used internally (Do not set)
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@govtechsg/oobee",
   "main": "dist/npmIndex.js",
-  "version": "0.10.61",
+  "version": "0.10.62",
   "type": "module",
   "author": "Government Technology Agency <info@tech.gov.sg>",
   "dependencies": {
@@ -69,7 +69,8 @@
     "ansi-regex": "^5.0.1",
     "tough-cookie": "^5.0.0-rc.2",
     "micromatch": "github:micromatch/micromatch.git#4.0.8",
-    "brace-expansion": "^1.1.12"
+    "brace-expansion": "^1.1.12",
+    "tmp": "0.2.4"
   },
   "optionalDependencies": {
     "@napi-rs/canvas-darwin-arm64": "^0.1.53",
package/src/cli.ts
CHANGED
@@ -5,14 +5,13 @@ import printMessage from 'print-message';
 import { devices } from 'playwright';
 import { fileURLToPath } from 'url';
 import path from 'path';
-import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
+import { cleanUp, setHeadlessMode, getVersion, getStoragePath, listenForCleanUp, cleanUpAndExit } from './utils.js';
 import {
   checkUrl,
   prepareData,
   getFileSitemap,
   validEmail,
   validName,
-  deleteClonedProfiles,
   getScreenToScan,
   validateDirPath,
   validateFilePath,
@@ -54,20 +53,20 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid device. Please provide an existing device to start the scan.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
 .coerce('w', option => {
   if (!option || Number.isNaN(option)) {
     printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   } else if (option < 320 || option > 1080) {
     printMessage(
       ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -77,7 +76,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid maximum number of pages. Please provide a positive integer.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -87,7 +86,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid number for max concurrency. Please provide a positive integer.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -97,23 +96,23 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid format. Please provide your name and email address separated by ":"`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   const [name, email] = nameEmail.split(':');
   if (name === '' || name === undefined || name === null) {
     printMessage([`Please provide your name.`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (!validName(name)) {
     printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (!validEmail(email)) {
     printMessage(
       [`Invalid email address. Please provide a valid email address.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return nameEmail;
 })
@@ -121,7 +120,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
   const validationErrors = validateDirPath(option);
   if (validationErrors) {
     printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -133,7 +132,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
     return validateFilePath(option, dirname);
   } catch (err) {
     printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
 })
 .coerce('i', option => {
@@ -143,7 +142,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -151,7 +150,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
   const { isValid, errorMessage } = validateCustomFlowLabel(option);
   if (!isValid) {
     printMessage([errorMessage], messageOptions);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -162,7 +161,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
       [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return option;
 })
@@ -199,7 +198,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
     ['Invalid scan duration. Please provide a positive number of seconds.'],
     messageOptions,
   );
-  process.exit(1);
+  cleanUpAndExit(1);
 }
 return duration;
})
@@ -226,7 +225,8 @@ const scanInit = async (argvs: Answers): Promise<string> => {
 
   const data = await prepareData(updatedArgvs);
 
-
+  // Executes cleanUp script if error encountered
+  listenForCleanUp(data.randomToken);
 
   const res = await checkUrl(
     data.type,
@@ -244,7 +244,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
   data.url = res.url;
   if (process.env.OOBEE_VALIDATE_URL) {
     console.log('Url is valid');
-    process.exit(0);
+    cleanUpAndExit(0, data.randomToken);
   }
 
   break;
@@ -252,17 +252,17 @@ const scanInit = async (argvs: Answers): Promise<string> => {
 case statuses.unauthorised.code: {
   printMessage([statuses.unauthorised.message], messageOptions);
   consoleLogger.info(statuses.unauthorised.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 case statuses.cannotBeResolved.code: {
   printMessage([statuses.cannotBeResolved.message], messageOptions);
   consoleLogger.info(statuses.cannotBeResolved.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 case statuses.systemError.code: {
   printMessage([statuses.systemError.message], messageOptions);
   consoleLogger.info(statuses.systemError.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 case statuses.invalidUrl.code: {
   if (
@@ -271,7 +271,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
 ) {
   printMessage([statuses.invalidUrl.message], messageOptions);
   consoleLogger.info(statuses.invalidUrl.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 
 const finalFilePath = getFileSitemap(updatedArgvs.url);
@@ -281,40 +281,38 @@ const scanInit = async (argvs: Answers): Promise<string> => {
 
 if (process.env.OOBEE_VALIDATE_URL) {
   console.log('Url is valid');
-  process.exit(0);
+  cleanUpAndExit(0);
 }
 } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
   printMessage([statuses.notALocalFile.message], messageOptions);
   consoleLogger.info(statuses.notALocalFile.message);
-  process.exit(statuses.notALocalFile.code);
+  cleanUpAndExit(statuses.notALocalFile.code);
 } else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
   printMessage([statuses.notASitemap.message], messageOptions);
   consoleLogger.info(statuses.notASitemap.message);
-  process.exit(statuses.notASitemap.code);
+  cleanUpAndExit(statuses.notASitemap.code);
 }
 break;
}
 case statuses.notASitemap.code: {
   printMessage([statuses.notASitemap.message], messageOptions);
   consoleLogger.info(statuses.notASitemap.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 case statuses.notALocalFile.code: {
   printMessage([statuses.notALocalFile.message], messageOptions);
   consoleLogger.info(statuses.notALocalFile.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 case statuses.browserError.code: {
   printMessage([statuses.browserError.message], messageOptions);
   consoleLogger.info(statuses.browserError.message);
-  process.exit(res.status);
+  cleanUpAndExit(res.status);
 }
 default:
   break;
}

-deleteClonedProfiles(data.browser, data.randomToken);
-
 if (process.env.OOBEE_VERBOSE) {
   const randomTokenMessage = {
     type: 'randomToken',
@@ -332,14 +330,10 @@ const scanInit = async (argvs: Answers): Promise<string> => {
 );
 
 printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
-
+consoleLogger.info(`Oobee version: ${appVersion}`);
+
 await combineRun(data, screenToScan);
 
-deleteClonedProfiles(data.browser, data.randomToken);
-
-// Delete dataset and request queues
-cleanUp(data.randomToken);
-
 return getStoragePath(data.randomToken);
};

@@ -375,6 +369,6 @@ const optionsAnswer: Answers = {
};

await scanInit(optionsAnswer);
-process.exit(0);
+cleanUpAndExit(0);

export default options;
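The recurring change in this file swaps bare `process.exit(1)` inside yargs validators for `cleanUpAndExit(1)`, so an invalid flag still closes any registered browser contexts and removes temporary scan folders before the process dies. A minimal standalone sketch of the pattern; `cleanUpAndExit` is stubbed here, the real helper is added in package/src/utils.ts below:

```ts
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';

// Stub for the package's cleanUpAndExit: close tracked resources, then exit.
const cleanUpAndExit = async (code: number): Promise<void> => {
  // ...stopAll() and temp-folder removal would run here...
  process.exit(code);
};

yargs(hideBin(process.argv))
  .option('p', { type: 'number', describe: 'maximum number of pages to scan' })
  .coerce('p', option => {
    if (!Number.isInteger(option) || option <= 0) {
      console.error('Invalid maximum number of pages. Please provide a positive integer.');
      cleanUpAndExit(1); // was: process.exit(1)
    }
    return option;
  })
  .parseSync();
```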
package/src/combine.ts
CHANGED
@@ -5,7 +5,7 @@ import crawlDomain from './crawlers/crawlDomain.js';
 import crawlLocalFile from './crawlers/crawlLocalFile.js';
 import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
 import generateArtifacts from './mergeAxeResults.js';
-import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp } from './utils.js';
+import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs, cleanUp, cleanUpAndExit } from './utils.js';
 import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
 import { getBlackListedPatterns, submitForm } from './constants/common.js';
 import { consoleLogger, silentLogger } from './logs.js';
@@ -80,7 +80,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
     blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
   } catch (error) {
     consoleLogger.error(error);
-    process.exit(1);
+    cleanUpAndExit(1);
   }
 
   // remove basic-auth credentials from URL
@@ -213,7 +213,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
 
   default:
     consoleLogger.error(`type: ${type} not defined`);
-    process.exit(1);
+    cleanUpAndExit(1);
 }
 
 scanDetails.endTime = new Date();
@@ -261,15 +261,13 @@ const combineRun = async (details: Data, deviceToScan: string) => {
 
     // No page were scanned because the URL loaded does not meet the crawler requirements
     printMessage([`No pages were scanned.`], alertMessageOptions);
-
-    process.exit(1);
+    cleanUpAndExit(1, randomToken, true);
   }
 } else {
 
   // No page were scanned because the URL loaded does not meet the crawler requirements
   printMessage([`No pages were scanned.`], alertMessageOptions);
-
-  process.exit(1);
+  cleanUpAndExit(1, randomToken, true);
 }
};
package/src/constants/cliFunctions.ts
CHANGED
@@ -1,6 +1,7 @@
 import { Options } from 'yargs';
 import printMessage from 'print-message';
 import { BrowserTypes, RuleFlags, ScannerTypes } from './constants.js';
+import { cleanUpAndExit } from '../utils.js';
 
 export const messageOptions = {
   border: false,
@@ -53,7 +54,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
     return null;
   }
 },
@@ -158,7 +159,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
     return null;
   }
 },
@@ -282,7 +283,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   if (userChoices.length > 1 && userChoices.includes('default')) {
     printMessage(
@@ -291,7 +292,7 @@ export const cliOptions: { [key: string]: Options } = {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   return userChoices;
 },
package/src/constants/common.ts
CHANGED
@@ -31,7 +31,7 @@ import constants, {
 } from './constants.js';
 import { consoleLogger, silentLogger } from '../logs.js';
 import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
-import { randomThreeDigitNumberString } from '../utils.js';
+import { cleanUpAndExit, randomThreeDigitNumberString, register } from '../utils.js';
 import { Answers, Data } from '../index.js';
 import { DeviceDescriptor } from '../types/types.js';
 
@@ -305,6 +305,8 @@ const checkUrlConnectivityWithBrowser = async (
       ...getPlaywrightLaunchOptions(browserToRun),
       ...playwrightDeviceDetailsObject,
     });
+
+    register(browserContext);
   } catch (err) {
     printMessage([`Unable to launch browser\n${err}`], messageOptions);
     res.status = constants.urlCheckStatuses.browserError.code;
@@ -463,7 +465,7 @@ export const parseHeaders = (header?: string): Record<string, string> => {
       ],
       messageOptions,
     );
-    process.exit(1);
+    cleanUpAndExit(1);
   }
   allHeaders[headerValuePair[0]] = headerValuePair[1]; // {"header": "value", "header2": "value2", ...}
 });
@@ -558,6 +560,9 @@ export const prepareData = async (argv: Answers): Promise<Data> => {
     await getUrlsFromRobotsTxt(url, browserToRun, resolvedUserDataDirectory, extraHTTPHeaders);
   }
 
+  constants.userDataDirectory = resolvedUserDataDirectory;
+  constants.randomToken = resultFilename;
+
   return {
     type: scanner,
     url: url,
@@ -676,6 +681,8 @@ const getRobotsTxtViaPlaywright = async (robotsUrl: string, browser: string, use
     ...(extraHTTPHeaders && { extraHTTPHeaders }),
   });
 
+  register(browserContext);
+
   const page = await browserContext.newPage();
 
   await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });
@@ -848,6 +855,7 @@ export const getLinksFromSitemap = async (
   },
 );
 
+  register(browserContext);
   const page = await browserContext.newPage();
 
   await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
@@ -1603,6 +1611,8 @@ export const submitFormViaPlaywright = async (
   },
 );
 
+  register(browserContext);
+
  const page = await browserContext.newPage();
 
  try {
@@ -1706,6 +1716,8 @@ export async function initModifiedUserAgent(
     : '';
 
   const browserContext = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, launchOptions);
+  register(browserContext);
+
   const page = await browserContext.newPage();
 
   // Retrieve the default user agent.
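Every place this file launches a browser context now files it with `register()` so a later `stopAll()` pass can close it. A minimal sketch of that pass-through helper and its call shape; the module-level `Set` here stands in for the `constants.resources` structure added in constants.ts:

```ts
import { chromium, BrowserContext } from 'playwright';

const browserContexts = new Set<BrowserContext>(); // stands in for constants.resources.browserContexts

// register() files the resource by constructor name and returns it unchanged,
// so it can wrap a construction expression in place.
function register<T>(resource: T): T {
  if ((resource as any)?.constructor?.name === 'BrowserContext') {
    browserContexts.add(resource as unknown as BrowserContext);
  }
  return resource;
}

// Empty userDataDir gives a throwaway profile; values are illustrative.
const browserContext = register(await chromium.launchPersistentContext('', { headless: true }));
const page = await browserContext.newPage();
// ...a shutdown pass can now iterate browserContexts and close each entry.
```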
package/src/constants/constants.ts
CHANGED
@@ -5,10 +5,11 @@ import { globSync } from 'glob';
 import which from 'which';
 import os from 'os';
 import { spawnSync, execSync } from 'child_process';
-import { chromium } from 'playwright';
+import { Browser, BrowserContext, chromium } from 'playwright';
 import * as Sentry from '@sentry/node';
 import { consoleLogger, silentLogger } from '../logs.js';
 import { PageInfo } from '../mergeAxeResults.js';
+import { PlaywrightCrawler } from 'crawlee';
 
 const filename = fileURLToPath(import.meta.url);
 const dirname = path.dirname(filename);
@@ -136,7 +137,7 @@ export const getDefaultChromiumDataDir = () => {
   }
   return null;
 } catch (error) {
-
+  consoleLogger.error(`Error in getDefaultChromiumDataDir(): ${error}`);
 }
};
@@ -227,45 +228,68 @@ if (fs.existsSync('/.dockerenv')) {
   launchOptionsArgs = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'];
 }
 
-…
-      )
-…
-      silentLogger.error(e.toString());
-…
+type ProxyInfo = { type: 'autoConfig' | 'manualProxy'; url: string } | null;
+
+function queryRegKey(key: string): Record<string, string> {
+  try {
+    const out = execSync(`reg query "${key}"`, { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
+    const values: Record<string, string> = {};
+    for (const line of out.split(/\r?\n/)) {
+      const parts = line.trim().split(/\s{2,}/);
+      if (parts.length >= 3) {
+        const [name, _type, ...rest] = parts;
+        values[name] = rest.join(' ');
+      }
     }
+    return values;
+  } catch {
+    return {};
+  }
+}
 
+function parseDwordFlag(v: unknown): number {
+  if (v == null) return 0;
+  const s = String(v).trim();
+  // Handles "1", "0", "0x1", "0x0"
+  if (/^0x[0-9a-f]+$/i.test(s)) return parseInt(s, 16);
+  if (/^\d+$/.test(s)) return parseInt(s, 10);
+  return 0;
+}
 
+function normalizePacUrl(u: string): string {
+  const s = u.trim();
+  // If it lacks a scheme, assume http:// (Chrome requires a full URL)
+  return /^(https?|file):/i.test(s) ? s : `http://${s}`;
+}
+
+export const getProxy = (): ProxyInfo => {
+  if (os.platform() !== 'win32') return null;
+
+  const values = queryRegKey('HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings');
+  const pacUrlRaw = (values['AutoConfigURL'] || '').trim();
+  const proxyEnableRaw = (values['ProxyEnable'] || '').trim();
+  const proxyServerRaw = (values['ProxyServer'] || '').trim();
+
+  // 1) PAC beats manual proxy if present
+  if (pacUrlRaw) {
+    return { type: 'autoConfig', url: normalizePacUrl(pacUrlRaw) };
+  }
+
+  // 2) Manual proxy only if enabled
+  const enabled = parseDwordFlag(proxyEnableRaw) === 1;
+  if (enabled && proxyServerRaw) {
+    return { type: 'manualProxy', url: proxyServerRaw };
+  }
+
+  return null;
+};
+
+// Usage
 export const proxy = getProxy();
 
-if (proxy.type === 'autoConfig') {
+if (proxy?.type === 'autoConfig') {
   launchOptionsArgs.push(`--proxy-pac-url=${proxy.url}`);
-} else if (proxy.type === 'manualProxy') {
+} else if (proxy?.type === 'manualProxy') {
   launchOptionsArgs.push(`--proxy-server=${proxy.url}`);
 }
@@ -405,6 +429,7 @@ const urlCheckStatuses = {
   },
   axiosTimeout: { code: 18, message: 'Axios timeout exceeded. Falling back on browser checks.' },
   notALocalFile: { code: 19, message: 'Provided filepath is not a local html or sitemap file.' },
+  terminationRequested: { code: 15, message: 'Termination requested.' }
 };
 
 /* eslint-disable no-unused-vars */
@@ -467,6 +492,13 @@ export default {
   wcagLinks,
   robotsTxtUrls: null,
   userDataDirectory: null, // This will be set later in the code
+  randomToken: null, // This will be set later in the code
+  // Track all active Crawlee / Playwright resources for cleanup
+  resources: {
+    crawlers: new Set<PlaywrightCrawler>(),
+    browserContexts: new Set<BrowserContext>(),
+    browsers: new Set<Browser>(),
+  },
 };
 
 export const rootPath = dirname;
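The rewritten `getProxy` shells out to `reg query` and gives a PAC URL (`AutoConfigURL`) priority over a manual proxy, which only counts when `ProxyEnable` is 1. The two small helpers are easy to sanity-check in isolation; they are module-private in the package, so they are re-implemented here for illustration:

```ts
// reg query prints DWORDs as hex ("0x1"), so both decimal and hex forms must parse.
function parseDwordFlag(v: unknown): number {
  if (v == null) return 0;
  const s = String(v).trim();
  if (/^0x[0-9a-f]+$/i.test(s)) return parseInt(s, 16);
  if (/^\d+$/.test(s)) return parseInt(s, 10);
  return 0;
}

// Chrome's --proxy-pac-url flag needs a full URL, so a bare host gets http:// prepended.
function normalizePacUrl(u: string): string {
  const s = u.trim();
  return /^(https?|file):/i.test(s) ? s : `http://${s}`;
}

console.log(parseDwordFlag('0x1')); // 1 -> manual proxy enabled
console.log(parseDwordFlag('0'));   // 0 -> disabled
console.log(normalizePacUrl('proxy.corp.example/pac.js')); // "http://proxy.corp.example/pac.js"
```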
package/src/crawlers/commonCrawlerFunc.ts
CHANGED
@@ -318,9 +318,9 @@ export const runAxeScript = async ({
 page.on('console', msg => {
   const type = msg.type();
   if (type === 'error') {
-    …
+    consoleLogger.log({ level: 'error', message: msg.text() });
   } else {
-    …
+    consoleLogger.log({ level: 'info', message: msg.text() });
   }
 });
 */
package/src/crawlers/crawlDomain.ts
CHANGED
@@ -29,7 +29,7 @@ import {
   getUrlsFromRobotsTxt,
   waitForPageLoaded,
 } from '../constants/common.js';
-import { areLinksEqual, isFollowStrategy } from '../utils.js';
+import { areLinksEqual, isFollowStrategy, register } from '../utils.js';
 import {
   handlePdfDownload,
   runPdfScan,
@@ -339,7 +339,7 @@ const crawlDomain = async ({
 
   let isAbortingScanNow = false;
 
-  const crawler = new crawlee.PlaywrightCrawler({
+  const crawler = register(new crawlee.PlaywrightCrawler({
     launchContext: {
       launcher: constants.launcher,
       launchOptions: getPlaywrightLaunchOptions(browser),
@@ -723,7 +723,7 @@ const crawlDomain = async ({
       scaleDownStepRatio: 0.1, // Scale down slower
     },
   }),
-  });
+  }));
 
   await crawler.run();
package/src/crawlers/crawlIntelligentSitemap.ts
CHANGED
@@ -8,6 +8,7 @@ import crawlSitemap from './crawlSitemap.js';
 import { EnqueueStrategy } from 'crawlee';
 import { ViewportSettingsClass } from '../combine.js';
 import { getPlaywrightLaunchOptions } from '../constants/common.js';
+import { register } from '../utils.js';
 
 const crawlIntelligentSitemap = async (
   url: string,
@@ -58,6 +59,7 @@ const crawlIntelligentSitemap = async (
   ...getPlaywrightLaunchOptions(browser),
   ...(extraHTTPHeaders && { extraHTTPHeaders }),
 });
+register(context);
 
 const page = await context.newPage();
package/src/crawlers/crawlLocalFile.ts
CHANGED
@@ -17,6 +17,7 @@ import {
 import { runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
 import { guiInfoLog } from '../logs.js';
 import crawlSitemap from './crawlSitemap.js';
+import { register } from '../utils.js';
 
 export const crawlLocalFile = async ({
   url,
@@ -161,6 +162,8 @@ export const crawlLocalFile = async ({
   ...playwrightDeviceDetailsObject,
 });
 
+register(browserContext);
+
 const timeoutId = scanDuration > 0
   ? setTimeout(() => {
     console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting local file scan.`);
package/src/crawlers/crawlSitemap.ts
CHANGED
@@ -20,7 +20,7 @@ import {
   waitForPageLoaded,
   isFilePath,
 } from '../constants/common.js';
-import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
+import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
 import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
 import { guiInfoLog } from '../logs.js';
 import { ViewportSettingsClass } from '../combine.js';
@@ -106,7 +106,7 @@ const crawlSitemap = async ({
   sources: linksFromSitemap,
 });
 
-const crawler = new crawlee.PlaywrightCrawler({
+const crawler = register(new crawlee.PlaywrightCrawler({
   launchContext: {
     launcher: constants.launcher,
     launchOptions: getPlaywrightLaunchOptions(browser),
@@ -395,7 +395,7 @@ const crawlSitemap = async ({
     scaleDownStepRatio: 0.1, // Scale down slower
   },
 }),
-});
+}));
 
 await crawler.run();
package/src/crawlers/custom/utils.ts
CHANGED
@@ -78,7 +78,7 @@ export const screenshotFullPage = async (page, screenshotsDir: string, screensho
 });
 
 consoleLogger.info(`Screenshot page at: ${page.url()}`);
-…
+consoleLogger.info(`Screenshot page at: ${page.url()}`);
 
 await page.screenshot({
   timeout: 5000,
@@ -469,7 +469,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
   consoleLogger.info(`Overlay state: ${existingOverlay}`);
 } catch {
   consoleLogger.info('Error in adding overlay menu to page');
-…
+  consoleLogger.info('Error in adding overlay menu to page');
 }
});
package/src/crawlers/pdfScanFunc.ts
CHANGED
@@ -15,6 +15,7 @@ import constants, {
   STATUS_CODE_METADATA,
   UrlsCrawled,
 } from '../constants/constants.js';
+import { cleanUpAndExit } from '../utils.js';
 
 const require = createRequire(import.meta.url);
 
@@ -233,7 +234,7 @@ const getVeraExecutable = () => {
   const veraPdfExeNotFoundError =
     'Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.';
   consoleLogger.error(veraPdfExeNotFoundError);
-…
+  consoleLogger.error(veraPdfExeNotFoundError);
 }
 return veraPdfExe;
};
@@ -355,7 +356,7 @@ export const runPdfScan = async (randomToken: string) => {
     'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml',
   )}"`;
 if (!veraPdfExe || !veraPdfProfile) {
-  process.exit(1);
+  cleanUpAndExit(1);
 }
 
 const intermediateFolder = randomToken; // NOTE: assumes this folder is already created for crawlee
package/src/crawlers/runCustom.ts
CHANGED
@@ -1,7 +1,7 @@
 /* eslint-env browser */
 import { chromium } from 'playwright';
 import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
-import { cleanUp } from '../utils.js';
+import { cleanUpAndExit, register} from '../utils.js';
 import constants, {
   getIntermediateScreenshotsPath,
   guiInfoStatusTypes,
@@ -48,7 +48,6 @@ const runCustom = async (
   includeScreenshots: boolean,
 ) => {
   // checks and delete datasets path if it already exists
-  cleanUp(randomToken);
   process.env.CRAWLEE_STORAGE_DIR = randomToken;
 
   const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
@@ -83,6 +82,8 @@ const runCustom = async (
   ...viewportSettings.playwrightDeviceDetailsObject,
 });
 
+register(context);
+
 // Detection of new page
 context.on('page', async newPage => {
   await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
@@ -107,7 +108,7 @@ const runCustom = async (
   await allPagesClosedPromise(pageClosePromises);
 } catch (error) {
   log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
-  process.exit(1);
+  cleanUpAndExit(1, randomToken, true);
 }
 
 guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
package/src/index.ts
CHANGED
@@ -7,6 +7,8 @@ import {
   cleanUp,
   getUserDataTxt,
   writeToUserDataTxt,
+  listenForCleanUp,
+  cleanUpAndExit,
 } from './utils.js';
 import {
   prepareData,
@@ -106,19 +108,19 @@ const runScan = async (answers: Answers) => {
 answers.metadata = '{}';
 
 const data: Data = await prepareData(answers);
+
+// Executes cleanUp script if error encountered
+listenForCleanUp(data.randomToken);
+
 data.userDataDirectory = getClonedProfilesWithRandomToken(data.browser, data.randomToken);
 
 printMessage(['Scanning website...'], messageOptions);
 
 await combineRun(data, screenToScan);
 
-// Delete cloned directory
-deleteClonedProfiles(data.browser, data.randomToken);
-
 // Delete dataset and request queues
-cleanUp(data.randomToken);
+cleanUpAndExit(0, data.randomToken);
 
-process.exit(0);
};

if (userData) {
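Both entry points (cli.ts and index.ts) now follow the same lifecycle: install signal handlers as soon as the scan token exists, then funnel the final exit through `cleanUpAndExit`. A condensed sketch of that flow using the functions from this diff; the `answers` and `screenToScan` values are illustrative placeholders:

```ts
import { prepareData } from './constants/common.js';
import combineRun from './combine.js';
import { listenForCleanUp, cleanUpAndExit } from './utils.js';

const answers: any = { url: 'https://example.com' }; // illustrative scan options
const screenToScan = 'Desktop';                      // illustrative device label

const data = await prepareData(answers);
listenForCleanUp(data.randomToken); // SIGINT -> cleanUpAndExit(130, token, true); SIGTERM -> 143

await combineRun(data, screenToScan);
await cleanUpAndExit(0, data.randomToken); // replaces deleteClonedProfiles + cleanUp + process.exit(0)
```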
package/src/logs.ts
CHANGED
@@ -2,6 +2,8 @@
 /* eslint-disable no-shadow */
 import { createLogger, format, transports } from 'winston';
 import { guiInfoStatusTypes } from './constants/constants.js';
+import path from 'path';
+import { randomUUID } from 'crypto';
 
 const { combine, timestamp, printf } = format;
 
@@ -20,12 +22,32 @@ const logFormat = printf(({ timestamp, level, message }) => {
 // transport: storage device for logs
 // Enabled for console and storing into files; Files are overwritten each time
 // All logs in combined.txt, error in errors.txt
+const uuid = randomUUID();
+let basePath: string;
+
+if (process.env.OOBEE_LOGS_PATH) {
+  basePath = process.env.OOBEE_LOGS_PATH;
+} else if (process.platform === 'win32') {
+  basePath = path.join(process.env.APPDATA, 'Oobee');
+} else if (process.platform === 'darwin') {
+  basePath = path.join(process.env.HOME, 'Library', 'Application Support', 'Oobee');
+} else {
+  basePath = path.join(process.cwd());
+}
+
+export const errorsTxtPath = path.join(basePath, `${uuid}.txt`);
 
 const consoleLogger = createLogger({
   silent: !(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE),
   format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
-  transports:
-…
+  transports: [
+    new transports.Console({ level: 'info' }),
+    new transports.File({
+      filename: errorsTxtPath,
+      level: 'info',
+      handleExceptions: true,
+    }),
+  ],
 });
 
 // No display in consoles, this will mostly be used within the interactive script to avoid disrupting the flow
@@ -34,9 +56,10 @@ const consoleLogger = createLogger({
 const silentLogger = createLogger({
   format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
   transports: [
-…
+    new transports.File({
+      filename: errorsTxtPath,
+      level: 'warn',
+      handleExceptions: true }),
   ].filter(Boolean),
 });
 
@@ -46,16 +69,17 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
 switch (status) {
   case guiInfoStatusTypes.COMPLETED:
     console.log('Scan completed');
+    silentLogger.info('Scan completed');
     break;
   case guiInfoStatusTypes.SCANNED:
   case guiInfoStatusTypes.SKIPPED:
   case guiInfoStatusTypes.ERROR:
   case guiInfoStatusTypes.DUPLICATE:
-    console.log(
-      `crawling::${data.numScanned || 0}::${status}::${
+    const msg = `crawling::${data.numScanned || 0}::${status}::${
       data.urlScanned || 'no url provided'
-      }`,
-    );
+    }`;
+    console.log(msg);
+    silentLogger.info(msg);
     break;
   default:
     console.log(`Status provided to gui info log not recognized: ${status}`);
@@ -64,4 +88,6 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
 }
};

+consoleLogger.info(`Logger writing to: ${errorsTxtPath}`);
+
export { logFormat, consoleLogger, silentLogger };
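logs.ts now writes a per-run `${uuid}.txt` file whose directory is resolved in a fixed order: `OOBEE_LOGS_PATH` if set, else a per-platform app-data directory, else the working directory. The same order, as a standalone sketch; the `?? ''` fallbacks are additions here to keep the sketch type-safe, the package reads the env vars directly:

```ts
import path from 'path';

function resolveLogsBasePath(): string {
  if (process.env.OOBEE_LOGS_PATH) return process.env.OOBEE_LOGS_PATH;
  if (process.platform === 'win32') return path.join(process.env.APPDATA ?? '', 'Oobee');
  if (process.platform === 'darwin') {
    return path.join(process.env.HOME ?? '', 'Library', 'Application Support', 'Oobee');
  }
  return process.cwd(); // Linux and everything else
}

console.log(resolveLogsBasePath());
```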
package/src/mergeAxeResults.ts
CHANGED
@@ -29,6 +29,7 @@ import {
   getWcagCriteriaMap,
   categorizeWcagCriteria,
   getUserDataTxt,
+  register
 } from './utils.js';
 import { consoleLogger, silentLogger } from './logs.js';
 import itemTypeDescription from './constants/itemTypeDescription.js';
@@ -975,6 +976,8 @@ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filena
   ...getPlaywrightLaunchOptions(browser),
 });
 
+register(context);
+
 const page = await context.newPage();
 
 const data = fs.readFileSync(htmlFilePath, { encoding: 'utf-8' });
@@ -1710,9 +1713,9 @@ const generateArtifacts = async (
   zip: string = undefined, // optional
   generateJsonFiles = false,
 ) => {
-  const intermediateDatasetsPath = `${getStoragePath(randomToken)}/crawlee`;
-  const oobeeAppVersion = getVersion();
   const storagePath = getStoragePath(randomToken);
+  const intermediateDatasetsPath = `${storagePath}/crawlee`;
+  const oobeeAppVersion = getVersion();
 
   const formatAboutStartTime = (dateString: string) => {
     const utcStartTimeDate = new Date(dateString);
@@ -1985,12 +1988,10 @@ const generateArtifacts = async (
   // Should consider refactor constants.userDataDirectory to be a parameter in future
   await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
 
-…
-    await fs.remove(folderPath);
-  }
+  try {
+    fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
+  } catch (error) {
+    consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
   }
 
   // Take option if set
package/src/screenshotFunc/htmlScreenshotFunc.ts
CHANGED
@@ -22,7 +22,7 @@ export const takeScreenshotForHTMLElements = async (
 for (const violation of violations) {
   if (screenshotCount >= maxScreenshots) {
     /*
-
+    consoleLogger.warn(
       `Skipping screenshots for ${violation.id} as maxScreenshots (${maxScreenshots}) exceeded. You can increase it by specifying a higher value when calling takeScreenshotForHTMLElements.`,
     );
     */
@@ -34,7 +34,7 @@ export const takeScreenshotForHTMLElements = async (
 
 // Check if rule ID is 'oobee-grading-text-contents' and skip screenshot logic
 if (rule === 'oobee-grading-text-contents') {
-  //
+  // consoleLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
   newViolations.push(violation); // Make sure it gets added
   continue;
 }
@@ -59,13 +59,13 @@ export const takeScreenshotForHTMLElements = async (
   nodeWithScreenshotPath.screenshotPath = screenshotPath;
   screenshotCount++;
 } else {
-  //
+  // consoleLogger.info(`Element at ${currLocator} is not visible`);
 }
 
 break; // Stop looping after finding the first visible locator
 }
} catch (e) {
-  //
+  // consoleLogger.info(`Unable to take element screenshot at ${selector}`);
}
package/src/static/ejs/partials/scripts/utils.ejs
CHANGED
@@ -12,17 +12,14 @@
 // extract tagname and attribute name from html tag
 // e.g. ["input", "type", "value", "role"] from <input type="text" value="..." role="..." />
 const getHtmlTagAndAttributes = (htmlString) => {
-  const …
-…
-    return [tagName, ...attributes];
-  }
-  return [];
+  const tagMatch = htmlString.match(/^<\s*(\w+)/); // Get tag name
+  if (!tagMatch) return [];
+
+  const tagName = tagMatch[1];
+  const attrMatches = [...htmlString.matchAll(/(\w[\w-]*)\s*=\s*"[^"]*"/g)];
+  const attributes = attrMatches.map(match => match[1]);
+
+  return [tagName, ...attributes];
 };
 
 const rulesUsingRoles = [
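The rewritten helper does the extraction with two regexes: one anchored match for the tag name and a global `matchAll` for `name="value"` attribute pairs. A quick check of the behaviour, with the same regexes reproduced here:

```ts
const getHtmlTagAndAttributes = (htmlString: string): string[] => {
  const tagMatch = htmlString.match(/^<\s*(\w+)/); // tag name at the start of the string
  if (!tagMatch) return [];
  const tagName = tagMatch[1];
  const attributes = [...htmlString.matchAll(/(\w[\w-]*)\s*=\s*"[^"]*"/g)].map(m => m[1]);
  return [tagName, ...attributes];
};

console.log(getHtmlTagAndAttributes('<input type="text" value="..." role="textbox" />'));
// -> [ 'input', 'type', 'value', 'role' ]
console.log(getHtmlTagAndAttributes('not markup')); // -> []
```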
package/src/utils.ts
CHANGED
@@ -9,9 +9,10 @@ import constants, {
   destinationPath,
   getIntermediateScreenshotsPath,
 } from './constants/constants.js';
-import { consoleLogger, silentLogger } from './logs.js';
+import { consoleLogger, errorsTxtPath, silentLogger } from './logs.js';
 import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
 import { constant } from 'lodash';
+import { errors } from 'playwright';
 
 export const getVersion = () => {
   const loadJSON = (filePath: string): { version: string } =>
@@ -84,7 +85,7 @@ export const getStoragePath = (randomToken: string): string => {
 
 export const createDetailsAndLogs = async (randomToken: string): Promise<void> => {
   const storagePath = getStoragePath(randomToken);
-  const logPath = `${…
+  const logPath = `${storagePath}}/logs`;
   try {
     await fs.ensureDir(storagePath);
 
@@ -230,11 +231,257 @@ export const createScreenshotsFolder = (randomToken: string): void => {
   }
 };
 
-…
+
+let __shuttingDown = false;
+let __stopAllLock: Promise<void> | null = null;
+
+/**
+ * Register a resource so it can be stopped later.
+ * Supports Crawlee crawlers, Playwright BrowserContexts, and Browsers.
+ */
+export function register(resource: any) {
+  const name = resource?.constructor?.name;
+
+  if (name?.endsWith('Crawler')) {
+    constants.resources.crawlers.add(resource);
+  } else if (name === 'BrowserContext') {
+    constants.resources.browserContexts.add(resource);
+  } else if (name === 'Browser') {
+    constants.resources.browsers.add(resource);
+  }
+
+  return resource;
+}
+
+/**
+ * Stops or tears down all tracked resources.
+ * @param mode "graceful" (finish in-flight), "abort" (drop in-flight), or "teardown" (close immediately)
+ * @param timeoutMs Max time to wait before forcing shutdown
+ */
+export async function stopAll({ mode = 'graceful', timeoutMs = 10_000 } = {}) {
+  if (__stopAllLock) return __stopAllLock; // prevent overlap
+  __stopAllLock = (async () => {
+    const timeout = (ms: number) => new Promise(res => setTimeout(res, ms));
+    consoleLogger.info(`Stop browsers starting, mode=${mode}, timeoutMs=${timeoutMs}`);
+
+    // --- Crawlers ---
+    for (const c of [...constants.resources.crawlers]) {
+      try {
+        const pool = (c as any).autoscaledPool;
+        if (pool && typeof pool.isRunning !== 'undefined' && !pool.isRunning) {
+          consoleLogger.info('Skipping crawler (already stopped)');
+          continue;
+        }
+
+        consoleLogger.info(`Closing crawler (${mode})...`);
+        if (mode === 'graceful') {
+          if (typeof c.stop === 'function') {
+            await Promise.race([c.stop(), timeout(timeoutMs)]);
+          }
+        } else if (mode === 'abort') {
+          pool?.abort?.();
+        } else {
+          if (typeof c.teardown === 'function') {
+            await Promise.race([c.teardown(), timeout(timeoutMs)]);
+          }
+        }
+        consoleLogger.info(`Crawler closed (${mode})`);
+      } catch (err) {
+        consoleLogger.warn(`Error stopping crawler: ${(err as Error).message}`);
+      } finally {
+        constants.resources.crawlers.delete(c);
+      }
+    }
+
+    // --- BrowserContexts ---
+    for (const ctx of [...constants.resources.browserContexts]) {
+      // compute once so we can also use in finally
+      const pagesArr = typeof ctx.pages === 'function' ? ctx.pages() : [];
+      const hasOpenPages = Array.isArray(pagesArr) && pagesArr.length > 0;
+
+      try {
+        const browser = typeof ctx.browser === 'function' ? ctx.browser() : null;
+        if (browser && (browser as any).isClosed?.()) {
+          consoleLogger.info('Skipping BrowserContext (browser already closed)');
+          continue;
+        }
+
+        // ➜ Graceful: don't kill contexts that are still doing work
+        if (mode === 'graceful' && hasOpenPages) {
+          consoleLogger.info(`Skipping BrowserContext in graceful (has ${pagesArr.length} open page(s))`);
+          continue; // leave it for the teardown pass
+        }
+
+        // (Optional speed-up) close pages first if any
+        if (hasOpenPages) {
+          consoleLogger.info(`Closing ${pagesArr.length} page(s) before context close...`);
+          for (const p of pagesArr) {
+            try { await Promise.race([p.close(), timeout(1500)]); } catch {}
+          }
+        }
+
+        consoleLogger.info('Closing BrowserContext...');
+        if (typeof ctx.close === 'function') {
+          await Promise.race([ctx.close(), timeout(timeoutMs)]);
+        }
+        consoleLogger.info('BrowserContext closed');
+
+        // also close its browser (persistent contexts)
+        const b = browser;
+        if (b && !(b as any).isClosed?.()) {
+          consoleLogger.info('Closing Browser (from context.browser())...');
+          if (typeof b.close === 'function') {
+            await Promise.race([b.close(), timeout(timeoutMs)]);
+          }
+          consoleLogger.info('Browser closed (from context.browser())');
+        }
+      } catch (err) {
+        consoleLogger.warn(`Error closing BrowserContext: ${(err as Error).message}`);
+      } finally {
+        // only delete from the set if we actually closed it (or tried to)
+        if (!(mode === 'graceful' && hasOpenPages)) {
+          constants.resources.browserContexts.delete(ctx);
+        }
+      }
+    }
+
+    // --- Browsers ---
+    for (const b of [...constants.resources.browsers]) {
+      try {
+        if ((b as any).isClosed?.()) {
+          consoleLogger.info('Skipping Browser (already closed)');
+          continue;
+        }
+
+        consoleLogger.info('Closing Browser...');
+        if (typeof b.close === 'function') {
+          await Promise.race([b.close(), timeout(timeoutMs)]);
+        }
+        consoleLogger.info('Browser closed');
+      } catch (err) {
+        consoleLogger.warn(`Error closing Browser: ${(err as Error).message}`);
+      } finally {
+        constants.resources.browsers.delete(b);
+      }
+    }
+
+    consoleLogger.info(`Stop browsers finished for mode=${mode}`);
+  })();
+
+  try {
+    await __stopAllLock;
+  } finally {
+    __stopAllLock = null;
+  }
+}
+
+export const cleanUp = async (randomToken?: string, isError: boolean = false): Promise<void> => {
+
+  if (isError) {
+    await stopAll({ mode: 'graceful', timeoutMs: 8000 });
+    await stopAll({ mode: 'teardown', timeoutMs: 4000 });
+  }
+
+  if (randomToken === undefined && constants.randomToken) {
+    randomToken = constants.randomToken;
+  }
+
+  if (constants.userDataDirectory) try {
+    fs.rmSync(constants.userDataDirectory, { recursive: true, force: true });
+  } catch (error) {
+    consoleLogger.warn(`Unable to force remove userDataDirectory: ${error.message}`);
+  }
+
+  if (randomToken !== undefined) {
+    const storagePath = getStoragePath(randomToken);
+
+    try {
+      fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
+    } catch (error) {
+      consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
+    }
+
+    let deleteErrorLogFile = true;
+
+    if (isError) {
+      let logsPath = storagePath;
+
+      if (process.env.OOBEE_LOGS_PATH) {
+        logsPath = process.env.OOBEE_LOGS_PATH;
+      }
+
+      if (fs.existsSync(errorsTxtPath)) {
+        try {
+          const logFilePath = path.join(logsPath, `logs-${randomToken}.txt`);
+          fs.copyFileSync(errorsTxtPath, logFilePath);
+          console.log(`An error occured. Log file is located at: ${logFilePath}`);
+
+        } catch (copyError) {
+          consoleLogger.error(`Error copying errors file during cleanup: ${copyError.message}`);
+          console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
+          deleteErrorLogFile = false; // Do not delete the log file if copy failed
+        }
+
+        if (deleteErrorLogFile && fs.existsSync(errorsTxtPath)) {
+          try {
+            fs.unlinkSync(errorsTxtPath);
+          } catch (error) {
+            consoleLogger.warn(`Unable to delete log file ${errorsTxtPath}: ${error.message}`);
+          }
+        }

+      }

+    }
+
+    if (fs.existsSync(storagePath) && fs.readdirSync(storagePath).length === 0) {
+      try {
+        fs.rmdirSync(storagePath);
+        consoleLogger.info(`Deleted empty storage path: ${storagePath}`);

+      } catch (error) {
+        consoleLogger.warn(`Error deleting empty storage path ${storagePath}: ${error.message}`);
+      }
+    }
+
+    consoleLogger.info(`Clean up completed for: ${randomToken}`);
+  }

+};
+
+export const cleanUpAndExit = async (
+  exitCode: number,
+  randomToken?: string,
+  isError: boolean = false,
+): Promise<void> => {
+  if (__shuttingDown) {
+    consoleLogger.info('Cleanup already in progress; ignoring duplicate exit request.');
+    return;
+  }
+  __shuttingDown = true;
+
+  try {
+    await cleanUp(randomToken, isError); // runs stopAll inside cleanUp
+  } catch (e: any) {
+    consoleLogger.warn(`Cleanup error: ${e?.message || e}`);
+  }
+
+  consoleLogger.info(`Exiting with code: ${exitCode}`);
+  process.exit(exitCode); // explicit exit after cleanup completes
+};
+
+export const listenForCleanUp = (randomToken: string): void => {
+  consoleLogger.info(`PID: ${process.pid}`);
+
+  process.on('SIGINT', async () => { // ← keep handler installed
+    consoleLogger.info('SIGINT received. Cleaning up and exiting.');
+    await cleanUpAndExit(130, randomToken, true);
+  });
+
+  process.on('SIGTERM', async () => { // ← keep handler installed
+    consoleLogger.info('SIGTERM received. Cleaning up and exiting.');
+    await cleanUpAndExit(143, randomToken, true);
+  });
 };
 
 export const getWcagPassPercentage = (
@@ -757,8 +1004,8 @@ export const zipResults = (zipName: string, resultsPath: string): void => {
 
 if (os.platform() === 'win32') {
   execSync(
-    `…
-    {…
+    `tar.exe -a -c -f "${zipFilePath}" *`,
+    { cwd: resultsPath },
   );
 } else {
   // Get zip command in Mac and Linux
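Worth noting how the pieces above compose on an error path: `cleanUpAndExit(code, token, true)` sets a shutdown flag, then `cleanUp` runs `stopAll` twice — a graceful pass that calls `crawler.stop()` and skips contexts with open pages, then a teardown pass that force-closes whatever is left — before removing folders and exiting. A condensed sketch of that sequence, with an illustrative token value:

```ts
import { stopAll, cleanUpAndExit } from './utils.js';

// Error path, as wired inside cleanUp(randomToken, /* isError */ true):
await stopAll({ mode: 'graceful', timeoutMs: 8000 }); // crawler.stop(); busy contexts are left alone
await stopAll({ mode: 'teardown', timeoutMs: 4000 }); // crawler.teardown(); close contexts and browsers

// Typical caller: exit code 1, copy the error log out, then process.exit(1).
await cleanUpAndExit(1, 'example-scan-token', true); // token value illustrative
```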
|