@govtechsg/oobee 0.10.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +22 -0
- package/.github/pull_request_template.md +11 -0
- package/.github/workflows/docker-test.yml +54 -0
- package/.github/workflows/image.yml +107 -0
- package/.github/workflows/publish.yml +18 -0
- package/.idea/modules.xml +8 -0
- package/.idea/purple-a11y.iml +9 -0
- package/.idea/vcs.xml +6 -0
- package/.prettierrc.json +12 -0
- package/.vscode/extensions.json +5 -0
- package/.vscode/settings.json +10 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/DETAILS.md +163 -0
- package/Dockerfile +60 -0
- package/INSTALLATION.md +146 -0
- package/INTEGRATION.md +785 -0
- package/LICENSE +22 -0
- package/README.md +587 -0
- package/SECURITY.md +5 -0
- package/__mocks__/mock-report.html +1431 -0
- package/__mocks__/mockFunctions.ts +32 -0
- package/__mocks__/mockIssues.ts +64 -0
- package/__mocks__/mock_all_issues/000000001.json +64 -0
- package/__mocks__/mock_all_issues/000000002.json +53 -0
- package/__mocks__/mock_all_issues/fake-file.txt +0 -0
- package/__tests__/logs.test.ts +25 -0
- package/__tests__/mergeAxeResults.test.ts +278 -0
- package/__tests__/utils.test.ts +118 -0
- package/a11y-scan-results.zip +0 -0
- package/eslint.config.js +53 -0
- package/exclusions.txt +2 -0
- package/gitlab-pipeline-template.yml +54 -0
- package/jest.config.js +1 -0
- package/package.json +96 -0
- package/scripts/copyFiles.js +44 -0
- package/scripts/install_oobee_dependencies.cmd +13 -0
- package/scripts/install_oobee_dependencies.command +101 -0
- package/scripts/install_oobee_dependencies.ps1 +110 -0
- package/scripts/oobee_shell.cmd +13 -0
- package/scripts/oobee_shell.command +11 -0
- package/scripts/oobee_shell.sh +55 -0
- package/scripts/oobee_shell_ps.ps1 +54 -0
- package/src/cli.ts +401 -0
- package/src/combine.ts +240 -0
- package/src/constants/__tests__/common.test.ts +44 -0
- package/src/constants/cliFunctions.ts +305 -0
- package/src/constants/common.ts +1840 -0
- package/src/constants/constants.ts +443 -0
- package/src/constants/errorMeta.json +319 -0
- package/src/constants/itemTypeDescription.ts +11 -0
- package/src/constants/oobeeAi.ts +141 -0
- package/src/constants/questions.ts +181 -0
- package/src/constants/sampleData.ts +187 -0
- package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
- package/src/crawlers/commonCrawlerFunc.ts +656 -0
- package/src/crawlers/crawlDomain.ts +877 -0
- package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
- package/src/crawlers/crawlLocalFile.ts +193 -0
- package/src/crawlers/crawlSitemap.ts +356 -0
- package/src/crawlers/custom/extractAndGradeText.ts +57 -0
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
- package/src/crawlers/custom/utils.ts +486 -0
- package/src/crawlers/customAxeFunctions.ts +82 -0
- package/src/crawlers/pdfScanFunc.ts +468 -0
- package/src/crawlers/runCustom.ts +117 -0
- package/src/index.ts +173 -0
- package/src/logs.ts +66 -0
- package/src/mergeAxeResults.ts +964 -0
- package/src/npmIndex.ts +284 -0
- package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
- package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
- package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
- package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
- package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
- package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
- package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
- package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
- package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
- package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
- package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
- package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
- package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
- package/src/static/ejs/partials/components/topFive.ejs +6 -0
- package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
- package/src/static/ejs/partials/footer.ejs +21 -0
- package/src/static/ejs/partials/header.ejs +230 -0
- package/src/static/ejs/partials/main.ejs +40 -0
- package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
- package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
- package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
- package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
- package/src/static/ejs/partials/scripts/popper.ejs +7 -0
- package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
- package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
- package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
- package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
- package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
- package/src/static/ejs/partials/scripts/utils.ejs +441 -0
- package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
- package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
- package/src/static/ejs/partials/styles/styles.ejs +1843 -0
- package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
- package/src/static/ejs/partials/summaryHeader.ejs +70 -0
- package/src/static/ejs/partials/summaryMain.ejs +75 -0
- package/src/static/ejs/report.ejs +420 -0
- package/src/static/ejs/summary.ejs +47 -0
- package/src/static/mustache/.prettierrc +4 -0
- package/src/static/mustache/Attention Deficit.mustache +11 -0
- package/src/static/mustache/Blind.mustache +11 -0
- package/src/static/mustache/Cognitive.mustache +7 -0
- package/src/static/mustache/Colorblindness.mustache +20 -0
- package/src/static/mustache/Deaf.mustache +12 -0
- package/src/static/mustache/Deafblind.mustache +7 -0
- package/src/static/mustache/Dyslexia.mustache +14 -0
- package/src/static/mustache/Low Vision.mustache +7 -0
- package/src/static/mustache/Mobility.mustache +15 -0
- package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
- package/src/static/mustache/report.mustache +1709 -0
- package/src/types/print-message.d.ts +28 -0
- package/src/types/types.ts +46 -0
- package/src/types/xpath-to-css.d.ts +3 -0
- package/src/utils.ts +332 -0
- package/tsconfig.json +15 -0
package/src/cli.ts
ADDED
@@ -0,0 +1,401 @@
|
|
1
|
+
#!/usr/bin/env node
|
2
|
+
import _yargs from 'yargs';
|
3
|
+
import { hideBin } from 'yargs/helpers';
|
4
|
+
import printMessage from 'print-message';
|
5
|
+
import { devices } from 'playwright';
|
6
|
+
import { fileURLToPath } from 'url';
|
7
|
+
import path from 'path';
|
8
|
+
import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
|
9
|
+
import {
|
10
|
+
checkUrl,
|
11
|
+
prepareData,
|
12
|
+
getFileSitemap,
|
13
|
+
validEmail,
|
14
|
+
validName,
|
15
|
+
getBrowserToRun,
|
16
|
+
getPlaywrightDeviceDetailsObject,
|
17
|
+
deleteClonedProfiles,
|
18
|
+
getScreenToScan,
|
19
|
+
getClonedProfilesWithRandomToken,
|
20
|
+
validateDirPath,
|
21
|
+
validateFilePath,
|
22
|
+
validateCustomFlowLabel,
|
23
|
+
parseHeaders,
|
24
|
+
} from './constants/common.js';
|
25
|
+
import constants, { ScannerTypes } from './constants/constants.js';
|
26
|
+
import { cliOptions, messageOptions } from './constants/cliFunctions.js';
|
27
|
+
import combineRun from './combine.js';
|
28
|
+
import { Answers } from './index.js';
|
29
|
+
|
30
|
+
const appVersion = getVersion();
const yargs = _yargs(hideBin(process.argv));

/**
 * Parsed and validated command-line options.
 *
 * The option definitions come from `cliOptions`; every `.coerce()` hook below
 * validates a single flag and, on invalid input, prints a message through
 * `printMessage` and terminates the process with exit code 1. The `.check()`
 * hooks reject flag combinations that do not apply to the selected scanner.
 */
const options = yargs
  .version(false)
  .usage(
    `Oobee version: ${appVersion}
Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
  )
  .strictOptions(true)
  .options(cliOptions)
  // Each example is a pair of strings. Previously every pair was written as ONE
  // template literal containing a stray `', '`, so yargs received a single
  // mangled string per example.
  .example([
    [
      'To scan sitemap of website:',
      'npm run cli -- -c [ 1 | sitemap ] -u <url_link> [ -d <device> | -w <viewport_width> ]',
    ],
    [
      'To scan a website',
      'npm run cli -- -c [ 2 | website ] -u <url_link> [ -d <device> | -w <viewport_width> ]',
    ],
    [
      'To start a custom flow scan',
      'npm run cli -- -c [ 3 | custom ] -u <url_link> [ -d <device> | -w <viewport_width> ]',
    ],
  ])
  // -d: must be a Playwright device name, or the generic 'Desktop' / 'Mobile'
  .coerce('d', option => {
    const device = devices[option];
    if (!device && option !== 'Desktop' && option !== 'Mobile') {
      printMessage(
        [`Invalid device. Please provide an existing device to start the scan.`],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -w: viewport width, a number between 320 and 1080 (inclusive)
  .coerce('w', option => {
    if (!option || Number.isNaN(option)) {
      printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
      process.exit(1);
    } else if (option < 320 || option > 1080) {
      printMessage(
        ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -p: maximum number of pages, a positive integer
  .coerce('p', option => {
    if (!Number.isInteger(option) || Number(option) <= 0) {
      printMessage(
        [`Invalid maximum number of pages. Please provide a positive integer.`],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -t: maximum concurrency, a positive integer
  .coerce('t', option => {
    if (!Number.isInteger(option) || Number(option) <= 0) {
      printMessage(
        [`Invalid number for max concurrency. Please provide a positive integer.`],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -k: "<name>:<email>"; both halves are validated, the raw string is returned
  .coerce('k', nameEmail => {
    if (!nameEmail.includes(':')) {
      printMessage(
        [`Invalid format. Please provide your name and email address separated by ":"`],
        messageOptions,
      );
      process.exit(1);
    }
    const [name, email] = nameEmail.split(':');
    // split() always yields strings, so an empty string is the only "missing" case
    if (!name) {
      printMessage([`Please provide your name.`], messageOptions);
      process.exit(1);
    }
    if (!validName(name)) {
      printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
      process.exit(1);
    }
    if (!validEmail(email)) {
      printMessage(
        [`Invalid email address. Please provide a valid email address.`],
        messageOptions,
      );
      process.exit(1);
    }
    return nameEmail;
  })
  // -e: export directory; validateDirPath returns a truthy error description on failure
  .coerce('e', option => {
    const validationErrors = validateDirPath(option);
    if (validationErrors) {
      printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
      process.exit(1);
    }
    return option;
  })
  // -x: blacklisted patterns file, validated against the parent of this file's directory
  .coerce('x', option => {
    const filename = fileURLToPath(import.meta.url);
    const dirname = `${path.dirname(filename)}/../`; // check in the parent of dist directory

    try {
      return validateFilePath(option, dirname);
    } catch (err) {
      printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
      process.exit(1);
    }

    // eslint-disable-next-line no-unreachable
    return null;
  })
  // -i: file types, must be one of the choices declared in cliOptions.i
  .coerce('i', option => {
    const { choices } = cliOptions.i;
    if (!choices.includes(option)) {
      printMessage(
        [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -j: custom flow label, validated by validateCustomFlowLabel
  .coerce('j', option => {
    const { isValid, errorMessage } = validateCustomFlowLabel(option);
    if (!isValid) {
      printMessage([errorMessage], messageOptions);
      process.exit(1);
    }
    return option;
  })
  // -a: must be one of the choices declared in cliOptions.a
  .coerce('a', option => {
    const { choices } = cliOptions.a;
    if (!choices.includes(option)) {
      printMessage(
        [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
        messageOptions,
      );
      process.exit(1);
    }
    return option;
  })
  // -q: expected to be a JSON string; anything unparsable is replaced by '{}'
  .coerce('q', option => {
    try {
      JSON.parse(option);
    } catch {
      // default to empty object
      return '{}';
    }
    return option;
  })
  // -m: passed through unchanged
  .coerce('m', option => {
    return option;
  })
  .check(argvs => {
    if (
      (argvs.scanner === ScannerTypes.CUSTOM || argvs.scanner === ScannerTypes.LOCALFILE) &&
      argvs.maxpages
    ) {
      throw new Error('-p or --maxpages is only available in website and sitemap scans.');
    }
    return true;
  })
  .check(argvs => {
    if (argvs.scanner !== ScannerTypes.WEBSITE && argvs.strategy) {
      throw new Error('-s or --strategy is only available in website scans.');
    }
    return true;
  })
  // a named device and an explicit viewport width are mutually exclusive
  .conflicts('d', 'w')
  .parse() as unknown as Answers;
|
203
|
+
|
204
|
+
/**
 * Validates the target of a scan, prepares the scan data, runs the scan and
 * cleans up afterwards.
 *
 * Steps:
 *  1. Resolve the browser and the device / viewport details.
 *  2. Check the URL with `checkUrl`; any failing status prints its message and
 *     exits the process with that status code.
 *  3. Build the scan data via `prepareData`, re-clone the browser profile and
 *     run `combineRun`.
 *  4. Remove the cloned profile (even if the scan throws) and the crawl storage.
 *
 * @param argvs - answers collected from the command line
 * @returns the storage path for this scan's random token
 */
const scanInit = async (argvs: Answers): Promise<string> => {
  const isCustomFlow = argvs.scanner === ScannerTypes.CUSTOM;
  const updatedArgvs = { ...argvs };
  const statuses = constants.urlCheckStatuses;

  // Empty string for profile directory will use incognito mode in playwright
  let clonedDataDir = '';

  const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(updatedArgvs.browserToRun, true);
  updatedArgvs.browserToRun = browserToRun;
  clonedDataDir = clonedBrowserDataDir;

  // 'Desktop' / 'Mobile' arrive through customDevice but are treated as the chosen device
  if (updatedArgvs.customDevice === 'Desktop' || updatedArgvs.customDevice === 'Mobile') {
    updatedArgvs.deviceChosen = argvs.customDevice;
    delete updatedArgvs.customDevice;
  }

  // Creating the playwrightDeviceDetailObject
  // for use in crawlDomain & crawlSitemap's preLaunchHook
  updatedArgvs.playwrightDeviceDetailsObject = getPlaywrightDeviceDetailsObject(
    updatedArgvs.deviceChosen,
    updatedArgvs.customDevice,
    updatedArgvs.viewportWidth,
  );

  const res = await checkUrl(
    updatedArgvs.scanner,
    updatedArgvs.url,
    updatedArgvs.browserToRun,
    clonedDataDir,
    updatedArgvs.playwrightDeviceDetailsObject,
    isCustomFlow,
    updatedArgvs.header,
  );

  switch (res.status) {
    case statuses.success.code: {
      updatedArgvs.finalUrl = res.url;
      // When only URL validation was requested, report success and stop here
      if (process.env.VALIDATE_URL_PH_GUI) {
        console.log('Url is valid');
        process.exit(0);
      }
      break;
    }
    case statuses.unauthorised.code: {
      printMessage([statuses.unauthorised.message], messageOptions);
      process.exit(res.status);
      break;
    }
    case statuses.cannotBeResolved.code: {
      printMessage([statuses.cannotBeResolved.message], messageOptions);
      process.exit(res.status);
      break;
    }
    case statuses.systemError.code: {
      printMessage([statuses.systemError.message], messageOptions);
      process.exit(res.status);
      break;
    }
    case statuses.invalidUrl.code: {
      // Only sitemap and local-file scans may fall back to a path on disk
      if (
        updatedArgvs.scanner !== ScannerTypes.SITEMAP &&
        updatedArgvs.scanner !== ScannerTypes.LOCALFILE
      ) {
        printMessage([statuses.invalidUrl.message], messageOptions);
        process.exit(res.status);
      }

      const finalFilePath = getFileSitemap(updatedArgvs.url);
      if (finalFilePath) {
        updatedArgvs.isLocalFileScan = true;
        updatedArgvs.finalUrl = finalFilePath;
        if (process.env.VALIDATE_URL_PH_GUI) {
          console.log('Url is valid');
          process.exit(0);
        }
      } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
        printMessage([statuses.notALocalFile.message], messageOptions);
        process.exit(statuses.notALocalFile.code);
      } else {
        // Only a sitemap scan can reach this branch. The previous condition
        // (`scanner !== SITEMAP`) was never true here, so an unresolved
        // sitemap path silently continued with no finalUrl.
        printMessage([statuses.notASitemap.message], messageOptions);
        process.exit(statuses.notASitemap.code);
      }
      break;
    }
    case statuses.notASitemap.code: {
      printMessage([statuses.notASitemap.message], messageOptions);
      process.exit(res.status);
      break;
    }
    case statuses.notALocalFile.code: {
      printMessage([statuses.notALocalFile.message], messageOptions);
      process.exit(res.status);
      break;
    }
    case statuses.browserError.code: {
      printMessage([statuses.browserError.message], messageOptions);
      process.exit(res.status);
      break;
    }
    default:
      break;
  }

  if (updatedArgvs.scanner === ScannerTypes.WEBSITE && !updatedArgvs.strategy) {
    updatedArgvs.strategy = 'same-domain';
  }

  const data = await prepareData(updatedArgvs);

  // Removes cloned browser profiles; with OOBEE_VERBOSE set, the random token
  // is passed to deleteClonedProfiles as well.
  const removeClonedProfiles = () => {
    if (process.env.OOBEE_VERBOSE) {
      deleteClonedProfiles(data.browser, data.randomToken);
    } else {
      deleteClonedProfiles(data.browser);
    }
  };

  // File clean up after url check
  // files will clone a second time below if url check passes
  removeClonedProfiles(); // first deletion

  if (updatedArgvs.exportDirectory) {
    constants.exportDirectory = updatedArgvs.exportDirectory;
  }

  // Report the random token to a parent process when one is listening
  if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) {
    const randomTokenMessage = {
      type: 'randomToken',
      payload: `${data.randomToken}`,
    };
    if (process.send) {
      process.send(JSON.stringify(randomTokenMessage));
    }
  }

  setHeadlessMode(data.browser, data.isHeadless);

  const screenToScan = getScreenToScan(
    updatedArgvs.deviceChosen,
    updatedArgvs.customDevice,
    updatedArgvs.viewportWidth,
  );

  // Clone profiles a second time
  clonedDataDir = getClonedProfilesWithRandomToken(data.browser, data.randomToken);
  data.userDataDirectory = clonedDataDir;

  printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);

  try {
    await combineRun(data, screenToScan);
  } finally {
    // Delete cloned directory, also when the scan throws
    removeClonedProfiles(); // second deletion
  }

  // Delete dataset and request queues
  await cleanUp(data.randomToken);

  return getStoragePath(data.randomToken);
};
|
369
|
+
|
370
|
+
/**
 * Copies exactly the fields that make up `Answers` out of the parsed CLI
 * options, leaving behind any other keys present on the parsed object.
 */
const pickAnswers = ({
  scanner,
  header,
  browserToRun,
  zip,
  url,
  finalUrl,
  headless,
  maxpages,
  metadata,
  safeMode,
  strategy,
  fileTypes,
  nameEmail,
  additional,
  customDevice,
  deviceChosen,
  followRobots,
  customFlowLabel,
  viewportWidth,
  isLocalFileScan,
  exportDirectory,
  clonedBrowserDataDir,
  specifiedMaxConcurrency,
  blacklistedPatternsFilename,
  playwrightDeviceDetailsObject,
  ruleset,
}: Answers): Answers => ({
  scanner,
  header,
  browserToRun,
  zip,
  url,
  finalUrl,
  headless,
  maxpages,
  metadata,
  safeMode,
  strategy,
  fileTypes,
  nameEmail,
  additional,
  customDevice,
  deviceChosen,
  followRobots,
  customFlowLabel,
  viewportWidth,
  isLocalFileScan,
  exportDirectory,
  clonedBrowserDataDir,
  specifiedMaxConcurrency,
  blacklistedPatternsFilename,
  playwrightDeviceDetailsObject,
  ruleset,
});

const optionsAnswer: Answers = pickAnswers(options);

// Run the scan, then end the process explicitly with a success code
await scanInit(optionsAnswer);
process.exit(0);
|
400
|
+
|
401
|
+
export default options;
|
package/src/combine.ts
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
import printMessage from 'print-message';
|
2
|
+
import { pathToFileURL } from 'url';
|
3
|
+
import crawlSitemap from './crawlers/crawlSitemap.js';
|
4
|
+
import crawlDomain from './crawlers/crawlDomain.js';
|
5
|
+
import crawlLocalFile from './crawlers/crawlLocalFile.js';
|
6
|
+
import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
|
7
|
+
import generateArtifacts from './mergeAxeResults.js';
|
8
|
+
import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
|
9
|
+
import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
|
10
|
+
import { getBlackListedPatterns, submitForm, urlWithoutAuth } from './constants/common.js';
|
11
|
+
import { consoleLogger, silentLogger } from './logs.js';
|
12
|
+
import runCustom from './crawlers/runCustom.js';
|
13
|
+
import { alertMessageOptions } from './constants/cliFunctions.js';
|
14
|
+
import { Data } from './index.js';
|
15
|
+
|
16
|
+
// Class exports
|
17
|
+
/**
 * Plain value holder bundling the device / viewport choices of a scan so they
 * can be handed to the crawlers as a single argument.
 */
export class ViewportSettingsClass {
  /** Device selected for the scan. */
  deviceChosen: string;

  /** Custom device name. */
  customDevice: string;

  /** Viewport width. */
  viewportWidth: number;

  /** Playwright device details; left untyped (`any`) as in the rest of the code base. */
  playwrightDeviceDetailsObject: any;

  /**
   * @param deviceChosen - stored as `deviceChosen`
   * @param customDevice - stored as `customDevice`
   * @param viewportWidth - stored as `viewportWidth`
   * @param playwrightDeviceDetailsObject - stored as `playwrightDeviceDetailsObject`
   */
  constructor(
    deviceChosen: string,
    customDevice: string,
    viewportWidth: number,
    playwrightDeviceDetailsObject: any,
  ) {
    // Each argument is stored verbatim; no validation or defaulting happens here
    this.playwrightDeviceDetailsObject = playwrightDeviceDetailsObject;
    this.viewportWidth = viewportWidth;
    this.customDevice = customDevice;
    this.deviceChosen = deviceChosen;
  }
}
|
35
|
+
|
36
|
+
/**
 * Runs the crawler matching `details.type`, then generates the report
 * artifacts and submits the scan summary form.
 *
 * Side effects visible in this function: sets `CRAWLEE_LOG_LEVEL` and
 * `CRAWLEE_STORAGE_DIR` on `process.env`, and exits the process with code 1
 * when the blacklisted patterns cannot be loaded or the scan type is unknown.
 *
 * @param details - prepared scan data
 * @param deviceToScan - forwarded to `generateArtifacts`
 */
const combineRun = async (details: Data, deviceToScan: string) => {
  const envDetails = { ...details };

  const {
    type,
    url,
    nameEmail,
    randomToken,
    deviceChosen,
    customDevice,
    viewportWidth,
    playwrightDeviceDetailsObject,
    maxRequestsPerCrawl,
    browser,
    userDataDirectory,
    strategy,
    specifiedMaxConcurrency,
    fileTypes,
    blacklistedPatternsFilename,
    includeScreenshots,
    followRobots,
    metadata,
    customFlowLabel = 'Custom Flow',
    extraHTTPHeaders,
    safeMode,
    zip,
    ruleset,
  } = envDetails;

  process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
  process.env.CRAWLEE_STORAGE_DIR = randomToken;

  // Sitemap and local-file scans take a path-like `url`: no host is derived
  // and the entry URL is built as a file URL instead.
  const isFileBasedScan = type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE;

  const host = isFileBasedScan ? '' : getHost(url);

  let blacklistedPatterns: string[] | null = null;
  try {
    blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
  } catch (error) {
    consoleLogger.error(error);
    silentLogger.error(error);
    process.exit(1);
  }

  // remove basic-auth credentials from URL
  const finalUrl = isFileBasedScan ? new URL(pathToFileURL(url)) : urlWithoutAuth(url);

  // Use the string version of finalUrl to reduce logic at submitForm
  const finalUrlString = finalUrl.toString();

  const scanDetails = {
    startTime: new Date(),
    endTime: new Date(), // placeholder, overwritten once the crawl finishes
    crawlType: type,
    requestUrl: finalUrl,
    urlsCrawled: new UrlsCrawled(),
  };

  const viewportSettings: ViewportSettingsClass = new ViewportSettingsClass(
    deviceChosen,
    customDevice,
    viewportWidth,
    playwrightDeviceDetailsObject,
  );

  let urlsCrawledObj: UrlsCrawled;
  switch (type) {
    case ScannerTypes.CUSTOM:
      urlsCrawledObj = await runCustom(
        url,
        randomToken,
        viewportSettings,
        blacklistedPatterns,
        includeScreenshots,
      );
      break;

    case ScannerTypes.SITEMAP:
      urlsCrawledObj = await crawlSitemap(
        url,
        randomToken,
        host,
        viewportSettings,
        maxRequestsPerCrawl,
        browser,
        userDataDirectory,
        specifiedMaxConcurrency,
        fileTypes,
        blacklistedPatterns,
        includeScreenshots,
        extraHTTPHeaders,
      );
      break;

    case ScannerTypes.LOCALFILE:
      urlsCrawledObj = await crawlLocalFile(
        url,
        randomToken,
        host,
        viewportSettings,
        maxRequestsPerCrawl,
        browser,
        userDataDirectory,
        specifiedMaxConcurrency,
        fileTypes,
        blacklistedPatterns,
        includeScreenshots,
        extraHTTPHeaders,
      );
      break;

    case ScannerTypes.INTELLIGENT:
      urlsCrawledObj = await crawlIntelligentSitemap(
        url,
        randomToken,
        host,
        viewportSettings,
        maxRequestsPerCrawl,
        browser,
        userDataDirectory,
        strategy,
        specifiedMaxConcurrency,
        fileTypes,
        blacklistedPatterns,
        includeScreenshots,
        followRobots,
        extraHTTPHeaders,
        safeMode,
      );
      break;

    case ScannerTypes.WEBSITE:
      urlsCrawledObj = await crawlDomain({
        url,
        randomToken,
        host,
        viewportSettings,
        maxRequestsPerCrawl,
        browser,
        userDataDirectory,
        strategy,
        specifiedMaxConcurrency,
        fileTypes,
        blacklistedPatterns,
        includeScreenshots,
        followRobots,
        extraHTTPHeaders,
        safeMode,
        ruleset,
      });
      break;

    default:
      consoleLogger.error(`type: ${type} not defined`);
      silentLogger.error(`type: ${type} not defined`);
      process.exit(1);
  }

  scanDetails.endTime = new Date();
  scanDetails.urlsCrawled = urlsCrawledObj;
  await createDetailsAndLogs(randomToken);

  // Previously the message was only shown when the crawler returned nothing at
  // all; a result object with zero scanned pages ended silently. Both cases
  // mean no page was scanned, so both now report it.
  if (!scanDetails.urlsCrawled || scanDetails.urlsCrawled.scanned.length === 0) {
    printMessage([`No pages were scanned.`], alertMessageOptions);
    return;
  }

  await createAndUpdateResultsFolders(randomToken);
  const pagesNotScanned = [
    ...urlsCrawledObj.error,
    ...urlsCrawledObj.invalid,
    ...urlsCrawledObj.forbidden,
  ];
  const basicFormHTMLSnippet = await generateArtifacts(
    randomToken,
    url,
    type,
    deviceToScan,
    urlsCrawledObj.scanned,
    pagesNotScanned,
    customFlowLabel,
    undefined,
    scanDetails,
    zip,
  );
  // nameEmail has the form "<name>:<email>"
  const [name, email] = nameEmail.split(':');

  await submitForm(
    browser,
    userDataDirectory,
    url, // scannedUrl
    new URL(finalUrlString).href, // entryUrl
    type,
    email,
    name,
    JSON.stringify(basicFormHTMLSnippet),
    urlsCrawledObj.scanned.length,
    urlsCrawledObj.scannedRedirects.length,
    pagesNotScanned.length,
    metadata,
  );
};
|
239
|
+
|
240
|
+
export default combineRun;
|
@@ -0,0 +1,44 @@
|
|
1
|
+
import { jest } from '@jest/globals';
|
2
|
+
import axios from 'axios';
|
3
|
+
import * as sampleData from '../sampleData';
|
4
|
+
import { getLinksFromSitemap } from '../common';
|
5
|
+
import constants from '../constants';
|
6
|
+
|
7
|
+
jest.mock('axios');
|
8
|
+
|
9
|
+
describe('test getLinksFromSitemap', () => {
  const { maxRequestsPerCrawl } = constants;

  /**
   * Mocks the HTTP response with `responseBody`, runs getLinksFromSitemap on
   * `sitemapUrl` and asserts that the result equals `expectedLinks`, capped at
   * `maxRequestsPerCrawl` entries.
   *
   * The URL itself doesn't matter because the response from any get request is mocked.
   */
  const expectLinksFromSitemap = async (
    sitemapUrl: string,
    responseBody: unknown,
    expectedLinks: readonly unknown[],
  ) => {
    axios.get = jest.fn().mockResolvedValue({ data: responseBody });
    const links = await getLinksFromSitemap(sitemapUrl, maxRequestsPerCrawl);
    expect(links).toEqual(expectedLinks.slice(0, maxRequestsPerCrawl));
  };

  test('should only get links from loc tags in an XML sitemap and not include namespace links or links in comments', async () => {
    await expectLinksFromSitemap(
      'http://mockUrl/sitemap.xml',
      sampleData.sampleXmlSitemap,
      sampleData.sampleXmlSitemapLinks,
    );
  });

  test('should only get links from link tags in a RSS feed sitemap, and duplicate links should only be added once', async () => {
    await expectLinksFromSitemap(
      'http://mockUrl/rssfeed.xml',
      sampleData.sampleRssFeed,
      sampleData.sampleRssFeedLinks,
    );
  });

  test('should only get links from the href property in link tags in an Atom feed sitemap', async () => {
    await expectLinksFromSitemap(
      'http://mockUrl/atomfeed.xml',
      sampleData.sampleAtomFeed,
      sampleData.sampleAtomFeedLinks,
    );
  });

  test('should get all links from a txt sitemap', async () => {
    await expectLinksFromSitemap(
      'http://mockUrl/sitemap.txt',
      sampleData.sampleTxtSitemap,
      sampleData.sampleTxtSitemapLinks,
    );
  });

  test('should get all links from a non standard XML sitemap', async () => {
    await expectLinksFromSitemap(
      'http://mockUrl/weirdSitemap.xml',
      sampleData.sampleNonStandardXmlSitemap,
      sampleData.sampleNonStandardXmlSitemapLinks,
    );
  });
});
|