@govtechsg/oobee 0.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/.dockerignore +22 -0
  2. package/.github/pull_request_template.md +11 -0
  3. package/.github/workflows/docker-test.yml +54 -0
  4. package/.github/workflows/image.yml +107 -0
  5. package/.github/workflows/publish.yml +18 -0
  6. package/.idea/modules.xml +8 -0
  7. package/.idea/purple-a11y.iml +9 -0
  8. package/.idea/vcs.xml +6 -0
  9. package/.prettierrc.json +12 -0
  10. package/.vscode/extensions.json +5 -0
  11. package/.vscode/settings.json +10 -0
  12. package/CODE_OF_CONDUCT.md +128 -0
  13. package/DETAILS.md +163 -0
  14. package/Dockerfile +60 -0
  15. package/INSTALLATION.md +146 -0
  16. package/INTEGRATION.md +785 -0
  17. package/LICENSE +22 -0
  18. package/README.md +587 -0
  19. package/SECURITY.md +5 -0
  20. package/__mocks__/mock-report.html +1431 -0
  21. package/__mocks__/mockFunctions.ts +32 -0
  22. package/__mocks__/mockIssues.ts +64 -0
  23. package/__mocks__/mock_all_issues/000000001.json +64 -0
  24. package/__mocks__/mock_all_issues/000000002.json +53 -0
  25. package/__mocks__/mock_all_issues/fake-file.txt +0 -0
  26. package/__tests__/logs.test.ts +25 -0
  27. package/__tests__/mergeAxeResults.test.ts +278 -0
  28. package/__tests__/utils.test.ts +118 -0
  29. package/a11y-scan-results.zip +0 -0
  30. package/eslint.config.js +53 -0
  31. package/exclusions.txt +2 -0
  32. package/gitlab-pipeline-template.yml +54 -0
  33. package/jest.config.js +1 -0
  34. package/package.json +96 -0
  35. package/scripts/copyFiles.js +44 -0
  36. package/scripts/install_oobee_dependencies.cmd +13 -0
  37. package/scripts/install_oobee_dependencies.command +101 -0
  38. package/scripts/install_oobee_dependencies.ps1 +110 -0
  39. package/scripts/oobee_shell.cmd +13 -0
  40. package/scripts/oobee_shell.command +11 -0
  41. package/scripts/oobee_shell.sh +55 -0
  42. package/scripts/oobee_shell_ps.ps1 +54 -0
  43. package/src/cli.ts +401 -0
  44. package/src/combine.ts +240 -0
  45. package/src/constants/__tests__/common.test.ts +44 -0
  46. package/src/constants/cliFunctions.ts +305 -0
  47. package/src/constants/common.ts +1840 -0
  48. package/src/constants/constants.ts +443 -0
  49. package/src/constants/errorMeta.json +319 -0
  50. package/src/constants/itemTypeDescription.ts +11 -0
  51. package/src/constants/oobeeAi.ts +141 -0
  52. package/src/constants/questions.ts +181 -0
  53. package/src/constants/sampleData.ts +187 -0
  54. package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
  55. package/src/crawlers/commonCrawlerFunc.ts +656 -0
  56. package/src/crawlers/crawlDomain.ts +877 -0
  57. package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
  58. package/src/crawlers/crawlLocalFile.ts +193 -0
  59. package/src/crawlers/crawlSitemap.ts +356 -0
  60. package/src/crawlers/custom/extractAndGradeText.ts +57 -0
  61. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
  62. package/src/crawlers/custom/utils.ts +486 -0
  63. package/src/crawlers/customAxeFunctions.ts +82 -0
  64. package/src/crawlers/pdfScanFunc.ts +468 -0
  65. package/src/crawlers/runCustom.ts +117 -0
  66. package/src/index.ts +173 -0
  67. package/src/logs.ts +66 -0
  68. package/src/mergeAxeResults.ts +964 -0
  69. package/src/npmIndex.ts +284 -0
  70. package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
  71. package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
  72. package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
  73. package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
  74. package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
  75. package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
  76. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
  77. package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
  78. package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
  79. package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
  80. package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
  81. package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
  82. package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
  83. package/src/static/ejs/partials/components/topFive.ejs +6 -0
  84. package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
  85. package/src/static/ejs/partials/footer.ejs +21 -0
  86. package/src/static/ejs/partials/header.ejs +230 -0
  87. package/src/static/ejs/partials/main.ejs +40 -0
  88. package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
  89. package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
  90. package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
  91. package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
  92. package/src/static/ejs/partials/scripts/popper.ejs +7 -0
  93. package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
  94. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
  95. package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
  96. package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
  97. package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
  98. package/src/static/ejs/partials/scripts/utils.ejs +441 -0
  99. package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
  100. package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
  101. package/src/static/ejs/partials/styles/styles.ejs +1843 -0
  102. package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
  103. package/src/static/ejs/partials/summaryHeader.ejs +70 -0
  104. package/src/static/ejs/partials/summaryMain.ejs +75 -0
  105. package/src/static/ejs/report.ejs +420 -0
  106. package/src/static/ejs/summary.ejs +47 -0
  107. package/src/static/mustache/.prettierrc +4 -0
  108. package/src/static/mustache/Attention Deficit.mustache +11 -0
  109. package/src/static/mustache/Blind.mustache +11 -0
  110. package/src/static/mustache/Cognitive.mustache +7 -0
  111. package/src/static/mustache/Colorblindness.mustache +20 -0
  112. package/src/static/mustache/Deaf.mustache +12 -0
  113. package/src/static/mustache/Deafblind.mustache +7 -0
  114. package/src/static/mustache/Dyslexia.mustache +14 -0
  115. package/src/static/mustache/Low Vision.mustache +7 -0
  116. package/src/static/mustache/Mobility.mustache +15 -0
  117. package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
  118. package/src/static/mustache/report.mustache +1709 -0
  119. package/src/types/print-message.d.ts +28 -0
  120. package/src/types/types.ts +46 -0
  121. package/src/types/xpath-to-css.d.ts +3 -0
  122. package/src/utils.ts +332 -0
  123. package/tsconfig.json +15 -0
package/src/cli.ts ADDED
@@ -0,0 +1,401 @@
+ #!/usr/bin/env node
+ import _yargs from 'yargs';
+ import { hideBin } from 'yargs/helpers';
+ import printMessage from 'print-message';
+ import { devices } from 'playwright';
+ import { fileURLToPath } from 'url';
+ import path from 'path';
+ import { cleanUp, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
+ import {
+   checkUrl,
+   prepareData,
+   getFileSitemap,
+   validEmail,
+   validName,
+   getBrowserToRun,
+   getPlaywrightDeviceDetailsObject,
+   deleteClonedProfiles,
+   getScreenToScan,
+   getClonedProfilesWithRandomToken,
+   validateDirPath,
+   validateFilePath,
+   validateCustomFlowLabel,
+   parseHeaders,
+ } from './constants/common.js';
+ import constants, { ScannerTypes } from './constants/constants.js';
+ import { cliOptions, messageOptions } from './constants/cliFunctions.js';
+ import combineRun from './combine.js';
+ import { Answers } from './index.js';
+
+ const appVersion = getVersion();
+ const yargs = _yargs(hideBin(process.argv));
+
+ const options = yargs
+   .version(false)
+   .usage(
+     `Oobee version: ${appVersion}
+ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
+   )
+   .strictOptions(true)
+   .options(cliOptions)
+   .example([
+     [
+       `To scan sitemap of website:', 'npm run cli -- -c [ 1 | sitemap ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
+     ],
+     [
+       `To scan a website', 'npm run cli -- -c [ 2 | website ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
+     ],
+     [
+       `To start a custom flow scan', 'npm run cli -- -c [ 3 | custom ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
+     ],
+   ])
+   .coerce('d', option => {
+     const device = devices[option];
+     if (!device && option !== 'Desktop' && option !== 'Mobile') {
+       printMessage(
+         [`Invalid device. Please provide an existing device to start the scan.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('w', option => {
+     if (!option || Number.isNaN(option)) {
+       printMessage([`Invalid viewport width. Please provide a number. `], messageOptions);
+       process.exit(1);
+     } else if (option < 320 || option > 1080) {
+       printMessage(
+         ['Invalid viewport width! Please provide a viewport width between 320-1080 pixels.'],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('p', option => {
+     if (!Number.isInteger(option) || Number(option) <= 0) {
+       printMessage(
+         [`Invalid maximum number of pages. Please provide a positive integer.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('t', option => {
+     if (!Number.isInteger(option) || Number(option) <= 0) {
+       printMessage(
+         [`Invalid number for max concurrency. Please provide a positive integer.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('k', nameEmail => {
+     if (nameEmail.indexOf(':') === -1) {
+       printMessage(
+         [`Invalid format. Please provide your name and email address separated by ":"`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     const [name, email] = nameEmail.split(':');
+     if (name === '' || name === undefined || name === null) {
+       printMessage([`Please provide your name.`], messageOptions);
+       process.exit(1);
+     }
+     if (!validName(name)) {
+       printMessage([`Invalid name. Please provide a valid name.`], messageOptions);
+       process.exit(1);
+     }
+     if (!validEmail(email)) {
+       printMessage(
+         [`Invalid email address. Please provide a valid email address.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return nameEmail;
+   })
+   .coerce('e', option => {
+     const validationErrors = validateDirPath(option);
+     if (validationErrors) {
+       printMessage([`Invalid exportDirectory directory path. ${validationErrors}`], messageOptions);
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('x', option => {
+     const filename = fileURLToPath(import.meta.url);
+     const dirname = `${path.dirname(filename)}/../`; // check in the parent of dist directory
+
+     try {
+       return validateFilePath(option, dirname);
+     } catch (err) {
+       printMessage([`Invalid blacklistedPatternsFilename file path. ${err}`], messageOptions);
+       process.exit(1);
+     }
+
+     // eslint-disable-next-line no-unreachable
+     return null;
+   })
+   .coerce('i', option => {
+     const { choices } = cliOptions.i;
+     if (!choices.includes(option)) {
+       printMessage(
+         [`Invalid value for fileTypes. Please provide valid keywords: ${choices.join(', ')}.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('j', option => {
+     const { isValid, errorMessage } = validateCustomFlowLabel(option);
+     if (!isValid) {
+       printMessage([errorMessage], messageOptions);
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('a', option => {
+     const { choices } = cliOptions.a;
+     if (!choices.includes(option)) {
+       printMessage(
+         [`Invalid value for additional. Please provide valid keywords: ${choices.join(', ')}.`],
+         messageOptions,
+       );
+       process.exit(1);
+     }
+     return option;
+   })
+   .coerce('q', option => {
+     try {
+       JSON.parse(option);
+     } catch {
+       // default to empty object
+       return '{}';
+     }
+     return option;
+   })
+   .coerce('m', option => {
+     return option;
+   })
+   .check(argvs => {
+     if (
+       (argvs.scanner === ScannerTypes.CUSTOM || argvs.scanner === ScannerTypes.LOCALFILE) &&
+       argvs.maxpages
+     ) {
+       throw new Error('-p or --maxpages is only available in website and sitemap scans.');
+     }
+     return true;
+   })
+   .check(argvs => {
+     if (argvs.scanner !== ScannerTypes.WEBSITE && argvs.strategy) {
+       throw new Error('-s or --strategy is only available in website scans.');
+     }
+     return true;
+   })
+   .conflicts('d', 'w')
+   .parse() as unknown as Answers;
+
+ const scanInit = async (argvs: Answers): Promise<string> => {
+   let isCustomFlow = false;
+   if (argvs.scanner === ScannerTypes.CUSTOM) {
+     isCustomFlow = true;
+   }
+
+   const updatedArgvs = { ...argvs };
+
+   // let chromeDataDir = null;
+   // let edgeDataDir = null;
+   // Empty string for profile directory will use incognito mode in playwright
+   let clonedDataDir = '';
+   const statuses = constants.urlCheckStatuses;
+
+   const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(updatedArgvs.browserToRun, true);
+   updatedArgvs.browserToRun = browserToRun;
+   clonedDataDir = clonedBrowserDataDir;
+
+   if (updatedArgvs.customDevice === 'Desktop' || updatedArgvs.customDevice === 'Mobile') {
+     updatedArgvs.deviceChosen = argvs.customDevice;
+     delete updatedArgvs.customDevice;
+   }
+
+   // Creating the playwrightDeviceDetailObject
+   // for use in crawlDomain & crawlSitemap's preLaunchHook
+   updatedArgvs.playwrightDeviceDetailsObject = getPlaywrightDeviceDetailsObject(
+     updatedArgvs.deviceChosen,
+     updatedArgvs.customDevice,
+     updatedArgvs.viewportWidth,
+   );
+
+   const res = await checkUrl(
+     updatedArgvs.scanner,
+     updatedArgvs.url,
+     updatedArgvs.browserToRun,
+     clonedDataDir,
+     updatedArgvs.playwrightDeviceDetailsObject,
+     isCustomFlow,
+     updatedArgvs.header,
+   );
+   switch (res.status) {
+     case statuses.success.code: {
+       updatedArgvs.finalUrl = res.url;
+       if (process.env.VALIDATE_URL_PH_GUI) {
+         console.log('Url is valid');
+         process.exit(0);
+       }
+       break;
+     }
+     case statuses.unauthorised.code: {
+       printMessage([statuses.unauthorised.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     case statuses.cannotBeResolved.code: {
+       printMessage([statuses.cannotBeResolved.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     case statuses.systemError.code: {
+       printMessage([statuses.systemError.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     case statuses.invalidUrl.code: {
+       if (
+         updatedArgvs.scanner !== ScannerTypes.SITEMAP &&
+         updatedArgvs.scanner !== ScannerTypes.LOCALFILE
+       ) {
+         printMessage([statuses.invalidUrl.message], messageOptions);
+         process.exit(res.status);
+       }
+
+       const finalFilePath = getFileSitemap(updatedArgvs.url);
+       if (finalFilePath) {
+         updatedArgvs.isLocalFileScan = true;
+         updatedArgvs.finalUrl = finalFilePath;
+         if (process.env.VALIDATE_URL_PH_GUI) {
+           console.log('Url is valid');
+           process.exit(0);
+         }
+       } else if (updatedArgvs.scanner === ScannerTypes.LOCALFILE) {
+         printMessage([statuses.notALocalFile.message], messageOptions);
+         process.exit(statuses.notALocalFile.code);
+       } else if (updatedArgvs.scanner !== ScannerTypes.SITEMAP) {
+         printMessage([statuses.notASitemap.message], messageOptions);
+         process.exit(statuses.notASitemap.code);
+       }
+       break;
+     }
+     case statuses.notASitemap.code: {
+       printMessage([statuses.notASitemap.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     case statuses.notALocalFile.code: {
+       printMessage([statuses.notALocalFile.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     case statuses.browserError.code: {
+       printMessage([statuses.browserError.message], messageOptions);
+       process.exit(res.status);
+       break;
+     }
+     default:
+       break;
+   }
+
+   if (updatedArgvs.scanner === ScannerTypes.WEBSITE && !updatedArgvs.strategy) {
+     updatedArgvs.strategy = 'same-domain';
+   }
+
+   const data = await prepareData(updatedArgvs);
+
+   // File clean up after url check
+   // files will clone a second time below if url check passes
+   if (process.env.OOBEE_VERBOSE) {
+     deleteClonedProfiles(data.browser, data.randomToken);
+   } else {
+     deleteClonedProfiles(data.browser); // first deletion
+   }
+
+   if (updatedArgvs.exportDirectory) {
+     constants.exportDirectory = updatedArgvs.exportDirectory;
+   }
+
+   if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) {
+     const randomTokenMessage = {
+       type: 'randomToken',
+       payload: `${data.randomToken}`,
+     };
+     if (process.send) {
+       process.send(JSON.stringify(randomTokenMessage));
+     }
+   }
+
+   setHeadlessMode(data.browser, data.isHeadless);
+
+   const screenToScan = getScreenToScan(
+     updatedArgvs.deviceChosen,
+     updatedArgvs.customDevice,
+     updatedArgvs.viewportWidth,
+   );
+
+   // Clone profiles a second time
+   clonedDataDir = getClonedProfilesWithRandomToken(data.browser, data.randomToken);
+   data.userDataDirectory = clonedDataDir;
+
+   printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
+
+   await combineRun(data, screenToScan);
+
+   // Delete cloned directory
+   if (process.env.OOBEE_VERBOSE) {
+     deleteClonedProfiles(data.browser, data.randomToken);
+   } else {
+     deleteClonedProfiles(data.browser); // second deletion
+   }
+
+   // Delete dataset and request queues
+   await cleanUp(data.randomToken);
+
+   return getStoragePath(data.randomToken);
+ };
+
+ const optionsAnswer: Answers = {
+   scanner: options.scanner,
+   header: options.header,
+   browserToRun: options.browserToRun,
+   zip: options.zip,
+   url: options.url,
+   finalUrl: options.finalUrl,
+   headless: options.headless,
+   maxpages: options.maxpages,
+   metadata: options.metadata,
+   safeMode: options.safeMode,
+   strategy: options.strategy,
+   fileTypes: options.fileTypes,
+   nameEmail: options.nameEmail,
+   additional: options.additional,
+   customDevice: options.customDevice,
+   deviceChosen: options.deviceChosen,
+   followRobots: options.followRobots,
+   customFlowLabel: options.customFlowLabel,
+   viewportWidth: options.viewportWidth,
+   isLocalFileScan: options.isLocalFileScan,
+   exportDirectory: options.exportDirectory,
+   clonedBrowserDataDir: options.clonedBrowserDataDir,
+   specifiedMaxConcurrency: options.specifiedMaxConcurrency,
+   blacklistedPatternsFilename: options.blacklistedPatternsFilename,
+   playwrightDeviceDetailsObject: options.playwrightDeviceDetailsObject,
+   ruleset: options.ruleset,
+ };
+ await scanInit(optionsAnswer);
+ process.exit(0);
+
+ export default options;
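Note: the `.coerce` handlers in cli.ts above all follow the same validate-or-exit pattern — inspect the raw flag value, print a message, and terminate with a non-zero exit code on bad input. The following is a minimal standalone sketch of that pattern, not part of the package; the `--width` option name, bounds, and messages are illustrative assumptions.

import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';

// Minimal sketch of the validate-or-exit coercion pattern used in cli.ts above.
// Option name, bounds, and messages are illustrative, not oobee's.
const argv = yargs(hideBin(process.argv))
  .option('width', { type: 'number', describe: 'Viewport width in pixels' })
  .coerce('width', (value: number) => {
    if (Number.isNaN(value) || value < 320 || value > 1080) {
      console.error('Invalid viewport width. Provide a number between 320 and 1080.');
      process.exit(1);
    }
    return value; // the returned value replaces the raw argv entry
  })
  .parseSync();

console.log(`Viewport width: ${argv.width ?? 'default'}`);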
package/src/combine.ts ADDED
@@ -0,0 +1,240 @@
+ import printMessage from 'print-message';
+ import { pathToFileURL } from 'url';
+ import crawlSitemap from './crawlers/crawlSitemap.js';
+ import crawlDomain from './crawlers/crawlDomain.js';
+ import crawlLocalFile from './crawlers/crawlLocalFile.js';
+ import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
+ import generateArtifacts from './mergeAxeResults.js';
+ import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
+ import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
+ import { getBlackListedPatterns, submitForm, urlWithoutAuth } from './constants/common.js';
+ import { consoleLogger, silentLogger } from './logs.js';
+ import runCustom from './crawlers/runCustom.js';
+ import { alertMessageOptions } from './constants/cliFunctions.js';
+ import { Data } from './index.js';
+
+ // Class exports
+ export class ViewportSettingsClass {
+   deviceChosen: string;
+   customDevice: string;
+   viewportWidth: number;
+   playwrightDeviceDetailsObject: any; // You can replace 'any' with a more specific type if possible
+
+   constructor(
+     deviceChosen: string,
+     customDevice: string,
+     viewportWidth: number,
+     playwrightDeviceDetailsObject: any,
+   ) {
+     this.deviceChosen = deviceChosen;
+     this.customDevice = customDevice;
+     this.viewportWidth = viewportWidth;
+     this.playwrightDeviceDetailsObject = playwrightDeviceDetailsObject;
+   }
+ }
+
+ const combineRun = async (details: Data, deviceToScan: string) => {
+   const envDetails = { ...details };
+
+   const {
+     type,
+     url,
+     nameEmail,
+     randomToken,
+     deviceChosen,
+     customDevice,
+     viewportWidth,
+     playwrightDeviceDetailsObject,
+     maxRequestsPerCrawl,
+     browser,
+     userDataDirectory,
+     strategy,
+     specifiedMaxConcurrency,
+     fileTypes,
+     blacklistedPatternsFilename,
+     includeScreenshots,
+     followRobots,
+     metadata,
+     customFlowLabel = 'Custom Flow',
+     extraHTTPHeaders,
+     safeMode,
+     zip,
+     ruleset,
+   } = envDetails;
+
+   process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
+   process.env.CRAWLEE_STORAGE_DIR = randomToken;
+
+   const host = type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE ? '' : getHost(url);
+
+   let blacklistedPatterns: string[] | null = null;
+   try {
+     blacklistedPatterns = getBlackListedPatterns(blacklistedPatternsFilename);
+   } catch (error) {
+     consoleLogger.error(error);
+     silentLogger.error(error);
+     process.exit(1);
+   }
+
+   // remove basic-auth credentials from URL
+   const finalUrl = !(type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE)
+     ? urlWithoutAuth(url)
+     : new URL(pathToFileURL(url));
+
+   // Use the string version of finalUrl to reduce logic at submitForm
+   const finalUrlString = finalUrl.toString();
+
+   const scanDetails = {
+     startTime: new Date(),
+     endTime: new Date(),
+     crawlType: type,
+     requestUrl: finalUrl,
+     urlsCrawled: new UrlsCrawled(),
+   };
+
+   const viewportSettings: ViewportSettingsClass = new ViewportSettingsClass(
+     deviceChosen,
+     customDevice,
+     viewportWidth,
+     playwrightDeviceDetailsObject,
+   );
+
+   let urlsCrawledObj: UrlsCrawled;
+   switch (type) {
+     case ScannerTypes.CUSTOM:
+       urlsCrawledObj = await runCustom(
+         url,
+         randomToken,
+         viewportSettings,
+         blacklistedPatterns,
+         includeScreenshots,
+       );
+       break;
+
+     case ScannerTypes.SITEMAP:
+       urlsCrawledObj = await crawlSitemap(
+         url,
+         randomToken,
+         host,
+         viewportSettings,
+         maxRequestsPerCrawl,
+         browser,
+         userDataDirectory,
+         specifiedMaxConcurrency,
+         fileTypes,
+         blacklistedPatterns,
+         includeScreenshots,
+         extraHTTPHeaders,
+       );
+       break;
+
+     case ScannerTypes.LOCALFILE:
+       urlsCrawledObj = await crawlLocalFile(
+         url,
+         randomToken,
+         host,
+         viewportSettings,
+         maxRequestsPerCrawl,
+         browser,
+         userDataDirectory,
+         specifiedMaxConcurrency,
+         fileTypes,
+         blacklistedPatterns,
+         includeScreenshots,
+         extraHTTPHeaders,
+       );
+       break;
+
+     case ScannerTypes.INTELLIGENT:
+       urlsCrawledObj = await crawlIntelligentSitemap(
+         url,
+         randomToken,
+         host,
+         viewportSettings,
+         maxRequestsPerCrawl,
+         browser,
+         userDataDirectory,
+         strategy,
+         specifiedMaxConcurrency,
+         fileTypes,
+         blacklistedPatterns,
+         includeScreenshots,
+         followRobots,
+         extraHTTPHeaders,
+         safeMode,
+       );
+       break;
+
+     case ScannerTypes.WEBSITE:
+       urlsCrawledObj = await crawlDomain({
+         url,
+         randomToken,
+         host,
+         viewportSettings,
+         maxRequestsPerCrawl,
+         browser,
+         userDataDirectory,
+         strategy,
+         specifiedMaxConcurrency,
+         fileTypes,
+         blacklistedPatterns,
+         includeScreenshots,
+         followRobots,
+         extraHTTPHeaders,
+         safeMode,
+         ruleset,
+       });
+       break;
+
+     default:
+       consoleLogger.error(`type: ${type} not defined`);
+       silentLogger.error(`type: ${type} not defined`);
+       process.exit(1);
+   }
+
+   scanDetails.endTime = new Date();
+   scanDetails.urlsCrawled = urlsCrawledObj;
+   await createDetailsAndLogs(randomToken);
+   if (scanDetails.urlsCrawled) {
+     if (scanDetails.urlsCrawled.scanned.length > 0) {
+       await createAndUpdateResultsFolders(randomToken);
+       const pagesNotScanned = [
+         ...urlsCrawledObj.error,
+         ...urlsCrawledObj.invalid,
+         ...urlsCrawledObj.forbidden,
+       ];
+       const basicFormHTMLSnippet = await generateArtifacts(
+         randomToken,
+         url,
+         type,
+         deviceToScan,
+         urlsCrawledObj.scanned,
+         pagesNotScanned,
+         customFlowLabel,
+         undefined,
+         scanDetails,
+         zip,
+       );
+       const [name, email] = nameEmail.split(':');
+
+       await submitForm(
+         browser,
+         userDataDirectory,
+         url, // scannedUrl
+         new URL(finalUrlString).href, // entryUrl
+         type,
+         email,
+         name,
+         JSON.stringify(basicFormHTMLSnippet),
+         urlsCrawledObj.scanned.length,
+         urlsCrawledObj.scannedRedirects.length,
+         pagesNotScanned.length,
+         metadata,
+       );
+     }
+   } else {
+     printMessage([`No pages were scanned.`], alertMessageOptions);
+   }
+ };
+
+ export default combineRun;
package/src/constants/__tests__/common.test.ts ADDED
@@ -0,0 +1,44 @@
+ import { jest } from '@jest/globals';
+ import axios from 'axios';
+ import * as sampleData from '../sampleData';
+ import { getLinksFromSitemap } from '../common';
+ import constants from '../constants';
+
+ jest.mock('axios');
+
+ describe('test getLinksFromSitemap', () => {
+   const maxRequestsPerCrawl = constants.maxRequestsPerCrawl;
+
+   test('should only get links from loc tags in an XML sitemap and not include namespace links or links in comments', async () => {
+     axios.get = jest.fn().mockResolvedValue({ data: sampleData.sampleXmlSitemap });
+     // URL passed to getLinksFromSitemap here doesn't matter because the response from any get requests is mocked
+     const links = await getLinksFromSitemap('http://mockUrl/sitemap.xml', maxRequestsPerCrawl);
+     expect(links).toEqual(sampleData.sampleXmlSitemapLinks.slice(0, maxRequestsPerCrawl));
+   });
+
+   test('should only get links from link tags in a RSS feed sitemap, and duplicate links should only be added once', async () => {
+     axios.get = jest.fn().mockResolvedValue({ data: sampleData.sampleRssFeed });
+     const links = await getLinksFromSitemap('http://mockUrl/rssfeed.xml', maxRequestsPerCrawl);
+     expect(links).toEqual(sampleData.sampleRssFeedLinks.slice(0, maxRequestsPerCrawl));
+   });
+
+   test('should only get links from the href property in link tags in an Atom feed sitemap', async () => {
+     axios.get = jest.fn().mockResolvedValue({ data: sampleData.sampleAtomFeed });
+     const links = await getLinksFromSitemap('http://mockUrl/atomfeed.xml', maxRequestsPerCrawl);
+     expect(links).toEqual(sampleData.sampleAtomFeedLinks.slice(0, maxRequestsPerCrawl));
+   });
+
+   test('should get all links from a txt sitemap', async () => {
+     axios.get = jest.fn().mockResolvedValue({ data: sampleData.sampleTxtSitemap });
+     const links = await getLinksFromSitemap('http://mockUrl/sitemap.txt', maxRequestsPerCrawl);
+     expect(links).toEqual(sampleData.sampleTxtSitemapLinks.slice(0, maxRequestsPerCrawl));
+   });
+
+   test('should get all links from a non standard XML sitemap', async () => {
+     axios.get = jest.fn().mockResolvedValue({ data: sampleData.sampleNonStandardXmlSitemap });
+     const links = await getLinksFromSitemap('http://mockUrl/weirdSitemap.xml', maxRequestsPerCrawl);
+     expect(links).toEqual(
+       sampleData.sampleNonStandardXmlSitemapLinks.slice(0, maxRequestsPerCrawl),
+     );
+   });
+ });