@govtechsg/oobee 0.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/.dockerignore +22 -0
  2. package/.github/pull_request_template.md +11 -0
  3. package/.github/workflows/docker-test.yml +54 -0
  4. package/.github/workflows/image.yml +107 -0
  5. package/.github/workflows/publish.yml +18 -0
  6. package/.idea/modules.xml +8 -0
  7. package/.idea/purple-a11y.iml +9 -0
  8. package/.idea/vcs.xml +6 -0
  9. package/.prettierrc.json +12 -0
  10. package/.vscode/extensions.json +5 -0
  11. package/.vscode/settings.json +10 -0
  12. package/CODE_OF_CONDUCT.md +128 -0
  13. package/DETAILS.md +163 -0
  14. package/Dockerfile +60 -0
  15. package/INSTALLATION.md +146 -0
  16. package/INTEGRATION.md +785 -0
  17. package/LICENSE +22 -0
  18. package/README.md +587 -0
  19. package/SECURITY.md +5 -0
  20. package/__mocks__/mock-report.html +1431 -0
  21. package/__mocks__/mockFunctions.ts +32 -0
  22. package/__mocks__/mockIssues.ts +64 -0
  23. package/__mocks__/mock_all_issues/000000001.json +64 -0
  24. package/__mocks__/mock_all_issues/000000002.json +53 -0
  25. package/__mocks__/mock_all_issues/fake-file.txt +0 -0
  26. package/__tests__/logs.test.ts +25 -0
  27. package/__tests__/mergeAxeResults.test.ts +278 -0
  28. package/__tests__/utils.test.ts +118 -0
  29. package/a11y-scan-results.zip +0 -0
  30. package/eslint.config.js +53 -0
  31. package/exclusions.txt +2 -0
  32. package/gitlab-pipeline-template.yml +54 -0
  33. package/jest.config.js +1 -0
  34. package/package.json +96 -0
  35. package/scripts/copyFiles.js +44 -0
  36. package/scripts/install_oobee_dependencies.cmd +13 -0
  37. package/scripts/install_oobee_dependencies.command +101 -0
  38. package/scripts/install_oobee_dependencies.ps1 +110 -0
  39. package/scripts/oobee_shell.cmd +13 -0
  40. package/scripts/oobee_shell.command +11 -0
  41. package/scripts/oobee_shell.sh +55 -0
  42. package/scripts/oobee_shell_ps.ps1 +54 -0
  43. package/src/cli.ts +401 -0
  44. package/src/combine.ts +240 -0
  45. package/src/constants/__tests__/common.test.ts +44 -0
  46. package/src/constants/cliFunctions.ts +305 -0
  47. package/src/constants/common.ts +1840 -0
  48. package/src/constants/constants.ts +443 -0
  49. package/src/constants/errorMeta.json +319 -0
  50. package/src/constants/itemTypeDescription.ts +11 -0
  51. package/src/constants/oobeeAi.ts +141 -0
  52. package/src/constants/questions.ts +181 -0
  53. package/src/constants/sampleData.ts +187 -0
  54. package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
  55. package/src/crawlers/commonCrawlerFunc.ts +656 -0
  56. package/src/crawlers/crawlDomain.ts +877 -0
  57. package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
  58. package/src/crawlers/crawlLocalFile.ts +193 -0
  59. package/src/crawlers/crawlSitemap.ts +356 -0
  60. package/src/crawlers/custom/extractAndGradeText.ts +57 -0
  61. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
  62. package/src/crawlers/custom/utils.ts +486 -0
  63. package/src/crawlers/customAxeFunctions.ts +82 -0
  64. package/src/crawlers/pdfScanFunc.ts +468 -0
  65. package/src/crawlers/runCustom.ts +117 -0
  66. package/src/index.ts +173 -0
  67. package/src/logs.ts +66 -0
  68. package/src/mergeAxeResults.ts +964 -0
  69. package/src/npmIndex.ts +284 -0
  70. package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
  71. package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
  72. package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
  73. package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
  74. package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
  75. package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
  76. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
  77. package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
  78. package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
  79. package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
  80. package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
  81. package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
  82. package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
  83. package/src/static/ejs/partials/components/topFive.ejs +6 -0
  84. package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
  85. package/src/static/ejs/partials/footer.ejs +21 -0
  86. package/src/static/ejs/partials/header.ejs +230 -0
  87. package/src/static/ejs/partials/main.ejs +40 -0
  88. package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
  89. package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
  90. package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
  91. package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
  92. package/src/static/ejs/partials/scripts/popper.ejs +7 -0
  93. package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
  94. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
  95. package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
  96. package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
  97. package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
  98. package/src/static/ejs/partials/scripts/utils.ejs +441 -0
  99. package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
  100. package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
  101. package/src/static/ejs/partials/styles/styles.ejs +1843 -0
  102. package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
  103. package/src/static/ejs/partials/summaryHeader.ejs +70 -0
  104. package/src/static/ejs/partials/summaryMain.ejs +75 -0
  105. package/src/static/ejs/report.ejs +420 -0
  106. package/src/static/ejs/summary.ejs +47 -0
  107. package/src/static/mustache/.prettierrc +4 -0
  108. package/src/static/mustache/Attention Deficit.mustache +11 -0
  109. package/src/static/mustache/Blind.mustache +11 -0
  110. package/src/static/mustache/Cognitive.mustache +7 -0
  111. package/src/static/mustache/Colorblindness.mustache +20 -0
  112. package/src/static/mustache/Deaf.mustache +12 -0
  113. package/src/static/mustache/Deafblind.mustache +7 -0
  114. package/src/static/mustache/Dyslexia.mustache +14 -0
  115. package/src/static/mustache/Low Vision.mustache +7 -0
  116. package/src/static/mustache/Mobility.mustache +15 -0
  117. package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
  118. package/src/static/mustache/report.mustache +1709 -0
  119. package/src/types/print-message.d.ts +28 -0
  120. package/src/types/types.ts +46 -0
  121. package/src/types/xpath-to-css.d.ts +3 -0
  122. package/src/utils.ts +332 -0
  123. package/tsconfig.json +15 -0
@@ -0,0 +1,1840 @@
1
+ /* eslint-disable consistent-return */
2
+ /* eslint-disable no-console */
3
+ /* eslint-disable camelcase */
4
+ /* eslint-disable no-use-before-define */
5
+ import validator from 'validator';
6
+ import axios from 'axios';
7
+ import { JSDOM } from 'jsdom';
8
+ import * as cheerio from 'cheerio';
9
+ import crawlee, { EnqueueStrategy, Request } from 'crawlee';
10
+ import { parseString } from 'xml2js';
11
+ import fs from 'fs';
12
+ import path from 'path';
13
+ import url, { fileURLToPath, pathToFileURL } from 'url';
14
+ import safe from 'safe-regex';
15
+ import * as https from 'https';
16
+ import os from 'os';
17
+ import { minimatch } from 'minimatch';
18
+ import { globSync } from 'glob';
19
+ import { LaunchOptions, devices, webkit } from 'playwright';
20
+ import printMessage from 'print-message';
21
+ import constants, {
22
+ getDefaultChromeDataDir,
23
+ getDefaultEdgeDataDir,
24
+ getDefaultChromiumDataDir,
25
+ proxy,
26
+ formDataFields,
27
+ ScannerTypes,
28
+ BrowserTypes,
29
+ } from './constants.js';
30
+ import { silentLogger } from '../logs.js';
31
+ import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
32
+ import { randomThreeDigitNumberString } from '../utils.js';
33
+ import { Answers, Data } from '../index.js';
34
+
35
/**
 * Checks that `dirPath` refers to an existing, accessible directory.
 *
 * @param dirPath - Path to check.
 * @returns `null` when the path is a directory; otherwise a message describing the problem.
 */
export const validateDirPath = (dirPath: string): string => {
  if (typeof dirPath !== 'string') {
    return 'Please provide string value of directory path.';
  }

  let isDirectory: boolean;
  try {
    fs.accessSync(dirPath);
    isDirectory = fs.statSync(dirPath).isDirectory();
  } catch {
    // accessSync / statSync throw when the path cannot be reached.
    return 'Please ensure path provided exists.';
  }

  return isDirectory ? null : 'Please provide a directory path.';
};
53
+
54
/**
 * Plain result record produced by the URL connectivity checks in this module.
 */
export class RES {
  /** Status code of the check; callers in this file assign `constants.urlCheckStatuses.*.code`. */
  status: number;

  /** URL the check settled on (the entry URL or the URL reached after navigation). */
  url: string;

  /** Body/content retrieved for the URL. */
  content: string;

  /**
   * @param res - Optional initial field values; copied onto the new instance.
   */
  constructor(res?: Partial<RES>) {
    if (!res) {
      return;
    }
    Object.assign(this, res);
  }
}
64
+
65
/**
 * Validates a custom flow label.
 *
 * Checks are applied in this order and the first failure is reported:
 *   1. the label contains one of `constants.forbiddenCharactersInDirPath`;
 *   2. the label is longer than 80 characters;
 *   3. the label contains a reserved keyword from `constants.reserveFileNameKeywords`
 *      immediately followed by `.` (case-insensitive).
 *
 * @param customFlowLabel - Label to validate.
 * @returns `{ isValid: true }`, or `{ isValid: false, errorMessage }` describing the first failure.
 */
export const validateCustomFlowLabel = (customFlowLabel: string) => {
  const hasForbiddenCharacter = constants.forbiddenCharactersInDirPath.some(forbidden =>
    customFlowLabel.includes(forbidden),
  );
  if (hasForbiddenCharacter) {
    const forbiddenList = constants.forbiddenCharactersInDirPath.toString().replaceAll(',', ' , ');
    return {
      isValid: false,
      errorMessage: `Invalid label. Cannot contain ${forbiddenList}`,
    };
  }

  if (customFlowLabel.length > 80) {
    return { isValid: false, errorMessage: `Invalid label. Cannot exceed 80 characters.` };
  }

  const lowerCasedLabel = customFlowLabel.toLowerCase();
  const hasReservedKeywordWithDot = constants.reserveFileNameKeywords.some(keyword =>
    lowerCasedLabel.includes(`${keyword.toLowerCase()}.`),
  );
  if (hasReservedKeywordWithDot) {
    const reservedList = constants.reserveFileNameKeywords.toString().replaceAll(',', ' , ');
    return {
      isValid: false,
      errorMessage: `Invalid label. Cannot have '.' appended to ${reservedList} as they are reserved keywords.`,
    };
  }

  return { isValid: true };
};
97
+
98
/**
 * Resolves `filePath` and verifies that it points to an existing `.txt` file.
 *
 * @param filePath - Absolute path, or a path relative to `cliDir`.
 * @param cliDir - Directory that relative paths are resolved against.
 * @returns The absolute path of the validated file.
 * @throws Error when `filePath` is not a string, the path cannot be accessed,
 *   the path is not a regular file, or the file does not have a `.txt` extension.
 */
export const validateFilePath = (filePath: string, cliDir: string) => {
  if (typeof filePath !== 'string') {
    throw new Error('Please provide string value of file path.');
  }

  const absolutePath = path.isAbsolute(filePath) ? filePath : path.resolve(cliDir, filePath);

  // Only the filesystem probes live inside the try block, so that the more specific
  // "not a file" / "not a txt file" errors below are not masked by the generic message.
  let isFile: boolean;
  try {
    fs.accessSync(absolutePath);
    isFile = fs.statSync(absolutePath).isFile();
  } catch {
    throw new Error(`Please ensure path provided exists: ${absolutePath}`);
  }

  if (!isFile) {
    throw new Error('Please provide a file path.');
  }

  if (path.extname(absolutePath) !== '.txt') {
    throw new Error('Please provide a file with txt extension.');
  }

  return absolutePath;
};
121
+
122
/**
 * Loads the list of blacklisted URL patterns.
 *
 * The patterns are read from `blacklistedPatternsFilename` when given; otherwise from
 * `exclusions.txt` in the current working directory if that file exists.
 *
 * @param blacklistedPatternsFilename - Path of the patterns file, or `null` to use the default.
 * @returns The non-empty, trimmed lines of the file, or `null` when no file is available.
 * @throws Error when any pattern is rejected by `safe-regex`.
 */
export const getBlackListedPatterns = (
  blacklistedPatternsFilename: string | null,
): string[] | null => {
  const exclusionsFile =
    blacklistedPatternsFilename || (fs.existsSync('exclusions.txt') ? 'exclusions.txt' : null);
  if (!exclusionsFile) {
    return null;
  }

  // One pattern per line; blank lines are dropped.
  const blacklistedPatterns: string[] = [];
  for (const line of fs.readFileSync(exclusionsFile).toString().split('\n')) {
    const pattern = line.trim();
    if (pattern !== '') {
      blacklistedPatterns.push(pattern);
    }
  }

  const unsafe = blacklistedPatterns.filter(pattern => !safe(pattern));
  if (unsafe.length > 0) {
    throw new Error(`Unsafe expressions detected: ${unsafe} Please revise ${exclusionsFile}`);
  }

  return blacklistedPatterns;
};
150
+
151
/**
 * Reports whether the file extension of `url` is in `blacklistedFileExtensions`.
 *
 * The extension is taken from the last path segment after removing any query string or
 * fragment, so `https://a.com/f.zip?x=1` is recognised as `zip`. The comparison is
 * case-sensitive. A last segment without a `.` has no extension and is never blacklisted.
 *
 * @param url - URL (or path) to inspect.
 * @param blacklistedFileExtensions - Extensions without the leading dot.
 * @returns `true` when the extension is blacklisted.
 */
export const isBlacklistedFileExtensions = (url: string, blacklistedFileExtensions: string[]) => {
  const withoutQueryOrFragment = url.split(/[?#]/, 1)[0];
  const lastSegment = withoutQueryOrFragment.substring(
    withoutQueryOrFragment.lastIndexOf('/') + 1,
  );

  const dotIndex = lastSegment.lastIndexOf('.');
  if (dotIndex === -1) {
    return false;
  }

  return blacklistedFileExtensions.includes(lastSegment.substring(dotIndex + 1));
};
155
+
156
// jsdom document used by queryCheck to validate CSS selectors.
// The `document` property is destructured from the jsdom window: the window object itself
// does not provide createDocumentFragment(), so binding the window here made every
// queryCheck() call throw and isSelectorValid() report all selectors as invalid.
const { document } = new JSDOM('').window;

// HTTPS agent shared by the HTTP requests issued from this module.
const httpsAgent = new https.Agent({
  // Run in environments with custom certificates
  rejectUnauthorized: false,
  keepAlive: true,
});

// Display options passed to printMessage.
export const messageOptions = {
  border: false,
  marginTop: 2,
  marginBottom: 2,
};

// Options passed to validator.isURL: only http/https URLs with an explicit protocol are
// accepted; a top-level domain is not required.
const urlOptions = {
  protocols: ['http', 'https'],
  require_protocol: true,
  require_tld: false,
};
175
+
176
// Runs `s` as a selector query against an empty document fragment; used only for its
// throw / no-throw outcome.
const queryCheck = (s: string) => document.createDocumentFragment().querySelector(s);

/**
 * Reports whether `selector` can be queried without an exception being thrown.
 *
 * @param selector - Selector string to test.
 * @returns `false` when `queryCheck` throws, `true` otherwise.
 */
export const isSelectorValid = (selector: string): boolean => {
  let isValid = true;
  try {
    queryCheck(selector);
  } catch {
    isValid = false;
  }
  return isValid;
};

// Characters stripped from URL input by sanitizeUrlInput.
// Refer to NPM validator's special characters under sanitizers for escape()
const blackListCharacters = '\\<>&\'"';
188
+
189
/**
 * Attempts to parse `content` as XML with xml2js.
 *
 * NOTE(review): the result is read right after `parseString` returns, so this relies on the
 * callback having been invoked by then (presumably xml2js's default synchronous mode —
 * confirm).
 *
 * @param content - Text to parse.
 * @returns `isValid` is `true` when the parser produced a result; `parsedContent` then holds
 *   that result and is `undefined` otherwise.
 */
export const validateXML = (content: string): { isValid: boolean; parsedContent: string } => {
  const outcome: { isValid: boolean; parsedContent: string } = {
    isValid: undefined,
    parsedContent: undefined,
  };

  parseString(content, (_err, result) => {
    outcome.isValid = Boolean(result);
    if (outcome.isValid) {
      outcome.parsedContent = result;
    }
  });

  return outcome;
};
202
+
203
/**
 * Reports whether `pageUrl` matches any entry of `whitelistedDomains`.
 *
 * Each entry has its line breaks removed and is then treated as:
 *   - an exact URL, when it starts with `http` and equals `pageUrl`; otherwise
 *   - a regular expression tested against `pageUrl`.
 *
 * Entries that are empty after removing line breaks are ignored, because an empty regular
 * expression matches every URL and would cause all pages to be skipped.
 *
 * @param pageUrl - URL being considered.
 * @param whitelistedDomains - URLs or regular-expression sources.
 * @returns `true` when at least one entry matches.
 * @throws SyntaxError when an entry that gets evaluated is not a valid regular expression.
 */
export const isSkippedUrl = (pageUrl: string, whitelistedDomains: string[]) =>
  whitelistedDomains.some(entry => {
    const pattern = entry.replace(/[\n\r]+/g, '');

    if (pattern === '') {
      return false;
    }

    // is url
    if (pattern.startsWith('http') && pattern === pageUrl) {
      return true;
    }

    // is regex (default)
    return new RegExp(pattern).test(pageUrl);
  });
219
+
220
/**
 * Resolves a local sitemap/file reference to a filesystem path.
 *
 * `file:///` URLs are reduced to their path part (query string and fragment removed); the
 * value is then passed through `convertToFilePath` and checked for existence.
 *
 * @param filePath - A `file:///` URL or a filesystem path.
 * @returns The resolved path when the file exists; `null` when the `file:///` URL does not
 *   contain a usable path or the file does not exist.
 */
export const getFileSitemap = (filePath: string): string | null => {
  let candidatePath = filePath;

  if (candidatePath.startsWith('file:///')) {
    // Windows paths carry a drive letter (accepted in either case); elsewhere the path
    // starts at the third slash.
    const localPathPattern =
      os.platform() === 'win32' ? /^file:\/\/\/([A-Z]:\/[^?#]+)/i : /^file:\/\/(\/[^?#]+)/;
    const extractedPath = candidatePath.match(localPathPattern)?.[1];
    if (!extractedPath) {
      // Nothing path-like in the URL; do not hand `undefined` to convertToFilePath.
      return null;
    }
    candidatePath = extractedPath;
  }

  candidatePath = convertToFilePath(candidatePath);

  if (!fs.existsSync(candidatePath)) {
    return null;
  }

  const file = fs.readFileSync(candidatePath, 'utf8');
  const isLocalFileScan = isSitemapContent(file);
  // NOTE(review): `file` is always a string here, so this condition is always true and the
  // path is returned regardless of the sitemap check — confirm whether files that are not
  // sitemaps should be rejected instead.
  return isLocalFileScan || file !== undefined ? candidatePath : null;
};
239
+
240
/**
 * Returns the input prompt shown for the given scanner type.
 *
 * @param scanner - Selected scanner type.
 * @returns The prompt text, or `'Invalid option'` for an unrecognised scanner.
 */
export const getUrlMessage = (scanner: ScannerTypes): string => {
  if (
    scanner === ScannerTypes.WEBSITE ||
    scanner === ScannerTypes.CUSTOM ||
    scanner === ScannerTypes.INTELLIGENT
  ) {
    return 'Please enter URL of website: ';
  }
  if (scanner === ScannerTypes.SITEMAP) {
    return 'Please enter URL or file path to sitemap, or drag and drop a sitemap file here: ';
  }
  if (scanner === ScannerTypes.LOCALFILE) {
    return 'Please enter file path: ';
  }
  return 'Invalid option';
};
254
+
255
/**
 * Reports whether `inputString` is non-empty and, after being passed through
 * `validator.escape`, consists of ASCII characters only.
 *
 * @param inputString - Text to validate.
 * @returns `true` when the input passes both checks, `false` otherwise.
 */
export const isInputValid = inputString => {
  if (validator.isEmpty(inputString)) {
    return false;
  }

  const escapedInput = validator.escape(inputString);
  return validator.isAscii(escapedInput);
};
266
+
267
/**
 * Strips the characters listed in `blackListCharacters` from `url` and checks whether the
 * remainder is a URL according to `urlOptions`.
 *
 * @param url - Raw URL input.
 * @returns The sanitised URL together with a flag telling whether it is valid.
 */
export const sanitizeUrlInput = (url: string): { isValid: boolean; url: string } => {
  const sanitizeUrl = validator.blacklist(url, blackListCharacters);
  return {
    isValid: validator.isURL(sanitizeUrl, urlOptions),
    url: sanitizeUrl,
  };
};
275
+
276
/**
 * Checks a URL with a plain HTTP GET request and reports the outcome.
 *
 * On a successful response the result carries the success status, the response data and the
 * final URL: the URL reached after redirects (or the original `url` for custom flows without
 * a meta refresh), further replaced by the target of a `<meta http-equiv="refresh">` tag when
 * one is present outside `<noscript>` blocks.
 *
 * Failures are mapped to statuses rather than thrown:
 *   - `ECONNABORTED` / `ERR_FR_TOO_MANY_REDIRECTS` -> axiosTimeout
 *   - HTTP 401 -> unauthorised; any other HTTP error status -> success (error pages are
 *     still scanned); `url` and the error body are stored in both cases
 *   - request sent but no response -> cannotBeResolved
 *   - anything else -> systemError
 *
 * @param url - URL to request; credentials embedded in it are sent as basic auth.
 * @param isCustomFlow - When `true`, the original URL is kept unless a meta refresh is found.
 * @param extraHTTPHeaders - Additional request headers.
 * @returns The populated RES.
 * @throws TypeError when `url` cannot be parsed.
 */
const requestToUrl = async (
  url: string,
  isCustomFlow: boolean,
  extraHTTPHeaders: Record<string, string>,
) => {
  const res = new RES();
  const parsedUrl = new URL(url);

  try {
    const response = await axios.get(parsedUrl.href, {
      headers: {
        ...extraHTTPHeaders,
        // User-Agent is modified to emulate a browser to handle cases where some sites ban
        // non browser agents, resulting in a 403 error
        'User-Agent': devices['Desktop Chrome HiDPI'].userAgent,
        Host: parsedUrl.host,
      },
      auth: {
        username: decodeURIComponent(parsedUrl.username),
        password: decodeURIComponent(parsedUrl.password),
      },
      httpsAgent,
      timeout: 5000,
    });

    const redirectUrl = new URL(response.request.res.responseUrl).href;
    res.status = constants.urlCheckStatuses.success.code;

    // Text form of the body, used only for meta-refresh detection. It defaults to an empty
    // string so that an unsupported or unserialisable body does not abort the check.
    let data = '';
    if (typeof response.data === 'string' || response.data instanceof String) {
      data = String(response.data);
    } else if (typeof response.data === 'object' && response.data !== null) {
      try {
        data = JSON.stringify(response.data) ?? '';
      } catch (error) {
        console.log('Error converting object to JSON:', error);
      }
    } else {
      console.log('Unsupported data type:', typeof response.data);
    }

    // Meta refresh tags inside <noscript> are ignored.
    const modifiedHTML = data.replace(/<noscript>[\s\S]*?<\/noscript>/gi, '');
    const metaRefreshMatch =
      /<meta\s+http-equiv="refresh"\s+content="(?:\d+;)?\s*url=(?:'([^']*)'|"([^"]*)"|([^>]*))"/i.exec(
        modifiedHTML,
      );

    res.url = metaRefreshMatch || !isCustomFlow ? redirectUrl : url;

    if (metaRefreshMatch) {
      // The target is in whichever of the three capture groups matched.
      const urlOrRelativePath = metaRefreshMatch.slice(1).find(group => group != null) ?? '';

      if (urlOrRelativePath.includes('URL=')) {
        res.url = urlOrRelativePath.split('URL=').pop();
      } else {
        // Resolve against the full page URL: the URL parser applies the standard
        // relative-reference rules, so targets keep the page's directory.
        res.url = new URL(urlOrRelativePath, res.url).toString();
      }
    }

    res.content = response.data;
  } catch (err) {
    const error = err as {
      code?: string;
      response?: { status: number; data: string };
      request?: unknown;
    };

    if (error.code === 'ECONNABORTED' || error.code === 'ERR_FR_TOO_MANY_REDIRECTS') {
      res.status = constants.urlCheckStatuses.axiosTimeout.code;
    } else if (error.response) {
      if (error.response.status === 401) {
        // enters here if URL is protected by basic auth
        res.status = constants.urlCheckStatuses.unauthorised.code;
      } else {
        // enters here if server responds with a status other than 2xx
        // the scan should still proceed even if error codes are received, so that
        // accessibility scans for error pages can be done too
        res.status = constants.urlCheckStatuses.success.code;
      }
      res.url = url;
      res.content = error.response.data;
      return res;
    } else if (error.request) {
      // enters here if URL cannot be accessed
      res.status = constants.urlCheckStatuses.cannotBeResolved.code;
    } else {
      res.status = constants.urlCheckStatuses.systemError.code;
    }
    silentLogger.error(err);
  }

  return res;
};
374
+
375
/**
 * Checks that `url` can be reached by opening it in a Playwright persistent browser context.
 *
 * Outcomes are reported through the status of the returned RES:
 *   - invalidUrl: the input is not an http/https URL;
 *   - browserError: the browser context could not be launched;
 *   - unauthorised: the page responded with HTTP 401;
 *   - success: navigation completed (any other HTTP status included);
 *   - systemError: navigation or content retrieval threw.
 * PDF URLs are delegated to `requestToUrl`, and for XML responses the content is re-fetched
 * through `requestToUrl`.
 *
 * @param url - URL to check.
 * @param browserToRun - Browser identifier passed to `getPlaywrightLaunchOptions`.
 * @param clonedDataDir - User data directory for the persistent context.
 * @param playwrightDeviceDetailsObject - Optional device details; `viewport` and `userAgent`
 *   are applied when present.
 * @param isCustomFlow - When `true`, the entry URL is reported instead of the final page URL.
 * @param extraHTTPHeaders - Additional HTTP headers for the context and fallback requests.
 * @returns The populated RES.
 */
const checkUrlConnectivityWithBrowser = async (
  url,
  browserToRun,
  clonedDataDir,
  playwrightDeviceDetailsObject,
  isCustomFlow,
  extraHTTPHeaders,
) => {
  const res = new RES();

  // Validate the connectivity of URL if the string format is url format
  const data = sanitizeUrlInput(url);
  if (!data.isValid) {
    // enters here if input is not a URL or not using http/https protocols
    res.status = constants.urlCheckStatuses.invalidUrl.code;
    return res;
  }

  const deviceDetails = playwrightDeviceDetailsObject ?? {};
  const viewport = 'viewport' in deviceDetails ? deviceDetails.viewport : null;
  const userAgent = 'userAgent' in deviceDetails ? deviceDetails.userAgent : null;

  let browserContext;
  try {
    browserContext = await constants.launcher.launchPersistentContext(clonedDataDir, {
      ...getPlaywrightLaunchOptions(browserToRun),
      ...(viewport && { viewport }),
      ...(userAgent && { userAgent }),
      ...(extraHTTPHeaders && { extraHTTPHeaders }),
    });
  } catch (err) {
    printMessage([`Unable to launch browser\n${err}`], messageOptions);
    res.status = constants.urlCheckStatuses.browserError.code;
    return res;
  }

  // Everything that uses the context runs inside try/finally so the context is always closed.
  try {
    // playwright headless mode does not support navigation to pdf document
    if (isUrlPdf(url)) {
      // make http request to url to check
      return await requestToUrl(url, false, extraHTTPHeaders);
    }

    const page = await browserContext.newPage();

    // method will not throw an error when any valid HTTP status code is returned by the remote
    // server, including 404 "Not Found" and 500 "Internal Server Error".
    // navigation to about:blank or navigation to the same URL with a different hash would
    // succeed and return null, hence the optional chaining on `response` below.
    const response = await page.goto(url, {
      timeout: 30000,
      ...(proxy && { waitUntil: 'commit' }),
    });

    try {
      await page.waitForLoadState('networkidle', { timeout: 10000 });
    } catch {
      silentLogger.info('Unable to detect networkidle');
    }

    res.status =
      response?.status() === 401
        ? constants.urlCheckStatuses.unauthorised.code
        : constants.urlCheckStatuses.success.code;

    // set redirect link or final url
    res.url = isCustomFlow ? url : page.url();

    res.content = await page.content();

    // A response without a content-type header is treated as non-XML.
    const contentType = response?.headers()['content-type'];
    if (contentType?.includes('xml')) {
      const responseFromUrl = await requestToUrl(res.url, true, extraHTTPHeaders);

      res.content = responseFromUrl.content;
    }
  } catch (error) {
    silentLogger.error(error);
    res.status = constants.urlCheckStatuses.systemError.code;
  } finally {
    await browserContext.close();
  }

  return res;
};
474
+
475
/**
 * Decides whether `content` should be treated as a sitemap.
 *
 * Content counts as a sitemap when:
 *   - it parses as XML; or
 *   - it contains HTML markers and an `urlset`/`feed`/`rss` tag (XML sitemap wrapped in an
 *     HTML document); or
 *   - it contains no HTML markers but at least one line with an http(s) URL (text sitemap
 *     from which all URLs will be extracted for crawling).
 * Anything else is regarded as an ordinary HTML page.
 *
 * @param content - Text to classify.
 * @returns `true` when the content is treated as a sitemap.
 */
export const isSitemapContent = (content: string) => {
  if (validateXML(content).isValid) {
    return true;
  }

  const htmlPattern = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
  const xmlSitemapPattern = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
  const urlPattern = new RegExp('^.*(http|https):/{2}.*$', 'gmi');

  const looksLikeHtml = content.match(htmlPattern) !== null;
  if (looksLikeHtml) {
    // HTML document: a sitemap only if an XML sitemap is embedded in it.
    return content.match(xmlSitemapPattern) !== null;
  }

  // Not HTML: a sitemap if it lists at least one URL.
  return content.match(urlPattern) !== null;
};
496
+
497
/**
 * Checks connectivity of `url` through the browser and, for sitemap and local-file scans,
 * additionally verifies that the retrieved content is a sitemap.
 *
 * @param scanner - Selected scanner type.
 * @param url - URL to check.
 * @param browser - Browser identifier.
 * @param clonedDataDir - User data directory for the browser context.
 * @param playwrightDeviceDetailsObject - Device details applied to the browser context.
 * @param isCustomFlow - Whether the check is performed for a custom flow scan.
 * @param extraHTTPHeaders - Additional HTTP headers.
 * @returns The connectivity result; a successful status is downgraded to `notALocalFile` /
 *   `notASitemap` when the content check fails.
 */
export const checkUrl = async (
  scanner,
  url,
  browser,
  clonedDataDir,
  playwrightDeviceDetailsObject,
  isCustomFlow,
  extraHTTPHeaders,
) => {
  const res = await checkUrlConnectivityWithBrowser(
    url,
    browser,
    clonedDataDir,
    playwrightDeviceDetailsObject,
    isCustomFlow,
    extraHTTPHeaders,
  );

  const expectsSitemapContent =
    scanner === ScannerTypes.SITEMAP || scanner === ScannerTypes.LOCALFILE;
  if (res.status !== constants.urlCheckStatuses.success.code || !expectsSitemapContent) {
    return res;
  }

  if (!isSitemapContent(res.content)) {
    res.status =
      scanner === ScannerTypes.LOCALFILE
        ? constants.urlCheckStatuses.notALocalFile.code
        : constants.urlCheckStatuses.notASitemap.code;
  }
  return res;
};
529
+
530
/** Returns `true` when `obj` has no own enumerable string-keyed properties. */
const isEmptyObject = (obj: object): boolean => {
  const ownKeyCount = Object.keys(obj).length;
  return ownKeyCount === 0;
};
531
+
532
/**
 * Parses HTTP headers given as `"<header> <value>, <header2> <value2>, ..."`.
 *
 * Entries are separated by `", "`; within an entry the header name ends at the first space
 * and the remainder is the value. When an entry contains no space, an error message is
 * printed and the process exits with code 1.
 *
 * @param header - Header string; empty or missing input yields an empty object.
 * @returns Mapping of header names to values.
 */
export const parseHeaders = (header?: string): Record<string, string> => {
  if (!header) {
    return {};
  }

  const allHeaders: Record<string, string> = {};
  for (const headerValue of header.split(', ')) {
    // Splitting on the first space with a capture group yields [name, value, ''].
    const headerValuePair = headerValue.split(/ (.*)/s);
    if (headerValuePair.length < 2) {
      printMessage(
        [
          `Invalid value for authorisation request header. Please provide valid keywords in the format: "<header> <value>". For multiple authentication headers, please provide the keywords in the format: "<header> <value>, <header2> <value2>, ..." .`,
        ],
        messageOptions,
      );
      process.exit(1);
    }
    const [name, value] = headerValuePair;
    allHeaders[name] = value; // {"header": "value", "header2": "value2", ...}
  }
  return allHeaders;
};
552
+
553
/**
 * Converts the collected answers/CLI arguments into the `Data` object used by a scan.
 *
 * Besides copying the options across, this:
 *   - builds the result name `<date>_<time>[_<label>]_<domain>` (plus a random three-digit
 *     suffix when the `OOBEE_VERBOSE` environment variable is set), exposed as `randomToken`;
 *   - resets `constants.robotsTxtUrls` and loads robots.txt rules when `followRobots` is set;
 *   - parses the `header` string into `extraHTTPHeaders`.
 *
 * @param argv - Scan options.
 * @returns The scan configuration.
 * @throws Error when `argv` has no properties.
 */
export const prepareData = async (argv: Answers): Promise<Data> => {
  if (isEmptyObject(argv)) {
    throw Error('No inputs should be provided');
  }

  const { url, customFlowLabel, followRobots, browserToRun } = argv;

  // construct filename for scan results
  // (presumably the 'sv' locale renders as "YYYY-MM-DD hh:mm:ss" — confirm)
  const [date, time] = new Date().toLocaleString('sv').replaceAll(/-|:/g, '').split(' ');
  const domain = argv.isLocalFileScan ? path.basename(url) : new URL(url).hostname;
  const sanitisedLabel = customFlowLabel ? `_${customFlowLabel.replaceAll(' ', '_')}` : '';
  const randomThreeDigitNumber = randomThreeDigitNumberString();
  const baseFilename = `${date}_${time}${sanitisedLabel}_${domain}`;
  const resultFilename = process.env.OOBEE_VERBOSE
    ? `${baseFilename}_${randomThreeDigitNumber}`
    : baseFilename;

  if (followRobots) {
    constants.robotsTxtUrls = {};
    await getUrlsFromRobotsTxt(url, browserToRun);
  }

  return {
    type: argv.scanner,
    url: argv.finalUrl,
    entryUrl: url,
    isHeadless: argv.headless,
    deviceChosen: argv.deviceChosen,
    customDevice: argv.customDevice,
    viewportWidth: argv.viewportWidth,
    playwrightDeviceDetailsObject: argv.playwrightDeviceDetailsObject,
    maxRequestsPerCrawl: argv.maxpages || constants.maxRequestsPerCrawl,
    strategy:
      argv.strategy === 'same-hostname'
        ? EnqueueStrategy.SameHostname
        : EnqueueStrategy.SameDomain,
    isLocalFileScan: argv.isLocalFileScan,
    browser: browserToRun,
    nameEmail: argv.nameEmail,
    customFlowLabel,
    specifiedMaxConcurrency: argv.specifiedMaxConcurrency,
    randomToken: resultFilename,
    fileTypes: argv.fileTypes,
    blacklistedPatternsFilename: argv.blacklistedPatternsFilename,
    includeScreenshots: !(argv.additional === 'none'),
    metadata: argv.metadata,
    followRobots,
    extraHTTPHeaders: parseHeaders(argv.header),
    safeMode: argv.safeMode,
    zip: argv.zip,
    ruleset: argv.ruleset,
  };
};
630
+
631
/**
 * Downloads robots.txt for the origin of `url` and stores the rules of its `User-agent: *`
 * group in `constants.robotsTxtUrls[origin]` as `{ disallowedUrls, allowedUrls }`, with each
 * rule converted to a glob pattern prefixed by the origin.
 *
 * Does nothing when `constants.robotsTxtUrls` is unset or already has an entry for the
 * origin. When robots.txt cannot be retrieved, or is not text, an empty object is stored.
 *
 * @param url - Any URL of the site; only its origin is used.
 * @param browserToRun - Browser used to fetch robots.txt when a proxy is configured.
 */
export const getUrlsFromRobotsTxt = async (url: string, browserToRun: string): Promise<void> => {
  if (!constants.robotsTxtUrls) return;

  const domain = new URL(url).origin;
  if (constants.robotsTxtUrls[domain]) return;
  const robotsUrl = domain.concat('/robots.txt');

  let robotsTxt: string;
  try {
    robotsTxt = proxy
      ? await getRobotsTxtViaPlaywright(robotsUrl, browserToRun)
      : await getRobotsTxtViaAxios(robotsUrl);
  } catch (e) {
    silentLogger.info(e);
  }

  // The HTTP client may hand back a non-string body (e.g. parsed JSON); such a body is
  // handled the same way as a missing robots.txt.
  if (!robotsTxt || typeof robotsTxt !== 'string') {
    constants.robotsTxtUrls[domain] = {};
    return;
  }

  console.log('Found robots.txt: ', robotsUrl);

  // Converts a robots.txt path rule into a glob pattern prefixed with the origin.
  const sanitisePattern = (pattern: string): string => {
    const directoryRegex = /^\/(?:[^?#/]+\/)*[^?#]*$/;
    const filePathRegex = /^\/(?:[^\/]+\/)*[^\/]+\.[a-zA-Z0-9]{1,6}$/;

    // A `*` path segment is widened to `**`.
    let globPattern = pattern.replace(/\/\*\//g, '/**/');

    // Directory-like rules (not file paths) are extended with a trailing `**`.
    if (globPattern.match(directoryRegex) && !globPattern.match(filePathRegex)) {
      if (globPattern.endsWith('*')) {
        globPattern = globPattern.concat('*');
      } else {
        if (!globPattern.endsWith('/')) globPattern = globPattern.concat('/');
        globPattern = globPattern.concat('**');
      }
    }
    return domain.concat(globPattern);
  };

  // Value of a `Field: value` line. Taken from the colon so that `Disallow:/path`
  // (no space after the colon) keeps its leading slash.
  const directiveValue = (line: string): string => line.substring(line.indexOf(':') + 1).trim();

  const disallowedUrls: string[] = [];
  const allowedUrls: string[] = [];
  let shouldCapture = false;

  for (const rawLine of robotsTxt.split(/\r?\n/)) {
    const line = rawLine.trim();
    const lowerCasedLine = line.toLowerCase();

    if (/^user-agent:\s*\*/.test(lowerCasedLine)) {
      shouldCapture = true;
    } else if (lowerCasedLine.startsWith('user-agent:') && shouldCapture) {
      // The wildcard group ends where the next user-agent group begins.
      break;
    } else if (shouldCapture && lowerCasedLine.startsWith('disallow:')) {
      const disallowed = directiveValue(line);
      if (disallowed) {
        disallowedUrls.push(sanitisePattern(disallowed));
      }
    } else if (shouldCapture && lowerCasedLine.startsWith('allow:')) {
      const allowed = directiveValue(line);
      if (allowed) {
        allowedUrls.push(sanitisePattern(allowed));
      }
    }
  }

  constants.robotsTxtUrls[domain] = { disallowedUrls, allowedUrls };
};
702
+
703
/**
 * Fetches robots.txt by opening it in a Playwright browser context and reading the text
 * content of the page body.
 *
 * @param robotsUrl - URL of the robots.txt file.
 * @param browser - Browser identifier passed to `getPlaywrightLaunchOptions`.
 * @returns The text of the page body.
 * @throws When the browser cannot be launched or navigation fails.
 */
const getRobotsTxtViaPlaywright = async (robotsUrl: string, browser: string): Promise<string> => {
  const browserContext = await constants.launcher.launchPersistentContext('', {
    ...getPlaywrightLaunchOptions(browser),
  });

  try {
    const page = await browserContext.newPage();
    await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });

    const robotsTxt: string | null = await page.evaluate(() => document.body.textContent);
    return robotsTxt;
  } finally {
    // Always release the browser context, including when navigation fails.
    await browserContext.close();
  }
};
714
+
715
/**
 * Fetches robots.txt with a plain HTTP GET request (2 second timeout).
 *
 * The request reuses the module-level `httpsAgent` (same settings: certificate validation
 * disabled, keep-alive enabled) instead of creating a new keep-alive agent on every call,
 * which would never be destroyed.
 *
 * @param robotsUrl - URL of the robots.txt file.
 * @returns The response body.
 * @throws When the request fails or times out.
 */
const getRobotsTxtViaAxios = async (robotsUrl: string): Promise<string> => {
  const response = await axios.get(robotsUrl, { httpsAgent, timeout: 2000 });
  return response.data as string;
};
726
+
727
/**
 * Reports whether `url` is excluded by the robots.txt rules stored for its origin in
 * `constants.robotsTxtUrls`.
 *
 * A URL is disallowed when it matches at least one disallowed pattern and no allowed
 * pattern. When no rules are stored — robots.txt handling is off, the origin has not been
 * processed, or the stored entry is empty because robots.txt could not be retrieved — the
 * URL is not disallowed.
 *
 * @param url - Absolute URL to test.
 * @returns `true` when the URL must not be crawled.
 * @throws TypeError when `url` cannot be parsed.
 */
export const isDisallowedInRobotsTxt = (url: string): boolean => {
  if (!constants.robotsTxtUrls) return false;

  const domain = new URL(url).origin;
  const rules = constants.robotsTxtUrls[domain];
  if (!rules) return false;

  // The stored entry is `{}` when robots.txt was unavailable, hence the defaults.
  const { disallowedUrls = [], allowedUrls = [] } = rules;

  const isDisallowed = disallowedUrls.some((disallowedUrl: string) =>
    minimatch(url, disallowedUrl),
  );
  if (!isDisallowed) return false;

  const isAllowed = allowedUrls.some((allowedUrl: string) => minimatch(url, allowedUrl));
  return !isAllowed;
};
750
+
751
/**
 * Collects the page URLs referenced by a sitemap and returns them as requests.
 *
 * The sitemap may be a local file or an HTTP(S) URL. XML sitemaps, XML sitemap
 * indexes (followed recursively), RSS and Atom feeds are recognised by their
 * root element; anything else is scanned for plain http/https URLs. URLs that
 * are disallowed by the recorded robots.txt rules are skipped.
 *
 * Errors raised while walking the sitemap are logged through silentLogger and
 * whatever was collected up to that point is returned.
 *
 * @param sitemapUrl URL or file path of the sitemap to read
 * @param maxLinksCount cap applied to the entries taken from each sitemap and
 * used to stop walking a sitemap index
 * @param browser browser name forwarded to getPlaywrightLaunchOptions
 * @param userDataDirectory user data directory for the Playwright context
 * (null/undefined is treated as '')
 * @param userUrlInput URL entered by the user, used to rank entries when isIntelligent is set
 * @param isIntelligent when true, entries are sorted by closeness to userUrlInput,
 * then by last modified date (newest first)
 * @param username basic auth user name embedded into every collected URL
 * @param password basic auth password embedded into every collected URL
 * @returns the collected requests, one per distinct URL
 */
export const getLinksFromSitemap = async (
  sitemapUrl: string,
  maxLinksCount: number,
  browser: string,
  userDataDirectory: string,
  userUrlInput: string,
  isIntelligent: boolean,
  username: string,
  password: string,
) => {
  const scannedSitemaps = new Set<string>();
  const urls: Record<string, Request> = {}; // collected requests keyed by their final URL

  const isLimitReached = () => Object.keys(urls).length >= maxLinksCount;

  const addBasicAuthCredentials = (url: string, user: string, pass: string): string => {
    const urlObject = new URL(url);
    urlObject.username = user;
    urlObject.password = pass;
    return urlObject.toString();
  };

  const addToUrlList = (url: string): void => {
    if (!url) return;
    if (isDisallowedInRobotsTxt(url)) return;

    let targetUrl = url;
    // Embed the basic auth credentials only when both were supplied.
    if (username && password) {
      targetUrl = addBasicAuthCredentials(targetUrl, username, password);
    }
    targetUrl = convertPathToLocalFile(targetUrl);

    let request: Request;
    try {
      request = new Request({ url: targetUrl });
    } catch (e) {
      console.log('Error creating request', e);
      // Without a request there is nothing to scan; do not store an empty entry.
      return;
    }
    if (isUrlPdf(targetUrl)) {
      request.skipNavigation = true;
    }
    urls[targetUrl] = request;
  };

  // Removes a leading 'http://', 'https://' and 'www.' from a URL.
  const stripSchemeAndWww = (value: string): string =>
    (value ?? '').replace(/^(https?:\/\/)?(www\.)?/, '');

  // 2 = same as the user's URL, 1 = located below it, 0 = unrelated.
  const calculateCloseness = (candidateUrl: string): number => {
    const normalizedCandidate = stripSchemeAndWww(candidateUrl);
    const normalizedUserUrlInput = stripSchemeAndWww(userUrlInput).replace(/\/$/, '');

    if (normalizedCandidate === normalizedUserUrlInput) return 2;
    if (normalizedCandidate.startsWith(normalizedUserUrlInput)) return 1;
    return 0;
  };

  // Missing or unparseable dates sort as the oldest possible value.
  const toTimestamp = (date: Date | null): number => {
    const time = date ? date.getTime() : 0;
    return Number.isNaN(time) ? 0 : time;
  };

  const processXmlSitemap = async ($, sitemapType, linkSelector, dateSelector, sectionSelector) => {
    const urlList: { url: string; lastModifiedDate: Date | null }[] = [];

    // Collect the URL and modified date of every entry in the sitemap.
    $(sectionSelector).each((index, urlElement) => {
      const entry = $(urlElement);
      const link = entry.find(linkSelector);
      // Atom feeds keep the target in the href attribute, the other formats in the text.
      const url = sitemapType === constants.xmlSitemapTypes.atom ? link.prop('href') : link.text();
      const lastModified = entry.find(dateSelector).text();

      urlList.push({ url, lastModifiedDate: lastModified ? new Date(lastModified) : null });
    });

    if (isIntelligent) {
      urlList.sort((a, b) => {
        const closenessGap = calculateCloseness(b.url) - calculateCloseness(a.url);
        if (closenessGap !== 0) return closenessGap;
        // Same closeness: most recently modified first.
        return toTimestamp(b.lastModifiedDate) - toTimestamp(a.lastModifiedDate);
      });
    }

    for (const { url } of urlList.slice(0, maxLinksCount)) {
      addToUrlList(url);
    }
  };

  const processNonStandardSitemap = data => {
    const urlsFromData = crawlee
      .extractUrls({ string: data, urlRegExp: new RegExp('^(http|https):/{2}.+$', 'gmi') })
      .slice(0, maxLinksCount);
    urlsFromData.forEach(url => {
      addToUrlList(url);
    });
  };

  const finalUserDataDirectory = userDataDirectory ?? '';

  const fetchUrls = async (url: string): Promise<void> => {
    if (scannedSitemaps.has(url)) {
      // Skip processing if the sitemap has already been scanned
      return;
    }
    scannedSitemaps.add(url);

    let data;
    let isBasicAuth = false;
    let sitemapUsername = '';
    let sitemapPassword = '';

    // Convert file if its not local file path
    const sitemapLocation = convertLocalFileToPath(url);

    // Check whether its a file path or a URL
    if (isFilePath(sitemapLocation)) {
      if (!fs.existsSync(sitemapLocation)) {
        return;
      }
    } else if (isValidHttpUrl(sitemapLocation)) {
      const parsedUrl = new URL(sitemapLocation);
      if (parsedUrl.username !== '' && parsedUrl.password !== '') {
        isBasicAuth = true;
        sitemapUsername = decodeURIComponent(parsedUrl.username);
        sitemapPassword = decodeURIComponent(parsedUrl.password);
      }
    } else {
      printMessage([`Invalid Url/Filepath: ${sitemapLocation}`], messageOptions);
      return;
    }

    // Loads the sitemap in a browser page and returns its markup (undefined if no
    // known root element is present). Credentials travel inside the URL itself.
    const getDataUsingPlaywright = async () => {
      const browserContext = await constants.launcher.launchPersistentContext(
        finalUserDataDirectory,
        {
          ...getPlaywrightLaunchOptions(browser),
        },
      );

      try {
        const page = await browserContext.newPage();
        await page.goto(sitemapLocation, { waitUntil: 'networkidle', timeout: 60000 });

        if (constants.launcher === webkit) {
          return await page.locator('body').innerText();
        }

        for (const rootTag of ['urlset', 'sitemapindex', 'rss', 'feed']) {
          const rootLocator = page.locator(rootTag);
          if ((await rootLocator.count()) > 0) {
            return await rootLocator.evaluate(elem => elem.outerHTML);
          }
        }
        return undefined;
      } finally {
        // Close the context even when navigation fails so no browser is leaked.
        await browserContext.close();
      }
    };

    if (validator.isURL(sitemapLocation, urlOptions)) {
      if (isUrlPdf(sitemapLocation)) {
        addToUrlList(sitemapLocation);
        return;
      }
      if (proxy) {
        data = await getDataUsingPlaywright();
      } else {
        try {
          const instance = axios.create({
            httpsAgent: new https.Agent({
              rejectUnauthorized: false,
              keepAlive: true,
            }),
            // Send an Authorization header only when the sitemap URL carried credentials.
            ...(isBasicAuth && { auth: { username: sitemapUsername, password: sitemapPassword } }),
          });
          data = (await instance.get(sitemapLocation, { timeout: 80000 })).data;
        } catch (error) {
          // A timeout gets a second attempt through the browser; any other
          // failure means this sitemap is skipped.
          if ((error as { code?: string })?.code !== 'ECONNABORTED') {
            return;
          }
          data = await getDataUsingPlaywright();
        }
      }
    } else {
      data = fs.readFileSync(convertLocalFileToPath(sitemapLocation), 'utf8');
    }

    // Nothing could be retrieved (e.g. the page had no recognised root element).
    if (data === undefined || data === null) {
      return;
    }

    const $ = cheerio.load(data, { xml: true });

    // This case is when the document is not an XML format document
    if ($(':root').length === 0) {
      processNonStandardSitemap(data);
      return;
    }

    // Root element
    const root = $(':root')[0];
    // A root without an xmlns attribute is treated as having an empty namespace.
    const xmlns: string = root.attribs?.xmlns ?? '';

    const xmlFormatNamespace = '/schemas/sitemap';
    let sitemapType;
    if (root.name === 'urlset' && xmlns.includes(xmlFormatNamespace)) {
      sitemapType = constants.xmlSitemapTypes.xml;
    } else if (root.name === 'sitemapindex' && xmlns.includes(xmlFormatNamespace)) {
      sitemapType = constants.xmlSitemapTypes.xmlIndex;
    } else if (root.name === 'rss') {
      sitemapType = constants.xmlSitemapTypes.rss;
    } else if (root.name === 'feed') {
      sitemapType = constants.xmlSitemapTypes.atom;
    } else {
      sitemapType = constants.xmlSitemapTypes.unknown;
    }

    switch (sitemapType) {
      case constants.xmlSitemapTypes.xmlIndex:
        silentLogger.info(`This is a XML format sitemap index.`);
        for (const childSitemapUrl of $('loc')) {
          const childSitemapUrlText = $(childSitemapUrl).text();
          if (isLimitReached()) {
            break;
          }
          if (childSitemapUrlText.endsWith('.xml') || childSitemapUrlText.endsWith('.txt')) {
            await fetchUrls(childSitemapUrlText); // Recursive call for nested sitemaps
          } else {
            addToUrlList(childSitemapUrlText); // Add regular URLs to the list
          }
        }
        break;
      case constants.xmlSitemapTypes.xml:
        silentLogger.info(`This is a XML format sitemap.`);
        await processXmlSitemap($, sitemapType, 'loc', 'lastmod', 'url');
        break;
      case constants.xmlSitemapTypes.rss:
        silentLogger.info(`This is a RSS format sitemap.`);
        await processXmlSitemap($, sitemapType, 'link', 'pubDate', 'item');
        break;
      case constants.xmlSitemapTypes.atom:
        silentLogger.info(`This is a Atom format sitemap.`);
        await processXmlSitemap($, sitemapType, 'link', 'published', 'entry');
        break;
      default:
        silentLogger.info(`This is an unrecognised XML sitemap format.`);
        processNonStandardSitemap(data);
    }
  };

  try {
    await fetchUrls(sitemapUrl);
  } catch (e) {
    silentLogger.error(e);
  }

  return Object.values(urls);
};
1039
+
1040
/**
 * Loose email shape check: at least one character, an '@', at least one
 * character, a '.', and at least one more character.
 * @param email value to check
 * @returns true when the value has that shape
 */
export const validEmail = email => /^.+@.+\..+$/u.test(email);
1045
+
1046
// For new user flow.
/**
 * Validates a user supplied name.
 *
 * A name is accepted when it is 2 to 32000 characters long, consists only of
 * letters, digits, whitespace and the listed punctuation, and contains none of
 * the characters treated as injection patterns. Because the second check also
 * rejects quotes, brackets, braces, parentheses, '!' and tab/newline
 * characters, those are refused even though the first pattern lists them.
 *
 * @param name value to validate
 * @returns true when the name passes every check
 */
export const validName = name => {
  const hasValidLength = name.length >= 2 && name.length <= 32000;
  if (!hasValidLength) {
    return false;
  }

  // Letters and digits from any language plus whitespace and basic punctuation.
  const allowedCharacters = /^[\p{L}\p{N}\s'".,()\[\]{}!?:؛،؟…]+$/u;
  // Characters commonly used in injection attempts.
  const injectionCharacters = /[<>'"\\/;|&!$*{}()\[\]\r\n\t]/;

  return allowedCharacters.test(name) && !injectionCharacters.test(name);
};
1071
+
1072
/**
 * Picks the browser to run the scan with and clones its data directory.
 *
 * Chrome is preferred on Windows and Mac when no browser is given. When the
 * preferred browser cannot be used the fallbacks are: webkit on Mac, the other
 * Chromium based browser (or process exit) on Windows, Chromium elsewhere.
 * @param preferredBrowser string of user's preferred browser
 * @param isCli boolean flag to indicate if function is called from cli
 * @returns object consisting of browser to run and cloned data directory
 */
export const getBrowserToRun = (
  preferredBrowser: BrowserTypes,
  isCli = false,
): { browserToRun: BrowserTypes; clonedBrowserDataDir: string } => {
  const platform = os.platform();
  const onMac = platform === 'darwin';
  const onWindows = platform === 'win32';

  // Fallback notices are only shown to CLI users.
  const tellCliUser = (message: string): void => {
    if (isCli) printMessage([message], messageOptions);
  };

  // Switches the launcher to webkit; no browser channel or cloned directory applies.
  const switchToWebkit = () => {
    constants.launcher = webkit;
    return { browserToRun: null, clonedBrowserDataDir: '' };
  };

  let wantedBrowser = preferredBrowser;
  // Prioritise Chrome on Windows and Mac platforms if user does not specify a browser
  if (!wantedBrowser && (onWindows || onMac)) {
    wantedBrowser = BrowserTypes.CHROME;
  }

  printMessage([`Preferred browser ${wantedBrowser}`], messageOptions);

  switch (wantedBrowser) {
    case BrowserTypes.CHROME: {
      const chromeData = getChromeData();
      if (chromeData) return chromeData;

      if (onMac) {
        // mac user who specified -b chrome but does not have chrome
        tellCliUser('Unable to use Chrome, falling back to webkit...');
        return switchToWebkit();
      }
      if (onWindows) {
        tellCliUser('Unable to use Chrome, falling back to Edge browser...');

        const edgeData = getEdgeData();
        if (edgeData) return edgeData;

        tellCliUser('Unable to use both Chrome and Edge. Please try again.');
        process.exit(constants.urlCheckStatuses.browserError.code);
      }

      tellCliUser('Unable to use Chrome, falling back to Chromium browser...');
      break;
    }
    case BrowserTypes.EDGE: {
      const edgeData = getEdgeData();
      if (edgeData) return edgeData;

      tellCliUser('Unable to use Edge, falling back to Chrome browser...');
      const chromeData = getChromeData();
      if (chromeData) return chromeData;

      if (onMac) {
        // mac user who specified -b edge but does not have edge or chrome
        tellCliUser('Unable to use both Edge and Chrome, falling back to webkit...');
        return switchToWebkit();
      }
      if (onWindows) {
        tellCliUser('Unable to use both Edge and Chrome. Please try again.');
        process.exit(constants.urlCheckStatuses.browserError.code);
      } else {
        // linux and other OS
        tellCliUser('Unable to use both Edge and Chrome, falling back to Chromium browser...');
      }
      break;
    }
    default:
      break;
  }

  // defaults to chromium
  return {
    browserToRun: BrowserTypes.CHROMIUM,
    clonedBrowserDataDir: cloneChromiumProfiles(),
  };
};
1157
+
1158
/**
 * Clones the browser profiles a second time, into a directory suffixed with a
 * random token, so that parallel browser sessions each get their own copy.
 * This also mitigates a known bug where cookies are overridden after each
 * browser session - i.e. the user is logged out after checkingUrl and the
 * same cookie cannot be used for the scan.
 * @param browser browser whose profiles are cloned; anything other than Chrome
 * or Edge is handled as Chromium
 * @param randomToken token appended to the cloned directory name
 * @returns the value returned by the browser specific clone function
 */
export const getClonedProfilesWithRandomToken = (browser: string, randomToken: string): string => {
  switch (browser) {
    case BrowserTypes.CHROME:
      return cloneChromeProfiles(randomToken);
    case BrowserTypes.EDGE:
      return cloneEdgeProfiles(randomToken);
    default:
      return cloneChromiumProfiles(randomToken);
  }
};
1173
+
1174
/**
 * Resolves the Chrome data directory and clones its profiles.
 * @returns the Chrome browser type with the cloned data directory, or null when
 * either the data directory or the clone is unavailable
 */
export const getChromeData = () => {
  const defaultDataDir = getDefaultChromeDataDir();
  const clonedBrowserDataDir = cloneChromeProfiles();

  if (!defaultDataDir || !clonedBrowserDataDir) {
    return null;
  }
  return { browserToRun: BrowserTypes.CHROME, clonedBrowserDataDir };
};
1183
+
1184
/**
 * Resolves the Edge data directory and clones its profiles.
 * @returns the Edge browser type with the cloned data directory, or null when
 * either the data directory or the clone is unavailable
 */
export const getEdgeData = () => {
  const browserDataDir = getDefaultEdgeDataDir();
  const clonedBrowserDataDir = cloneEdgeProfiles();
  if (browserDataDir && clonedBrowserDataDir) {
    const browserToRun = BrowserTypes.EDGE;
    return { browserToRun, clonedBrowserDataDir };
  }
  // Explicit null on every failing path, matching getChromeData.
  return null;
};
1192
+
1193
/**
 * Clone the Chrome profile cookie files to the destination directory.
 *
 * Cookie files are looked up only on Windows and Mac; on any other platform the
 * function reports that no cookie file was found.
 * @param {*} options glob options object
 * @param {*} destDir destination directory
 * @returns boolean indicating whether the operation was successful
 */
const cloneChromeProfileCookieFiles = (options, destDir) => {
  const platform = os.platform();
  // Skip cookies that live inside previously cloned oobee directories,
  // including the token-suffixed ones created for parallel sessions.
  const ignoreClonedDirs = ['oobee/**', 'oobee-*/**'];

  let cookieFiles: string[] = [];
  let profileNamesRegex: RegExp | undefined;

  // Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
  // and ../Chrome/<profile name>/Cookies for mac
  if (platform === 'win32') {
    cookieFiles = globSync('**/Network/Cookies', { ...options, ignore: ignoreClonedDirs });
    profileNamesRegex = /User Data\\(.*?)\\Network/;
  } else if (platform === 'darwin') {
    cookieFiles = globSync('**/Cookies', { ...options, ignore: ignoreClonedDirs });
    profileNamesRegex = /Chrome\/(.*?)\/Cookies/;
  }

  if (cookieFiles.length === 0 || !profileNamesRegex) {
    silentLogger.warn('Unable to find Chrome profile cookies file in the system.');
    printMessage(['Unable to find Chrome profile cookies file in the system.'], messageOptions);
    return false;
  }

  let success = true;
  for (const cookieFile of cookieFiles) {
    // Paths that do not contain a recognisable profile name are skipped.
    const profileName = cookieFile.match(profileNamesRegex)?.[1];
    if (!profileName) continue;

    const destProfileDir =
      platform === 'win32'
        ? path.join(destDir, profileName, 'Network')
        : path.join(destDir, profileName);

    // Recursive true to create all parent directories (e.g. PbProfile/Default/Cookies);
    // it is a no-op when the directory already exists.
    fs.mkdirSync(destProfileDir, { recursive: true });

    // Prevents duplicate cookies file if the cookies already exist
    const destCookieFile = path.join(destProfileDir, 'Cookies');
    if (fs.existsSync(destCookieFile)) continue;

    try {
      fs.copyFileSync(cookieFile, destCookieFile);
    } catch (err) {
      silentLogger.error(err);
      if (err.code === 'EBUSY') {
        console.log(`Unable to copy the file for ${profileName} because it is currently in use.`);
        console.log('Please close any applications that might be using this file and try again.');
      } else {
        console.log(
          `An unexpected error occurred for ${profileName} while copying the file: ${err.message}`,
        );
      }
      success = false;
    }
  }
  return success;
};
1267
+
1268
/**
 * Clone the Edge profile cookie files to the destination directory.
 *
 * Cookie files are looked up only on Windows and Mac; on any other platform the
 * function reports that no cookie file was found.
 * @param {*} options glob options object
 * @param {*} destDir destination directory
 * @returns boolean indicating whether the operation was successful
 */
const cloneEdgeProfileCookieFiles = (options, destDir) => {
  const platform = os.platform();
  // Skip cookies that live inside previously cloned oobee directories,
  // including the token-suffixed ones created for parallel sessions.
  const ignoreClonedDirs = ['oobee/**', 'oobee-*/**'];

  let cookieFiles: string[] = [];
  let profileNamesRegex: RegExp | undefined;

  // Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
  // and ../Microsoft Edge/<profile name>/Cookies for mac
  if (platform === 'win32') {
    cookieFiles = globSync('**/Network/Cookies', { ...options, ignore: ignoreClonedDirs });
    profileNamesRegex = /User Data\\(.*?)\\Network/;
  } else if (platform === 'darwin') {
    cookieFiles = globSync('**/Cookies', { ...options, ignore: ignoreClonedDirs });
    profileNamesRegex = /Microsoft Edge\/(.*?)\/Cookies/;
  }

  if (cookieFiles.length === 0 || !profileNamesRegex) {
    silentLogger.warn('Unable to find Edge profile cookies file in the system.');
    printMessage(['Unable to find Edge profile cookies file in the system.'], messageOptions);
    return false;
  }

  let success = true;
  for (const cookieFile of cookieFiles) {
    // Paths that do not contain a recognisable profile name are skipped.
    const profileName = cookieFile.match(profileNamesRegex)?.[1];
    if (!profileName) continue;

    const destProfileDir =
      platform === 'win32'
        ? path.join(destDir, profileName, 'Network')
        : path.join(destDir, profileName);

    // Recursive true to create all parent directories (e.g. PbProfile/Default/Cookies);
    // it is a no-op when the directory already exists.
    fs.mkdirSync(destProfileDir, { recursive: true });

    // Prevents duplicate cookies file if the cookies already exist
    const destCookieFile = path.join(destProfileDir, 'Cookies');
    if (fs.existsSync(destCookieFile)) continue;

    try {
      fs.copyFileSync(cookieFile, destCookieFile);
    } catch (err) {
      silentLogger.error(err);
      if (err.code === 'EBUSY') {
        console.log(`Unable to copy the file for ${profileName} because it is currently in use.`);
        console.log('Please close any applications that might be using this file and try again.');
      } else {
        console.log(`An unexpected error occurred while copying the file: ${err.message}`);
      }
      success = false;
    }
  }
  return success;
};
1340
+
1341
/**
 * Copies the browser's Local State file into the destination directory.
 * Both Edge and Chrome Local State files are located in the .../User Data directory
 * @param {*} options - glob options object
 * @param {string} destDir - destination directory
 * @returns boolean indicating whether the operation was successful
 */
const cloneLocalStateFile = (options, destDir) => {
  const localStateFiles = globSync('**/*Local State', {
    ...options,
    maxDepth: 1,
  });
  // Captures the name of the directory that holds the Local State file.
  const profileNamesRegex = /([^/\\]+)[/\\]Local State$/;

  if (localStateFiles.length === 0) {
    silentLogger.warn('Unable to find local state file in the system.');
    printMessage(['Unable to find local state file in the system.'], messageOptions);
    return false;
  }

  let success = true;
  for (const localStateFile of localStateFiles) {
    // Only used for the error message; the glob can match names the pattern
    // does not, in which case there is no match to read from.
    const profileName = localStateFile.match(profileNamesRegex)?.[1];
    try {
      fs.copyFileSync(localStateFile, path.join(destDir, 'Local State'));
    } catch (err) {
      silentLogger.error(err);
      if (err.code === 'EBUSY') {
        console.log(`Unable to copy the file because it is currently in use.`);
        console.log('Please close any applications that might be using this file and try again.');
      } else {
        console.log(
          `An unexpected error occurred for ${profileName} while copying the file: ${err.message}`,
        );
      }
      printMessage([err], messageOptions);
      success = false;
    }
  }
  return success;
};
1381
+
1382
/**
 * Clones the Chrome profiles (Local State and per-profile cookie files) into an
 * oobee directory inside the Chrome data directory, i.e.
 * .../User Data for Windows and .../Chrome for Mac.
 *
 * An already existing destination triggers a clean-up of cloned directories
 * before the clone is recreated.
 * @param {string} randomToken - random token to append to the cloned directory
 * @returns {string} cloned data directory, null if any of the sub files failed
 * to copy, undefined when no Chrome data directory is found
 */
export const cloneChromeProfiles = (randomToken?: string): string => {
  const baseDir = getDefaultChromeDataDir();
  if (!baseDir) {
    return undefined;
  }

  const cloneDirName = randomToken ? `oobee-${randomToken}` : 'oobee';
  const destDir = path.join(baseDir, cloneDirName);

  if (fs.existsSync(destDir)) {
    // With OOBEE_VERBOSE set the token is forwarded to the clean-up; otherwise
    // the clean-up is called without a token.
    deleteClonedChromeProfiles(process.env.OOBEE_VERBOSE ? randomToken : undefined);
  }

  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }

  const globOptions = {
    cwd: baseDir,
    recursive: true,
    absolute: true,
    nodir: true,
  };

  // Both copies are always attempted; the clone is usable only if both succeed.
  const localStateCloned = cloneLocalStateFile(globOptions, destDir);
  const cookiesCloned = cloneChromeProfileCookieFiles(globOptions, destDir);

  return cookiesCloned && localStateCloned ? destDir : null;
};
1430
+
1431
/**
 * Ensures an oobee directory exists inside the Chromium data directory. No
 * files are copied into it.
 * @param {string} randomToken - random token to append to the directory name
 * @returns {string} the directory path, undefined when no Chromium data directory is found
 */
export const cloneChromiumProfiles = (randomToken?: string): string => {
  const baseDir = getDefaultChromiumDataDir();
  if (!baseDir) {
    return undefined;
  }

  const cloneDirName = randomToken ? `oobee-${randomToken}` : 'oobee';
  const destDir: string = path.join(baseDir, cloneDirName);

  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }

  return destDir;
};
1452
+
1453
/**
 * Clones the Edge profiles (Local State and per-profile cookie files) into an
 * oobee directory inside the Edge data directory, i.e.
 * .../User Data for Windows and .../Microsoft Edge for Mac.
 *
 * An already existing destination triggers a clean-up of cloned directories
 * before the clone is recreated.
 * @param {string} randomToken - random token to append to the cloned directory
 * @returns {string} cloned data directory, null if any of the sub files failed
 * to copy, undefined when no Edge data directory is found
 */
export const cloneEdgeProfiles = (randomToken?: string): string => {
  const baseDir = getDefaultEdgeDataDir();
  if (!baseDir) {
    return undefined;
  }

  const cloneDirName = randomToken ? `oobee-${randomToken}` : 'oobee';
  const destDir = path.join(baseDir, cloneDirName);

  if (fs.existsSync(destDir)) {
    // With OOBEE_VERBOSE set the token is forwarded to the clean-up; otherwise
    // the clean-up is called without a token.
    deleteClonedEdgeProfiles(process.env.OOBEE_VERBOSE ? randomToken : undefined);
  }

  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }

  const globOptions = {
    cwd: baseDir,
    recursive: true,
    absolute: true,
    nodir: true,
  };

  // Both copies are always attempted; the clone is usable only if both succeed.
  const localStateCloned = cloneLocalStateFile(globOptions, destDir);
  const cookiesCloned = cloneEdgeProfileCookieFiles(globOptions, destDir);

  return cookiesCloned && localStateCloned ? destDir : null;
};
1502
+
1503
/**
 * Removes the cloned profile directories of the given browser.
 * @param browser Chrome, Edge or Chromium; any other value is ignored
 * @param randomToken token of the clone to remove, forwarded to the browser
 * specific delete function
 */
export const deleteClonedProfiles = (browser: string, randomToken?: string): void => {
  switch (browser) {
    case BrowserTypes.CHROME:
      deleteClonedChromeProfiles(randomToken);
      break;
    case BrowserTypes.EDGE:
      deleteClonedEdgeProfiles(randomToken);
      break;
    case BrowserTypes.CHROMIUM:
      deleteClonedChromiumProfiles(randomToken);
      break;
    default:
      break;
  }
};
1512
+
1513
/**
 * Deletes cloned oobee directories in the Chrome data directory.
 *
 * With a token only `oobee-<token>` is targeted; without one every entry
 * matching `oobee*` below the data directory is removed. Deletion failures are
 * logged and do not stop the remaining deletions.
 * @param {string} randomToken - token of the clone to delete
 */
export const deleteClonedChromeProfiles = (randomToken?: string): void => {
  const baseDir = getDefaultChromeDataDir();
  if (!baseDir) {
    return;
  }

  const cloneDirs: string[] = randomToken
    ? [`${baseDir}/oobee-${randomToken}`]
    : // Find all the oobee directories in the Chrome data directory
      globSync('**/oobee*', {
        cwd: baseDir,
        absolute: true,
      });

  if (cloneDirs.length === 0) {
    silentLogger.warn('Unable to find oobee directory in the Chrome data directory.');
    console.warn('Unable to find oobee directory in the Chrome data directory.');
    return;
  }

  for (const dir of cloneDirs) {
    if (!fs.existsSync(dir)) continue;
    try {
      fs.rmSync(dir, { recursive: true });
    } catch (err) {
      silentLogger.error(
        `CHROME Unable to delete ${dir} folder in the Chrome data directory. ${err}`,
      );
    }
  }
};
1552
+
1553
/**
 * Deletes cloned oobee directories in the Edge data directory.
 *
 * Nothing is deleted while the OOBEE_VERBOSE environment variable is set. With
 * a token only `oobee-<token>` is targeted; without one every entry matching
 * `oobee*` below the data directory is removed. Deletion failures are logged
 * and do not stop the remaining deletions.
 * @param {string} randomToken - token of the clone to delete
 */
export const deleteClonedEdgeProfiles = (randomToken?: string): void => {
  if (process.env.OOBEE_VERBOSE) {
    return;
  }
  const baseDir = getDefaultEdgeDataDir();

  if (!baseDir) {
    console.warn(`Unable to find Edge data directory in the system.`);
    return;
  }
  let destDir: string[];
  if (randomToken) {
    destDir = [`${baseDir}/oobee-${randomToken}`];
  } else {
    // Find all the oobee directories in the Edge data directory
    destDir = globSync('**/oobee*', {
      cwd: baseDir,
      absolute: true,
    });
  }

  for (const dir of destDir) {
    if (!fs.existsSync(dir)) continue;
    try {
      fs.rmSync(dir, { recursive: true });
    } catch (err) {
      // The message names the Edge directory so the log points at the right browser.
      silentLogger.error(`EDGE Unable to delete ${dir} folder in the Edge data directory. ${err}`);
    }
  }
};
1592
+
1593
/**
 * Deletes cloned oobee directories in the Chromium data directory.
 *
 * With a token only `oobee-<token>` is targeted; without one every entry
 * matching `oobee*` below the data directory is removed. Deletion failures are
 * logged and do not stop the remaining deletions.
 * @param {string} randomToken - token of the clone to delete
 */
export const deleteClonedChromiumProfiles = (randomToken?: string): void => {
  const baseDir = getDefaultChromiumDataDir();
  if (!baseDir) {
    return;
  }

  const cloneDirs: string[] = randomToken
    ? [`${baseDir}/oobee-${randomToken}`]
    : // Find all the oobee directories in the Chromium data directory
      globSync('**/oobee*', {
        cwd: baseDir,
        absolute: true,
      });

  if (cloneDirs.length === 0) {
    silentLogger.warn('Unable to find oobee directory in Chromium support directory');
    console.warn('Unable to find oobee directory in Chromium support directory');
    return;
  }

  for (const dir of cloneDirs) {
    if (!fs.existsSync(dir)) continue;
    try {
      fs.rmSync(dir, { recursive: true });
    } catch (err) {
      silentLogger.error(
        `CHROMIUM Unable to delete ${dir} folder in the Chromium data directory. ${err}`,
      );
    }
  }
};
1628
+
1629
/**
 * Builds the Playwright device settings for the chosen screen.
 *
 * Precedence: 'Mobile' or the 'iPhone 11' custom device, then the
 * 'Samsung Galaxy S9+' custom device, then a custom viewport width (height
 * fixed at 720), then any other custom device looked up by name with
 * underscores replaced by spaces.
 * @param deviceChosen device category selected by the user
 * @param customDevice custom device name
 * @param viewportWidth custom viewport width in pixels
 * @returns the device settings, or an empty object when nothing applies
 */
export const getPlaywrightDeviceDetailsObject = (
  deviceChosen: string,
  customDevice: string,
  viewportWidth: number,
) => {
  if (deviceChosen === 'Mobile' || customDevice === 'iPhone 11') {
    return devices['iPhone 11'];
  }
  if (customDevice === 'Samsung Galaxy S9+') {
    return devices['Galaxy S9+'];
  }
  if (viewportWidth) {
    return { viewport: { width: viewportWidth, height: 720 } };
  }
  if (customDevice) {
    return devices[customDevice.replace(/_/g, ' ')];
  }
  return {};
};
1648
+
1649
/**
 * Produces the label of the screen being scanned: the chosen device, else the
 * custom device, else `CustomWidth_<width>px`, else 'Desktop'.
 * @param deviceChosen device category selected by the user
 * @param customDevice custom device name
 * @param viewportWidth custom viewport width in pixels
 * @returns the screen label
 */
export const getScreenToScan = (
  deviceChosen: string,
  customDevice: string,
  viewportWidth: number,
): string => {
  const widthLabel = viewportWidth ? `CustomWidth_${viewportWidth}px` : 'Desktop';
  return deviceChosen || customDevice || widthLabel;
};
1665
+
1666
/**
 * Submits the prefilled form by opening its URL in a Playwright page.
 *
 * When a proxy is in use and the browser is Edge or Chrome, the profiles are
 * cloned into a `clone-<timestamp>` directory that is used as the user data
 * directory. Page errors are logged, not thrown. The browser context is always
 * closed and, unless OOBEE_VERBOSE is set, the cloned profiles are removed
 * afterwards.
 * @param browserToRun browser used to open the form
 * @param userDataDirectory user data directory used when no clone was created
 * @param finalUrl form URL including the prefilled query string
 */
export const submitFormViaPlaywright = async (
  browserToRun: string,
  userDataDirectory: string,
  finalUrl: string,
) => {
  const dirName = `clone-${Date.now()}`;
  let clonedDir = null;
  if (proxy && browserToRun === BrowserTypes.EDGE) {
    clonedDir = cloneEdgeProfiles(dirName);
  } else if (proxy && browserToRun === BrowserTypes.CHROME) {
    clonedDir = cloneChromeProfiles(dirName);
  }
  const browserContext = await constants.launcher.launchPersistentContext(
    clonedDir || userDataDirectory,
    {
      ...getPlaywrightLaunchOptions(browserToRun),
    },
  );

  try {
    // Creating the page inside the try guarantees the context is closed even
    // when the page cannot be opened.
    const page = await browserContext.newPage();

    await page.goto(finalUrl, {
      timeout: 30000,
      ...(proxy && { waitUntil: 'commit' }),
    });

    try {
      await page.waitForLoadState('networkidle', { timeout: 10000 });
    } catch {
      silentLogger.info('Unable to detect networkidle');
    }
  } catch (error) {
    silentLogger.error(error);
  } finally {
    try {
      await browserContext.close();
    } finally {
      // Clean up the cloned profiles even if closing the context failed.
      if (proxy && !process.env.OOBEE_VERBOSE) {
        if (browserToRun === BrowserTypes.EDGE) {
          deleteClonedEdgeProfiles();
        } else if (browserToRun === BrowserTypes.CHROME) {
          deleteClonedChromeProfiles();
        }
      }
    }
  }
};
1713
+
1714
/**
 * Builds the form submission URL from the scan details and submits it.
 *
 * Every free-text value is percent-encoded before being placed in the query
 * string, so characters such as `&`, `#`, `+` or spaces inside a URL, e-mail
 * address or name cannot truncate or corrupt the other form fields.
 *
 * Submission strategy:
 * - when the module-level `proxy` value is set, the URL is opened through
 *   Playwright;
 * - otherwise an HTTP GET is attempted with a 2 second timeout, and only an
 *   `ECONNABORTED` failure falls back to Playwright (when a browser is given
 *   or the launcher is webkit). Any other request error is ignored.
 *
 * @param browserToRun - Browser identifier forwarded to the Playwright submission.
 * @param userDataDirectory - User data directory forwarded to the Playwright submission.
 * @param scannedUrl - URL that was scanned; sent as the redirect URL when it differs from `entryUrl`.
 * @param entryUrl - URL the scan was started with.
 * @param scanType - Scan type value for the form.
 * @param email - E-mail value for the form.
 * @param name - Name value for the form.
 * @param scanResultsJson - Scan results, already serialised as a string.
 * @param numberOfPagesScanned - Number of pages scanned.
 * @param numberOfRedirectsScanned - Sent as `redirectsScanned` in the additional page data.
 * @param numberOfPagesNotScanned - Sent as `pagesNotScanned` in the additional page data.
 * @param metadata - Metadata string for the form.
 */
export const submitForm = async (
  browserToRun: string,
  userDataDirectory: string,
  scannedUrl: string,
  entryUrl: string,
  scanType: string,
  email: string,
  name: string,
  scanResultsJson: string,
  numberOfPagesScanned: number,
  numberOfRedirectsScanned: number,
  numberOfPagesNotScanned: number,
  metadata: string,
) => {
  const additionalPageDataJson = JSON.stringify({
    redirectsScanned: numberOfRedirectsScanned,
    pagesNotScanned: numberOfPagesNotScanned,
  });

  let finalUrl =
    `${formDataFields.formUrl}?` +
    `${formDataFields.entryUrlField}=${encodeURIComponent(entryUrl)}&` +
    `${formDataFields.scanTypeField}=${encodeURIComponent(scanType)}&` +
    `${formDataFields.emailField}=${encodeURIComponent(email)}&` +
    `${formDataFields.nameField}=${encodeURIComponent(name)}&` +
    `${formDataFields.resultsField}=${encodeURIComponent(scanResultsJson)}&` +
    `${formDataFields.numberOfPagesScannedField}=${numberOfPagesScanned}&` +
    `${formDataFields.additionalPageDataField}=${encodeURIComponent(additionalPageDataJson)}&` +
    `${formDataFields.metadataField}=${encodeURIComponent(metadata)}`;

  if (scannedUrl !== entryUrl) {
    finalUrl += `&${formDataFields.redirectUrlField}=${encodeURIComponent(scannedUrl)}`;
  }

  if (proxy) {
    await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
  } else {
    try {
      await axios.get(finalUrl, { timeout: 2000 });
    } catch (error) {
      // Only a timed-out/aborted request triggers the browser fallback;
      // other request errors are deliberately ignored (best-effort submission).
      if (error.code === 'ECONNABORTED') {
        if (browserToRun || constants.launcher === webkit) {
          await submitFormViaPlaywright(browserToRun, userDataDirectory, finalUrl);
        }
      }
    }
  }
};
1762
/**
 * Builds the Playwright launch options for the requested browser.
 *
 * @param browser - Browser name ("chrome" or "edge"); omit it to use chromium,
 *   the default Playwright browser.
 * @returns Playwright launch options object. For more details:
 *   https://playwright.dev/docs/api/class-browsertype#browser-type-launch
 */
export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
  const launchOptions: LaunchOptions = {
    // Drop the --use-mock-keychain flag to allow MacOS devices
    // to use the cloned cookies.
    ignoreDefaultArgs: ['--use-mock-keychain'],
    args: constants.launchOptionsArgs,
  };

  // Having no channel is equivalent to "chromium".
  if (browser) {
    launchOptions.channel = browser;
  }

  if (proxy) {
    launchOptions.headless = false;
    launchOptions.slowMo = 1000; // To ensure server-side rendered proxy page is loaded
    return launchOptions;
  }

  if (browser === BrowserTypes.EDGE && os.platform() === 'win32') {
    // edge should be in non-headless mode
    launchOptions.headless = false;
  }
  return launchOptions;
};
1787
+
1788
/**
 * Returns `url` with its username and password removed.
 *
 * @param url - Absolute URL string; `new URL` throws if it cannot be parsed.
 * @returns The serialised URL without credentials.
 */
export const urlWithoutAuth = (url: string): string => {
  // Object.assign goes through the URL setters, clearing both credential parts.
  const sanitised = Object.assign(new URL(url), { username: '', password: '' });
  return sanitised.toString();
};
1794
+
1795
/**
 * Waits until the page reaches the `load` state, reaches the `networkidle`
 * state, or `timeout` milliseconds elapse, whichever settles first.
 *
 * The fallback timer is cleared as soon as the race settles, so it does not
 * stay pending (and keep the event loop alive) after the page has loaded.
 *
 * @param page - Object exposing `waitForLoadState(state)`.
 * @param timeout - Maximum time to wait, in milliseconds (default 10000).
 * @returns A promise that settles with the outcome of the first of the three
 *   waits to settle; it rejects if a load-state wait rejects first.
 */
export const waitForPageLoaded = async (page, timeout = 10000) => {
  let fallbackTimer: ReturnType<typeof setTimeout> | undefined;
  const timeoutElapsed = new Promise(resolve => {
    fallbackTimer = setTimeout(resolve, timeout);
  });

  try {
    return await Promise.race([
      page.waitForLoadState('load'),
      page.waitForLoadState('networkidle'),
      timeoutElapsed,
    ]);
  } finally {
    clearTimeout(fallbackTimer);
  }
};
1802
+
1803
/**
 * Checks whether `urlString` starts with `http://` or `https://` and is
 * followed by at least one character, none of which is a space or a double
 * quote.
 *
 * @param urlString - Candidate URL.
 * @returns `true` when the string matches that shape.
 */
function isValidHttpUrl(urlString) {
  return /^(http|https):\/\/[^ "]+$/.test(urlString);
}
1807
+
1808
/**
 * Heuristically decides whether `url` refers to a local file rather than a
 * web address.
 *
 * A value is treated as a file path when it starts with `file://` or `/`,
 * starts with a drive letter followed by a colon (e.g. `C:`), or contains a
 * backslash anywhere.
 *
 * @param url - Value to classify.
 * @returns `true` when any of the file-path markers is present.
 */
export const isFilePath = (url: string): boolean => {
  if (url.startsWith('file://') || url.startsWith('/')) {
    return true;
  }
  const startsWithDriveLetter = /^[A-Z]:/i.test(url);
  return startsWithDriveLetter || /\\/.test(url);
};
1818
+
1819
/**
 * Converts a `file://` URL into a file system path.
 *
 * @param url - A `file://` URL or any other string.
 * @returns The path produced by `fileURLToPath` for `file://` inputs; any
 *   other input is returned unchanged.
 */
export function convertLocalFileToPath(url: string): string {
  return url.startsWith('file://') ? fileURLToPath(url) : url;
}
1825
+
1826
/**
 * Converts an absolute path beginning with `/` into a `file://` URL string.
 *
 * @param filePath - A path or any other string.
 * @returns The `file://` URL for inputs starting with `/`; any other input
 *   is returned unchanged.
 */
export function convertPathToLocalFile(filePath: string): string {
  return filePath.startsWith('/') ? pathToFileURL(filePath).toString() : filePath;
}
1832
+
1833
/**
 * Extracts the percent-decoded path portion of a file URL.
 *
 * The URL is parsed with the legacy `url.parse`, and its `path` component is
 * decoded and returned, i.e. the value without the `file://` prefix.
 * NOTE(review): per the Node.js docs, `path` also includes any query string,
 * so one would be carried into the result — confirm that is intended.
 *
 * @param fileUrl - File URL to convert.
 * @returns The decoded path component of `fileUrl`.
 */
export function convertToFilePath(fileUrl: string) {
  const { path: encodedPath } = url.parse(fileUrl);
  return decodeURIComponent(encodedPath);
}