@govtechsg/oobee 0.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/.dockerignore +22 -0
  2. package/.github/pull_request_template.md +11 -0
  3. package/.github/workflows/docker-test.yml +54 -0
  4. package/.github/workflows/image.yml +107 -0
  5. package/.github/workflows/publish.yml +18 -0
  6. package/.idea/modules.xml +8 -0
  7. package/.idea/purple-a11y.iml +9 -0
  8. package/.idea/vcs.xml +6 -0
  9. package/.prettierrc.json +12 -0
  10. package/.vscode/extensions.json +5 -0
  11. package/.vscode/settings.json +10 -0
  12. package/CODE_OF_CONDUCT.md +128 -0
  13. package/DETAILS.md +163 -0
  14. package/Dockerfile +60 -0
  15. package/INSTALLATION.md +146 -0
  16. package/INTEGRATION.md +785 -0
  17. package/LICENSE +22 -0
  18. package/README.md +587 -0
  19. package/SECURITY.md +5 -0
  20. package/__mocks__/mock-report.html +1431 -0
  21. package/__mocks__/mockFunctions.ts +32 -0
  22. package/__mocks__/mockIssues.ts +64 -0
  23. package/__mocks__/mock_all_issues/000000001.json +64 -0
  24. package/__mocks__/mock_all_issues/000000002.json +53 -0
  25. package/__mocks__/mock_all_issues/fake-file.txt +0 -0
  26. package/__tests__/logs.test.ts +25 -0
  27. package/__tests__/mergeAxeResults.test.ts +278 -0
  28. package/__tests__/utils.test.ts +118 -0
  29. package/a11y-scan-results.zip +0 -0
  30. package/eslint.config.js +53 -0
  31. package/exclusions.txt +2 -0
  32. package/gitlab-pipeline-template.yml +54 -0
  33. package/jest.config.js +1 -0
  34. package/package.json +96 -0
  35. package/scripts/copyFiles.js +44 -0
  36. package/scripts/install_oobee_dependencies.cmd +13 -0
  37. package/scripts/install_oobee_dependencies.command +101 -0
  38. package/scripts/install_oobee_dependencies.ps1 +110 -0
  39. package/scripts/oobee_shell.cmd +13 -0
  40. package/scripts/oobee_shell.command +11 -0
  41. package/scripts/oobee_shell.sh +55 -0
  42. package/scripts/oobee_shell_ps.ps1 +54 -0
  43. package/src/cli.ts +401 -0
  44. package/src/combine.ts +240 -0
  45. package/src/constants/__tests__/common.test.ts +44 -0
  46. package/src/constants/cliFunctions.ts +305 -0
  47. package/src/constants/common.ts +1840 -0
  48. package/src/constants/constants.ts +443 -0
  49. package/src/constants/errorMeta.json +319 -0
  50. package/src/constants/itemTypeDescription.ts +11 -0
  51. package/src/constants/oobeeAi.ts +141 -0
  52. package/src/constants/questions.ts +181 -0
  53. package/src/constants/sampleData.ts +187 -0
  54. package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
  55. package/src/crawlers/commonCrawlerFunc.ts +656 -0
  56. package/src/crawlers/crawlDomain.ts +877 -0
  57. package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
  58. package/src/crawlers/crawlLocalFile.ts +193 -0
  59. package/src/crawlers/crawlSitemap.ts +356 -0
  60. package/src/crawlers/custom/extractAndGradeText.ts +57 -0
  61. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
  62. package/src/crawlers/custom/utils.ts +486 -0
  63. package/src/crawlers/customAxeFunctions.ts +82 -0
  64. package/src/crawlers/pdfScanFunc.ts +468 -0
  65. package/src/crawlers/runCustom.ts +117 -0
  66. package/src/index.ts +173 -0
  67. package/src/logs.ts +66 -0
  68. package/src/mergeAxeResults.ts +964 -0
  69. package/src/npmIndex.ts +284 -0
  70. package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
  71. package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
  72. package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
  73. package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
  74. package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
  75. package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
  76. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
  77. package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
  78. package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
  79. package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
  80. package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
  81. package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
  82. package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
  83. package/src/static/ejs/partials/components/topFive.ejs +6 -0
  84. package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
  85. package/src/static/ejs/partials/footer.ejs +21 -0
  86. package/src/static/ejs/partials/header.ejs +230 -0
  87. package/src/static/ejs/partials/main.ejs +40 -0
  88. package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
  89. package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
  90. package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
  91. package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
  92. package/src/static/ejs/partials/scripts/popper.ejs +7 -0
  93. package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
  94. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
  95. package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
  96. package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
  97. package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
  98. package/src/static/ejs/partials/scripts/utils.ejs +441 -0
  99. package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
  100. package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
  101. package/src/static/ejs/partials/styles/styles.ejs +1843 -0
  102. package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
  103. package/src/static/ejs/partials/summaryHeader.ejs +70 -0
  104. package/src/static/ejs/partials/summaryMain.ejs +75 -0
  105. package/src/static/ejs/report.ejs +420 -0
  106. package/src/static/ejs/summary.ejs +47 -0
  107. package/src/static/mustache/.prettierrc +4 -0
  108. package/src/static/mustache/Attention Deficit.mustache +11 -0
  109. package/src/static/mustache/Blind.mustache +11 -0
  110. package/src/static/mustache/Cognitive.mustache +7 -0
  111. package/src/static/mustache/Colorblindness.mustache +20 -0
  112. package/src/static/mustache/Deaf.mustache +12 -0
  113. package/src/static/mustache/Deafblind.mustache +7 -0
  114. package/src/static/mustache/Dyslexia.mustache +14 -0
  115. package/src/static/mustache/Low Vision.mustache +7 -0
  116. package/src/static/mustache/Mobility.mustache +15 -0
  117. package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
  118. package/src/static/mustache/report.mustache +1709 -0
  119. package/src/types/print-message.d.ts +28 -0
  120. package/src/types/types.ts +46 -0
  121. package/src/types/xpath-to-css.d.ts +3 -0
  122. package/src/utils.ts +332 -0
  123. package/tsconfig.json +15 -0
@@ -0,0 +1,468 @@
1
+ import { spawnSync } from 'child_process';
2
+ import fs from 'fs';
3
+ import { randomUUID } from 'crypto';
4
+ import { createRequire } from 'module';
5
+ import os from 'os';
6
+ import path from 'path';
7
+ import { ensureDirSync, ReadStream } from 'fs-extra';
8
+ import { Request } from 'crawlee';
9
+ import { getPageFromContext, getPdfScreenshots } from '../screenshotFunc/pdfScreenshotFunc.js';
10
+ import { isFilePath } from '../constants/common.js';
11
+ import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
12
+ import constants, {
13
+ getExecutablePath,
14
+ guiInfoStatusTypes,
15
+ UrlsCrawled,
16
+ } from '../constants/constants.js';
17
+
18
+ const require = createRequire(import.meta.url);
19
+
20
+ // CONSTANTS
21
+
22
/** Transformed rule results keyed by rule id. */
type RulesMap = { [key: string]: TransformedRuleObject };

/** One severity bucket of a scanned file: the rules recorded in it and their summed item count. */
type CategoryBucket = {
  rules: RulesMap;
  totalItems: number;
};

/** Builds a fresh, empty bucket so that no two categories ever share a `rules` object. */
const createEmptyBucket = (): CategoryBucket => ({ rules: {}, totalItems: 0 });

// Classes

/**
 * Report-shaped result for a single scanned file: three severity buckets plus
 * the identifying metadata of the file (url, title, path on disk).
 */
class TranslatedObject {
  goodToFix: CategoryBucket;
  mustFix: CategoryBucket;
  needsReview: CategoryBucket;
  url: string = '';
  pageTitle: string = '';
  filePath: string = '';
  totalItems: number = 0;

  constructor() {
    // Buckets are assigned here (not as field initialisers) so the order in
    // which properties are created on the instance stays as it was.
    this.goodToFix = createEmptyBucket();
    this.mustFix = createEmptyBucket();
    this.needsReview = createEmptyBucket();
  }
}

/**
 * Report-shaped representation of one rule: its description, conformance tags
 * and the individual failing items.
 */
export class TransformedRuleObject {
  description: string = '';
  totalItems: number = 0;
  conformance: string[] = [];
  items: { message: string; page: number; screenshotPath?: string; context: string }[] = [];
}
70
+
71
// VeraPDF Scan Results types
// These describe the JSON document that this module parses as the veraPDF report.

/** Start/finish pair with a formatted duration; used for both per-job and batch timing. */
interface TimeSpan {
  start: number;
  finish: number;
  duration: string;
  difference: number;
}

/** Job counters shared by the validation, features and repair summaries. */
interface JobCounts {
  failedJobCount: number;
  totalJobCount: number;
  successfulJobCount: number;
}

/** A single failed/passed check belonging to a rule. */
interface Check {
  status: string;
  context: string;
  errorMessage: string;
  errorArguments: any[];
}

/** Summary of one rule (specification + clause + test number) and its checks. */
interface RuleSummary {
  ruleStatus: string;
  specification: string;
  clause: string;
  testNumber: number;
  status: string;
  failedChecks: number;
  description: string;
  object: string;
  test: string;
  checks: Check[];
}

/** Rule/check counters and the per-rule summaries of one validated file. */
interface ValidationDetails {
  passedRules: number;
  failedRules: number;
  passedChecks: number;
  failedChecks: number;
  ruleSummaries: RuleSummary[];
}

/** Validation outcome of one job. */
interface ValidationResult {
  details: ValidationDetails;
  jobEndStatus: string;
  profileName: string;
  statement: string;
  compliant: boolean;
}

/** Name and size of the file a job was run against. */
interface ItemDetails {
  name: string;
  size: number;
}

type ProcessingTime = TimeSpan;

/** One scanned file. */
interface Job {
  itemDetails: ItemDetails;
  validationResult: ValidationResult;
  processingTime: ProcessingTime;
}

interface ReleaseDetail {
  id: string;
  version: string;
  buildDate: number;
}

interface BuildInformation {
  releaseDetails: ReleaseDetail[];
}

type Duration = TimeSpan;

type ValidationSummary = JobCounts & {
  nonCompliantPdfaCount: number;
  compliantPdfaCount: number;
};

type FeaturesSummary = JobCounts;

type RepairSummary = JobCounts;

/** Totals across every job of the run. */
interface BatchSummary {
  duration: Duration;
  totalJobs: number;
  outOfMemory: number;
  veraExceptions: number;
  failedEncryptedJobs: number;
  failedParsingJobs: number;
  validationSummary: ValidationSummary;
  featuresSummary: FeaturesSummary;
  repairSummary: RepairSummary;
  multiJob: boolean;
}

interface Report {
  buildInformation: BuildInformation;
  jobs: Job[];
  batchSummary: BatchSummary;
}

/** Root of the parsed report document. */
type VeraPdfScanResults = { report: Report };
183
// WCAG clauses handled by this module, grouped by conformance level:
//   AAA: 1.4.8, 2.4.9
//   AA:  1.3.4, 1.4.3, 1.4.4, 1.4.10
//   A:   1.3.1, 4.1.1, 4.1.2
const LEVEL_AAA = ['2.4.9', '1.4.8'];
const LEVEL_AA = ['1.3.4', '1.4.3', '1.4.4', '1.4.10'];
const LEVEL_A = ['1.3.1', '4.1.1', '4.1.2'];

/** Pairs every clause of one level with that level's tag. */
const tagClauses = (clauses: string[], tag: string): [string, string][] =>
  clauses.map((clause): [string, string] => [clause, tag]);

// mapping of clause to its A/AA/AAA level
const clauseToLevel: Record<string, string> = Object.fromEntries([
  ...tagClauses(LEVEL_AAA, 'wcag2aaa'),
  ...tagClauses(LEVEL_AA, 'wcag2aa'),
  ...tagClauses(LEVEL_A, 'wcag2a'),
]);

// Maps the STATUS value found in the error meta file to the report category
// that a rule is filed under.
const metaToCategoryMap = {
  critical: 'mustFix',
  error: 'goodToFix',
  serious: 'goodToFix',
  warning: 'goodToFix',
  ignore: 'goodToFix',
};

// clause -> test numbers that are dropped from the results
const EXCLUDED_RULES = {
  '1.3.4': { 1: true }, // test for page orientation deemed a false positive, so it's excluded
};
216
+
217
/**
 * Tells whether a rule should be left out of the results.
 *
 * A rule is excluded when its clause/test number pair is listed in
 * EXCLUDED_RULES, or when its clause is one of the LEVEL_AAA clauses.
 *
 * @param rule rule summary taken from the scan results
 * @returns a truthy value when the rule must be skipped
 */
const isRuleExcluded = (rule: RuleSummary) => {
  const { clause, testNumber } = rule;
  const excludedTests = EXCLUDED_RULES[clause];
  const excludedByTestNumber = excludedTests ? excludedTests[testNumber] : false;
  return excludedByTestNumber || LEVEL_AAA.includes(clause);
};
223
+
224
/**
 * Looks up the veraPDF launcher via getExecutablePath, using `verapdf.bat` on
 * Windows and `verapdf` on every other platform.
 *
 * @returns whatever getExecutablePath returned; when that value is falsy an
 *          error is written to both loggers before returning it
 */
const getVeraExecutable = () => {
  const launcherName = os.platform() === 'win32' ? 'verapdf.bat' : 'verapdf';
  const veraPdfExe: string = getExecutablePath('**/verapdf', launcherName);

  if (veraPdfExe) {
    return veraPdfExe;
  }

  const veraPdfExeNotFoundError =
    'Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.';
  consoleLogger.error(veraPdfExeNotFoundError);
  silentLogger.error(veraPdfExeNotFoundError);
  return veraPdfExe;
};
239
+
240
/**
 * Cheap sanity check that a downloaded payload is a PDF document.
 *
 * The payload must start with the `%PDF-` header and contain an `%%EOF`
 * marker somewhere after it.
 *
 * The header is compared at offset 0 directly. Searching with `lastIndexOf`
 * would reject a valid file as soon as `%PDF-` occurs a second time inside
 * it (for example an uncompressed embedded PDF attachment).
 *
 * @param buffer complete file content
 * @returns true when the buffer looks like a PDF, false otherwise (including
 *          when the argument is not a Buffer at all)
 */
const isPDF = (buffer: Buffer) => {
  if (!Buffer.isBuffer(buffer)) {
    return false;
  }

  const PDF_HEADER = '%PDF-';
  const startsWithHeader = buffer.toString('latin1', 0, PDF_HEADER.length) === PDF_HEADER;

  return startsWithHeader && buffer.lastIndexOf('%%EOF') > -1;
};
245
+
246
/**
 * Starts downloading (or copying, for local files) the PDF behind `request`
 * into `<randomToken>/<uuid>.pdf` and registers the pending work in
 * `pdfDownloads`.
 *
 * When the transfer has finished, the content is checked with isPDF: valid
 * files are appended to `urlsCrawled.scanned`, anything else to
 * `urlsCrawled.invalid`. Both outcomes are reported through guiInfoLog.
 *
 * The promise pushed to `pdfDownloads` never rejects. A failed request or a
 * stream error is logged and the URL is recorded as invalid, so awaiting the
 * list can neither hang on a promise that never settles nor crash on an
 * unhandled stream 'error' event.
 *
 * @param randomToken  folder the file is written to
 * @param pdfDownloads list that receives the promise tracking this download
 * @param request      crawler request whose `url` is fetched
 * @param sendRequest  function used to issue the HTTP(S) request as a stream
 * @param urlsCrawled  bookkeeping object updated with the outcome
 * @returns the generated file name (without extension) and the request url
 */
export const handlePdfDownload = (
  randomToken: string,
  pdfDownloads: Promise<void>[],
  request: Request,
  sendRequest: any,
  urlsCrawled: UrlsCrawled,
): { pdfFileName: string; url: string } => {
  const pdfFileName = randomUUID();
  const { url } = request;
  const pageTitle = decodeURI(request.url).split('/').pop();

  // Shared by the "not a PDF" and the "transfer failed" paths.
  const markSkipped = () => {
    guiInfoLog(guiInfoStatusTypes.SKIPPED, {
      numScanned: urlsCrawled.scanned.length,
      urlScanned: request.url,
    });
    urlsCrawled.invalid.push(url);
  };

  const download = async (): Promise<void> => {
    let pdfResponse: ReadStream;

    if (isFilePath(url)) {
      // Read the file from the file system
      const filePath = new URL(url).pathname;
      pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
    } else {
      // Send HTTP/HTTPS request
      pdfResponse = await sendRequest({ responseType: 'buffer', isStream: true });
      pdfResponse.setEncoding('binary');
    }

    const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
      flags: 'a',
    });
    const bufs: Buffer[] = [];

    await new Promise<void>((resolve, reject) => {
      const fail = (err: Error) => {
        pdfResponse.destroy();
        downloadFile.destroy();
        reject(err);
      };
      pdfResponse.on('error', fail);
      downloadFile.on('error', fail);

      pdfResponse.on('data', (chunk: string | Buffer) => {
        downloadFile.write(chunk, 'binary');
        // The stream encoding is 'binary', so chunks arrive as latin1 strings;
        // decode them with the same encoding instead of the utf-8 default.
        bufs.push(typeof chunk === 'string' ? Buffer.from(chunk, 'binary') : chunk);
      });

      // Resolve only once the write stream has flushed, so the file is
      // complete on disk by the time the download is reported as done.
      pdfResponse.on('end', () => {
        downloadFile.end(() => resolve());
      });
    });

    if (isPDF(Buffer.concat(bufs))) {
      guiInfoLog(guiInfoStatusTypes.SCANNED, {
        numScanned: urlsCrawled.scanned.length,
        urlScanned: request.url,
      });
      urlsCrawled.scanned.push({
        url: request.url,
        pageTitle,
        actualUrl: url,
      });
    } else {
      markSkipped();
    }
  };

  pdfDownloads.push(
    download().catch(err => {
      consoleLogger.error(`Unable to download PDF from ${url}: ${err}`);
      markSkipped();
    }),
  );

  return { pdfFileName, url };
};
307
+
308
/**
 * Runs veraPDF recursively over the `randomToken` folder with the WCAG 2.2
 * validation profile and stores its JSON output in
 * `<randomToken>/<constants.pdfScanResultFileName>`.
 *
 * Terminates the process with exit code 1 when the veraPDF executable cannot
 * be located (getVeraExecutable logs the reason).
 *
 * @param randomToken folder that holds the downloaded PDFs and receives the result file
 */
export const runPdfScan = async (randomToken: string) => {
  const execFile = getVeraExecutable();
  // Test the raw lookup result: once wrapped in quotes the strings below are
  // always truthy, and path.join would throw on an undefined path first.
  if (!execFile) {
    process.exit(1);
  }

  const veraPdfExe = `"${execFile}"`;
  // const veraPdfProfile = getVeraProfile();
  const veraPdfProfile = `"${path.join(
    execFile,
    '..',
    'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml',
  )}"`;

  const intermediateFolder = randomToken; // NOTE: assumes this folder is already created for crawlee

  // store in a intermediate folder as we transfer final results later
  const intermediateResultPath = `${intermediateFolder}/${constants.pdfScanResultFileName}`;

  const veraPdfCmdArgs = [
    '-p',
    veraPdfProfile,
    '--format',
    'json',
    '-r', // recurse through directory
    `"${intermediateFolder}"`,
  ];

  const ls = spawnSync(veraPdfExe, veraPdfCmdArgs, {
    shell: true,
    // The default limit of 1 MiB truncates the report of larger scans, which
    // would leave unparseable JSON on disk.
    maxBuffer: 1024 * 1024 * 1024,
  });

  if (ls.error) {
    consoleLogger.error(`veraPDF did not run successfully: ${ls.error}`);
  }

  // stdout is null when the process could not be spawned at all
  fs.writeFileSync(intermediateResultPath, ls.stdout ?? '', { encoding: 'utf-8' });
};
338
+
339
// transform results from veraPDF to desired format for report
/**
 * Reads the veraPDF result file of a scan and converts every job into a
 * TranslatedObject.
 *
 * Jobs without a validation result are skipped with a log entry. Rules for
 * which isRuleExcluded returns a truthy value are dropped. Every remaining
 * rule is filed under the category that its STATUS in errorMeta.json maps to.
 * A rule that is missing from the meta file, or whose STATUS is not a known
 * key of metaToCategoryMap, is treated as 'ignore'.
 *
 * @param randomToken      folder containing the result file and the PDFs
 * @param uuidToUrlMapping maps the file name (without extension) to its source URL
 * @returns one entry per job that could be validated; empty when the result
 *          file is not valid JSON
 */
export const mapPdfScanResults = async (
  randomToken: string,
  uuidToUrlMapping: Record<string, string>,
) => {
  const intermediateFolder = randomToken;
  const intermediateResultPath = `${intermediateFolder}/${constants.pdfScanResultFileName}`;

  const rawdata = fs.readFileSync(intermediateResultPath, 'utf-8');

  let parsedJsonData: VeraPdfScanResults | undefined;
  try {
    parsedJsonData = JSON.parse(rawdata);
  } catch (err) {
    consoleLogger.error(`Unable to parse veraPDF results in ${intermediateResultPath}: ${err}`);
  }

  const errorMeta = require('../constants/errorMeta.json');

  const resultsList: TranslatedObject[] = [];

  // jobs: files that are scanned
  const jobs = parsedJsonData?.report?.jobs ?? [];

  // loop through all jobs
  for (const { itemDetails, validationResult } of jobs) {
    const translated = new TranslatedObject();

    const { name: fileName } = itemDetails;

    // file name without directory and extension is the uuid given at download
    const uuid = path.basename(fileName).split('.')[0];
    const url = uuidToUrlMapping[uuid];
    const pageTitle = decodeURI(url).split('/').pop();
    const filePath = `${randomToken}/${uuid}.pdf`;

    translated.url = url;
    translated.pageTitle = pageTitle;
    translated.filePath = filePath;

    if (!validationResult) {
      // check for error in scan
      consoleLogger.info(`Unable to scan ${pageTitle}, skipping`);
      continue; // skip this job
    }

    // destructure validation result
    const { passedChecks, failedChecks, ruleSummaries } = validationResult.details;
    translated.totalItems = passedChecks + failedChecks;

    // loop through all failed rules
    for (const rule of ruleSummaries) {
      if (isRuleExcluded(rule)) continue;

      const { specification, testNumber, clause } = rule;
      const [ruleId, transformedRule] = await transformRule(rule, filePath);

      // ignore if violation is not in the meta file; every level of the
      // lookup may be absent, not only the test number
      const meta = errorMeta[specification]?.[clause]?.[testNumber]?.STATUS ?? 'ignore';
      const categoryName = metaToCategoryMap[meta] ?? metaToCategoryMap.ignore;
      const category = translated[categoryName];

      category.rules[ruleId] = transformedRule;
      category.totalItems += transformedRule.totalItems;
    }

    resultsList.push(translated);
  }

  return resultsList;
};
418
+
419
/**
 * Converts one rule summary of the scan results into the report format.
 *
 * The page of every check is resolved one after another through
 * getPageFromContext.
 *
 * @param rule     rule summary to convert
 * @param filePath path of the PDF the rule was reported for
 * @returns tuple of the rule id (`pdf-<specification>-<clause>-<testNumber>`
 *          with spaces replaced by underscores) and the transformed rule
 */
const transformRule = async (
  rule: RuleSummary,
  filePath: string,
): Promise<[string, TransformedRuleObject]> => {
  const { specification, description, clause, testNumber, checks } = rule;

  const transformed = new TransformedRuleObject();
  transformed.description = description;
  transformed.totalItems = checks.length;
  transformed.conformance =
    specification === 'WCAG2.1'
      ? [clauseToLevel[clause], `wcag${clause.replaceAll('.', '')}`]
      : ['best-practice'];
  transformed.items = [];

  for (const { errorMessage, context } of checks) {
    const page = await getPageFromContext(context, filePath);
    transformed.items.push({ message: errorMessage, page, context });
  }

  const ruleId = `pdf-${specification}-${clause}-${testNumber}`.split(' ').join('_');

  return [ruleId, transformed];
};
447
+
448
/**
 * Replaces the items of every mustFix and goodToFix rule of `result` with the
 * items returned by getPdfScreenshots.
 *
 * Screenshot paths are built under `<randomToken>/elemScreenshots/pdf`, which
 * is created if needed, as `<pageTitle>-<category>-<ruleId>`. The page title
 * has its spaces replaced by underscores and is cut at its first dot.
 *
 * @param randomToken folder of the current scan
 * @param result      translated scan result of one PDF; its rules are updated in place
 */
export const doPdfScreenshots = async (randomToken: string, result: TranslatedObject) => {
  const { filePath, pageTitle } = result;
  const titleForFileName = pageTitle.replaceAll(' ', '_').split('.')[0];
  const outputDir = path.join(randomToken, 'elemScreenshots', 'pdf');

  ensureDirSync(outputDir);

  for (const category of ['mustFix', 'goodToFix']) {
    const rules: RulesMap = result[category].rules;

    for (const [ruleId, ruleInfo] of Object.entries(rules)) {
      const screenshotPath = path.join(outputDir, `${titleForFileName}-${category}-${ruleId}`);
      ruleInfo.items = await getPdfScreenshots(filePath, ruleInfo.items, screenshotPath);
    }
  }
};
@@ -0,0 +1,117 @@
1
+ /* eslint-env browser */
2
+ import { chromium } from 'playwright';
3
+ import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
4
+ import { cleanUp } from '../utils.js';
5
+ import constants, {
6
+ getIntermediateScreenshotsPath,
7
+ guiInfoStatusTypes,
8
+ UrlsCrawled,
9
+ } from '../constants/constants.js';
10
+ import { DEBUG, initNewPage, log } from './custom/utils.js';
11
+ import { guiInfoLog } from '../logs.js';
12
+ import { ViewportSettingsClass } from '../combine.js';
13
+
14
+ // Export of classes
15
+
16
/**
 * Bundle of values handed to the page-processing helpers of a custom scan.
 * Every constructor argument is stored unchanged as a public, mutable
 * property of the same name.
 */
export class ProcessPageParams {
  constructor(
    public scannedIdx: number,
    public blacklistedPatterns: string[] | null,
    public includeScreenshots: boolean,
    public dataset: any,
    public intermediateScreenshotsPath: string,
    public urlsCrawled: UrlsCrawled,
    public randomToken: string,
  ) {}
}
42
+
43
/**
 * Runs a custom-flow scan: opens a visible Chrome window on `url`, hands every
 * page created in the browser context to initNewPage, and returns once all
 * tracked pages are gone.
 *
 * Any error raised while the browser is driven is logged and ends the process
 * with exit code 1.
 *
 * @param url                 page that is opened first
 * @param randomToken         scan folder; also used as CRAWLEE_STORAGE_DIR
 * @param viewportSettings    supplies the Playwright device details for the context
 * @param blacklistedPatterns passed through to page processing
 * @param includeScreenshots  passed through to page processing
 * @returns the urlsCrawled bookkeeping object of this scan
 */
const runCustom = async (
  url: string,
  randomToken: string,
  viewportSettings: ViewportSettingsClass,
  blacklistedPatterns: string[] | null,
  includeScreenshots: boolean,
) => {
  // checks and delete datasets path if it already exists
  await cleanUp(randomToken);
  process.env.CRAWLEE_STORAGE_DIR = randomToken;

  const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
  const { dataset } = await createCrawleeSubFolders(randomToken);
  const intermediateScreenshotsPath = getIntermediateScreenshotsPath(randomToken);
  const processPageParams = new ProcessPageParams(
    0, // scannedIdx
    blacklistedPatterns,
    includeScreenshots,
    dataset,
    intermediateScreenshotsPath,
    urlsCrawled,
    randomToken,
  );

  const pagesDict = {};
  const pageClosePromises = [];

  // Waits for every promise in `pending`, then checks again for as long as
  // pagesDict still has entries, because more pages (and promises) may have
  // been added while waiting.
  // NOTE(review): presumably initNewPage fills pagesDict/pending and the
  // promises stay pending until the page closes - confirm in custom/utils.
  const waitUntilAllPagesClosed = async (pending: unknown[]) => {
    do {
      await Promise.all(pending);
    } while (Object.keys(pagesDict).length > 0);
    return true;
  };

  try {
    const browser = await chromium.launch({
      args: ['--window-size=1920,1040'],
      headless: false,
      channel: 'chrome',
      // bypassCSP: true,
      devtools: DEBUG,
    });

    const context = await browser.newContext({
      ignoreHTTPSErrors: true,
      serviceWorkers: 'block',
      viewport: null,
      ...viewportSettings.playwrightDeviceDetailsObject,
    });

    // Detection of new page
    context.on('page', async openedPage => {
      await initNewPage(openedPage, pageClosePromises, processPageParams, pagesDict);
    });

    const firstPage = await context.newPage();
    // timeout 0: no limit on how long the initial navigation may take
    await firstPage.goto(url, { timeout: 0 });

    await waitUntilAllPagesClosed(pageClosePromises);
  } catch (error) {
    log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
    process.exit(1);
  }

  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
  return urlsCrawled;
};
116
+
117
+ export default runCustom;