@govtechsg/oobee 0.10.88 → 0.10.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,46 @@
1
1
  import { createWriteStream } from 'fs';
2
- import { AsyncParser, ParserOptions } from '@json2csv/node';
3
2
  import { a11yRuleShortDescriptionMap } from '../constants/constants.js';
4
3
  import type { AllIssues, RuleInfo } from './types.js';
4
+ import type { ItemsStore } from './itemsStore.js';
5
5
 
6
- const writeCsv = async (allIssues: AllIssues, storagePath: string): Promise<void> => {
6
+ function escapeCsvField(value: string): string {
7
+ if (value == null) return '';
8
+ const str = String(value);
9
+ return `"${str.replace(/"/g, '""')}"`;
10
+ }
11
+
12
+ const writeCsv = async (
13
+ allIssues: AllIssues,
14
+ storagePath: string,
15
+ itemsStore?: ItemsStore,
16
+ ): Promise<void> => {
7
17
  const csvOutput = createWriteStream(`${storagePath}/report.csv`, { encoding: 'utf8' });
18
+
8
19
  const formatPageViolation = (pageNum: number) => {
9
20
  if (pageNum < 0) return 'Document';
10
21
  return `Page ${pageNum}`;
11
22
  };
12
23
 
13
- // transform allIssues into the form:
14
- // [['mustFix', rule1], ['mustFix', rule2], ['goodToFix', rule3], ...]
15
- const getRulesByCategory = (issues: AllIssues) => {
24
+ const fields = [
25
+ 'customFlowLabel',
26
+ 'deviceChosen',
27
+ 'scanCompletedAt',
28
+ 'severity',
29
+ 'issueId',
30
+ 'issueDescription',
31
+ 'wcagConformance',
32
+ 'url',
33
+ 'pageTitle',
34
+ 'context',
35
+ 'howToFix',
36
+ 'axeImpact',
37
+ 'xpath',
38
+ 'learnMore',
39
+ ];
40
+
41
+ csvOutput.write(fields.map(escapeCsvField).join(',') + '\n');
42
+
43
+ const getRulesByCategory = (issues: AllIssues): [string, RuleInfo][] => {
16
44
  return Object.entries(issues.items)
17
45
  .filter(([category]) => category !== 'passed')
18
46
  .reduce((prev: [string, RuleInfo][], [category, value]) => {
@@ -23,15 +51,14 @@ const writeCsv = async (allIssues: AllIssues, storagePath: string): Promise<void
23
51
  return prev;
24
52
  }, [])
25
53
  .sort((a, b) => {
26
- // sort rules according to severity, then ruleId
27
54
  const compareCategory = -a[0].localeCompare(b[0]);
28
55
  return compareCategory === 0 ? a[1].rule.localeCompare(b[1].rule) : compareCategory;
29
56
  });
30
57
  };
31
58
 
32
- const flattenRule = (catAndRule: [string, RuleInfo]) => {
33
- const [severity, rule] = catAndRule;
34
- const results = [];
59
+ const rulesByCategory = getRulesByCategory(allIssues);
60
+
61
+ for (const [severity, rule] of rulesByCategory) {
35
62
  const {
36
63
  rule: issueId,
37
64
  description: issueDescription,
@@ -41,104 +68,106 @@ const writeCsv = async (allIssues: AllIssues, storagePath: string): Promise<void
41
68
  helpUrl: learnMore,
42
69
  } = rule;
43
70
 
44
- // format clauses as a string
45
71
  const wcagConformance = conformance.join(',');
46
72
 
47
- pagesAffected.sort((a, b) => a.url.localeCompare(b.url));
48
-
49
- pagesAffected.forEach(affectedPage => {
50
- const { url, items } = affectedPage;
51
- items.forEach(item => {
52
- const { html, message, xpath } = item;
53
- const page = (item as any).page;
54
- const howToFix = message.replace(/(\r\n|\n|\r)/g, '\\n'); // preserve newlines as \n
55
- const violation = html || formatPageViolation(page); // page is a number, not a string
56
- const context = violation.replace(/(\r\n|\n|\r)/g, ''); // remove newlines
57
-
58
- results.push({
59
- customFlowLabel: allIssues.customFlowLabel || '',
60
- deviceChosen: allIssues.deviceChosen || '',
61
- scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
62
- severity: severity || '',
63
- issueId: issueId || '',
64
- issueDescription: a11yRuleShortDescriptionMap[issueId] || issueDescription || '',
65
- wcagConformance: wcagConformance || '',
66
- url: url || '',
67
- pageTitle: affectedPage.pageTitle || 'No page title',
68
- context: context || '',
69
- howToFix: howToFix || '',
70
- axeImpact: axeImpact || '',
71
- xpath: xpath || '',
72
- learnMore: learnMore || '',
73
- });
74
- });
75
- });
76
- if (results.length === 0) return {};
77
- return results;
78
- };
73
+ if (itemsStore) {
74
+ const itemsMap = await itemsStore.readRuleItemsMap(severity, issueId);
75
+ const sortedPages = [...pagesAffected].sort((a, b) => (a.url || '').localeCompare(b.url || ''));
79
76
 
80
- const opts: ParserOptions<any, any> = {
81
- transforms: [getRulesByCategory, flattenRule],
82
- fields: [
83
- 'customFlowLabel',
84
- 'deviceChosen',
85
- 'scanCompletedAt',
86
- 'severity',
87
- 'issueId',
88
- 'issueDescription',
89
- 'wcagConformance',
90
- 'url',
91
- 'pageTitle',
92
- 'context',
93
- 'howToFix',
94
- 'axeImpact',
95
- 'xpath',
96
- 'learnMore',
97
- ],
98
- includeEmptyRows: true,
99
- };
77
+ for (const affectedPage of sortedPages) {
78
+ const key = affectedPage.pageIndex != null ? String(affectedPage.pageIndex) : affectedPage.url;
79
+ const entry = itemsMap.get(key);
80
+ if (!entry) continue;
100
81
 
101
- // Create the parse stream (it's asynchronous)
102
- const parser = new AsyncParser(opts);
103
- const parseStream = parser.parse(allIssues);
104
-
105
- // Pipe JSON2CSV output into the file, but don't end automatically
106
- parseStream.pipe(csvOutput, { end: false });
107
-
108
- // Once JSON2CSV is done writing all normal rows, append any "pagesNotScanned"
109
- parseStream.on('end', () => {
110
- if (allIssues.pagesNotScanned && allIssues.pagesNotScanned.length > 0) {
111
- csvOutput.write('\n');
112
- allIssues.pagesNotScanned.forEach(page => {
113
- const skippedPage = {
114
- customFlowLabel: allIssues.customFlowLabel || '',
115
- deviceChosen: allIssues.deviceChosen || '',
116
- scanCompletedAt: allIssues.endTime ? allIssues.endTime.toISOString() : '',
117
- severity: 'error',
118
- issueId: 'error-pages-skipped',
119
- issueDescription: page.metadata
120
- ? page.metadata
121
- : 'An unknown error caused the page to be skipped',
122
- wcagConformance: '',
123
- url: page.url || page || '',
124
- pageTitle: 'Error',
125
- context: '',
126
- howToFix: '',
127
- axeImpact: '',
128
- xpath: '',
129
- learnMore: '',
130
- };
131
- csvOutput.write(`${Object.values(skippedPage).join(',')}\n`);
132
- });
82
+ for (const item of entry.items) {
83
+ const { html, message, xpath } = item;
84
+ const page = (item as any).page;
85
+ const howToFix = (message || '').replace(/(\r\n|\n|\r)/g, '\\n');
86
+ const violation = html || formatPageViolation(page);
87
+ const context = violation.replace(/(\r\n|\n|\r)/g, '');
88
+
89
+ const row = [
90
+ allIssues.customFlowLabel || '',
91
+ allIssues.deviceChosen || '',
92
+ allIssues.endTime ? allIssues.endTime.toISOString() : '',
93
+ severity || '',
94
+ issueId || '',
95
+ a11yRuleShortDescriptionMap[issueId] || issueDescription || '',
96
+ wcagConformance || '',
97
+ affectedPage.url || '',
98
+ affectedPage.pageTitle || 'No page title',
99
+ context || '',
100
+ howToFix || '',
101
+ axeImpact || '',
102
+ xpath || '',
103
+ learnMore || '',
104
+ ].map(escapeCsvField);
105
+
106
+ csvOutput.write(row.join(',') + '\n');
107
+ }
108
+ }
109
+ } else {
110
+ const sortedPages = [...pagesAffected].sort((a, b) => (a.url || '').localeCompare(b.url || ''));
111
+
112
+ for (const affectedPage of sortedPages) {
113
+ const items = (affectedPage as any).items || [];
114
+ for (const item of items) {
115
+ const { html, message, xpath } = item;
116
+ const page = (item as any).page;
117
+ const howToFix = (message || '').replace(/(\r\n|\n|\r)/g, '\\n');
118
+ const violation = html || formatPageViolation(page);
119
+ const context = violation.replace(/(\r\n|\n|\r)/g, '');
120
+
121
+ const row = [
122
+ allIssues.customFlowLabel || '',
123
+ allIssues.deviceChosen || '',
124
+ allIssues.endTime ? allIssues.endTime.toISOString() : '',
125
+ severity || '',
126
+ issueId || '',
127
+ a11yRuleShortDescriptionMap[issueId] || issueDescription || '',
128
+ wcagConformance || '',
129
+ affectedPage.url || '',
130
+ affectedPage.pageTitle || 'No page title',
131
+ context || '',
132
+ howToFix || '',
133
+ axeImpact || '',
134
+ xpath || '',
135
+ learnMore || '',
136
+ ].map(escapeCsvField);
137
+
138
+ csvOutput.write(row.join(',') + '\n');
139
+ }
140
+ }
133
141
  }
142
+ }
134
143
 
135
- // Now close the CSV file
136
- csvOutput.end();
137
- });
144
+ if (allIssues.pagesNotScanned && allIssues.pagesNotScanned.length > 0) {
145
+ allIssues.pagesNotScanned.forEach(page => {
146
+ const row = [
147
+ allIssues.customFlowLabel || '',
148
+ allIssues.deviceChosen || '',
149
+ allIssues.endTime ? allIssues.endTime.toISOString() : '',
150
+ 'error',
151
+ 'error-pages-skipped',
152
+ page.metadata ? page.metadata : 'An unknown error caused the page to be skipped',
153
+ '',
154
+ (page as any).url || page || '',
155
+ 'Error',
156
+ '',
157
+ '',
158
+ '',
159
+ '',
160
+ '',
161
+ ].map(escapeCsvField);
162
+
163
+ csvOutput.write(row.join(',') + '\n');
164
+ });
165
+ }
138
166
 
139
- parseStream.on('error', (err: unknown) => {
140
- console.error('Error parsing CSV:', err);
141
- csvOutput.end();
167
+ csvOutput.end();
168
+ await new Promise<void>((resolve, reject) => {
169
+ csvOutput.on('finish', resolve);
170
+ csvOutput.on('error', reject);
142
171
  });
143
172
  };
144
173
 
@@ -31,6 +31,7 @@ import { consoleLogger } from './logs.js';
31
31
  import itemTypeDescription from './constants/itemTypeDescription.js';
32
32
  import { oobeeAiHtmlETL, oobeeAiRules } from './constants/oobeeAi.js';
33
33
  import { buildHtmlGroups, convertItemsToReferences } from './mergeAxeResults/itemReferences.js';
34
+ import { ItemsStore } from './mergeAxeResults/itemsStore.js';
34
35
  import {
35
36
  compressJsonFileStreaming,
36
37
  writeJsonAndBase64Files,
@@ -418,7 +419,7 @@ const writeSummaryPdf = async (
418
419
  // Tracking WCAG occurrences
419
420
  const wcagOccurrencesMap = new Map<string, number>();
420
421
 
421
- const pushResults = async (pageResults, allIssues, isCustomFlow) => {
422
+ const pushResults = async (pageResults, allIssues, isCustomFlow, itemsStore: ItemsStore) => {
422
423
  const { url, pageTitle, filePath } = pageResults;
423
424
 
424
425
  const totalIssuesInPage = new Set();
@@ -433,15 +434,15 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
433
434
  totalOccurrences: 0,
434
435
  });
435
436
 
436
- ['mustFix', 'goodToFix', 'needsReview', 'passed'].forEach(category => {
437
- if (!pageResults[category]) return;
437
+ for (const category of ['mustFix', 'goodToFix', 'needsReview', 'passed'] as const) {
438
+ if (!pageResults[category]) continue;
438
439
 
439
440
  const { totalItems, rules } = pageResults[category];
440
441
  const currCategoryFromAllIssues = allIssues.items[category];
441
442
 
442
443
  currCategoryFromAllIssues.totalItems += totalItems;
443
444
 
444
- Object.keys(rules).forEach(rule => {
445
+ for (const rule of Object.keys(rules)) {
445
446
  const {
446
447
  description,
447
448
  axeImpact,
@@ -457,7 +458,6 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
457
458
  helpUrl,
458
459
  conformance,
459
460
  totalItems: 0,
460
- // numberOfPagesAffectedAfterRedirects: 0,
461
461
  pagesAffected: {},
462
462
  };
463
463
  }
@@ -470,7 +470,6 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
470
470
  allIssues.wcagViolations.push(c);
471
471
  }
472
472
 
473
- // Track WCAG criteria occurrences for Sentry
474
473
  const currentCount = wcagOccurrencesMap.get(c) || 0;
475
474
  wcagOccurrencesMap.set(c, currentCount + count);
476
475
  });
@@ -480,9 +479,6 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
480
479
 
481
480
  currRuleFromAllIssues.totalItems += count;
482
481
 
483
- // Build htmlGroups for pre-computed Group by HTML Element
484
- buildHtmlGroups(currRuleFromAllIssues, items, url);
485
-
486
482
  if (isCustomFlow) {
487
483
  const { pageIndex, pageImagePath, metadata } = pageResults;
488
484
  currRuleFromAllIssues.pagesAffected[pageIndex] = {
@@ -490,17 +486,31 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
490
486
  pageTitle,
491
487
  pageImagePath,
492
488
  metadata,
493
- items: [...items],
489
+ itemsCount: items.length,
494
490
  };
491
+ await itemsStore.appendPageItems(category, rule, {
492
+ url,
493
+ pageTitle,
494
+ items,
495
+ pageIndex,
496
+ pageImagePath,
497
+ metadata,
498
+ });
495
499
  } else if (!(url in currRuleFromAllIssues.pagesAffected)) {
496
500
  currRuleFromAllIssues.pagesAffected[url] = {
497
501
  pageTitle,
498
- items: [...items],
502
+ itemsCount: items.length,
499
503
  ...(filePath && { filePath }),
500
504
  };
505
+ await itemsStore.appendPageItems(category, rule, {
506
+ url,
507
+ pageTitle,
508
+ items,
509
+ ...(filePath && { filePath }),
510
+ });
501
511
  }
502
- });
503
- });
512
+ }
513
+ }
504
514
  };
505
515
 
506
516
  const getTopTenIssues = allIssues => {
@@ -561,26 +571,26 @@ const flattenAndSortResults = (allIssues: AllIssues, isCustomFlow: boolean) => {
561
571
  .map(pageEntry => {
562
572
  if (isCustomFlow) {
563
573
  const [pageIndex, pageInfo] = pageEntry as unknown as [number, PageInfo];
564
- // Only update the occurrences map if not passed.
565
574
  if (category !== 'passed') {
566
575
  urlOccurrencesMap.set(
567
576
  pageInfo.url!,
568
- (urlOccurrencesMap.get(pageInfo.url!) || 0) + pageInfo.items.length,
577
+ (urlOccurrencesMap.get(pageInfo.url!) || 0) + (pageInfo.itemsCount || 0),
569
578
  );
570
579
  }
571
580
  return { pageIndex, ...pageInfo };
572
581
  }
573
582
  const [url, pageInfo] = pageEntry as unknown as [string, PageInfo];
574
583
  if (category !== 'passed') {
575
- urlOccurrencesMap.set(url, (urlOccurrencesMap.get(url) || 0) + pageInfo.items.length);
584
+ urlOccurrencesMap.set(
585
+ url,
586
+ (urlOccurrencesMap.get(url) || 0) + (pageInfo.itemsCount || 0),
587
+ );
576
588
  }
577
589
  return { url, ...pageInfo };
578
590
  })
579
- // Sort pages so that those with the most items come first
580
- .sort((page1, page2) => page2.items.length - page1.items.length);
591
+ .sort((page1, page2) => (page2.itemsCount || 0) - (page1.itemsCount || 0));
581
592
  return { rule, ...ruleInfo };
582
593
  })
583
- // Sort the rules by totalItems (descending)
584
594
  .sort((rule1, rule2) => rule2.totalItems - rule1.totalItems);
585
595
  });
586
596
 
@@ -631,19 +641,24 @@ const extractRuleAiData = (
631
641
  };
632
642
 
633
643
  // This is for telemetry purposes called within mergeAxeResults.ts
634
- export const createRuleIdJson = allIssues => {
644
+ export const createRuleIdJson = async (allIssues, itemsStore?: ItemsStore) => {
635
645
  const compiledRuleJson = {};
636
646
 
637
- ['mustFix', 'goodToFix', 'needsReview'].forEach(category => {
638
- allIssues.items[category].rules.forEach(rule => {
639
- const allItems = rule.pagesAffected.flatMap(page => page.items || []);
640
- compiledRuleJson[rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems, () => {
641
- rule.pagesAffected.forEach(p => {
642
- delete p.items;
643
- });
644
- });
645
- });
646
- });
647
+ for (const category of ['mustFix', 'goodToFix', 'needsReview'] as const) {
648
+ for (const rule of allIssues.items[category].rules) {
649
+ let allItems: any[] = [];
650
+
651
+ if (itemsStore) {
652
+ for await (const entry of itemsStore.readRuleItems(category, rule.rule)) {
653
+ allItems.push(...entry.items);
654
+ }
655
+ } else {
656
+ allItems = rule.pagesAffected.flatMap(page => page.items || []);
657
+ }
658
+
659
+ compiledRuleJson[rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems);
660
+ }
661
+ }
647
662
 
648
663
  return compiledRuleJson;
649
664
  };
@@ -815,20 +830,20 @@ const generateArtifacts = async (
815
830
  };
816
831
 
817
832
  const allFiles = await extractFileNames(intermediateDatasetsPath);
833
+ const itemsStore = new ItemsStore(storagePath);
818
834
 
819
- const jsonArray = await Promise.all(
820
- allFiles.map(async file => parseContentToJson(`${intermediateDatasetsPath}/${file}`)),
821
- );
822
-
823
- await Promise.all(
824
- jsonArray.map(async pageResults => {
825
- await pushResults(pageResults, allIssues, isCustomFlow);
826
- }),
827
- ).catch(flattenIssuesError => {
828
- consoleLogger.error(
829
- `[generateArtifacts] Error flattening issues: ${flattenIssuesError?.stack || flattenIssuesError}`,
830
- );
831
- });
835
+ for (const file of allFiles) {
836
+ try {
837
+ const pageResults = await parseContentToJson(`${intermediateDatasetsPath}/${file}`);
838
+ if (pageResults) {
839
+ await pushResults(pageResults, allIssues, isCustomFlow, itemsStore);
840
+ }
841
+ } catch (flattenIssuesError: any) {
842
+ consoleLogger.error(
843
+ `[generateArtifacts] Error processing ${file}: ${flattenIssuesError?.stack || flattenIssuesError}`,
844
+ );
845
+ }
846
+ }
832
847
 
833
848
  flattenAndSortResults(allIssues, isCustomFlow);
834
849
 
@@ -864,6 +879,15 @@ const generateArtifacts = async (
864
879
 
865
880
  populateScanPagesDetail(allIssues);
866
881
 
882
+ // Build htmlGroups in a second pass from disk-backed items
883
+ for (const category of ['mustFix', 'goodToFix', 'needsReview', 'passed'] as const) {
884
+ for (const rule of allIssues.items[category].rules) {
885
+ for await (const entry of itemsStore.readRuleItems(category, rule.rule)) {
886
+ buildHtmlGroups(rule, entry.items, entry.url);
887
+ }
888
+ }
889
+ }
890
+
867
891
  allIssues.wcagPassPercentage = getWcagPassPercentage(
868
892
  allIssues.wcagViolations,
869
893
  allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa,
@@ -928,7 +952,7 @@ const generateArtifacts = async (
928
952
  rest.minor = axeImpactCount.minor;
929
953
  }
930
954
 
931
- await writeCsv(allIssues, storagePath);
955
+ await writeCsv(allIssues, storagePath, itemsStore);
932
956
  await writeSitemap(pagesScanned, storagePath);
933
957
  const {
934
958
  scanDataJsonFilePath,
@@ -945,7 +969,7 @@ const generateArtifacts = async (
945
969
  scanPagesSummaryBase64FilePath,
946
970
  scanDataJsonFileSize,
947
971
  scanItemsJsonFileSize,
948
- } = await writeJsonAndBase64Files(allIssues, storagePath);
972
+ } = await writeJsonAndBase64Files(allIssues, storagePath, itemsStore);
949
973
  // Removed BIG_RESULTS_THRESHOLD check - always use full scanItems
950
974
 
951
975
  await writeScanDetailsCsv(
@@ -1058,7 +1082,10 @@ const generateArtifacts = async (
1058
1082
  }
1059
1083
 
1060
1084
  // Generate scrubbed HTML Code Snippets
1061
- const ruleIdJson = createRuleIdJson(allIssues);
1085
+ const ruleIdJson = await createRuleIdJson(allIssues, itemsStore);
1086
+
1087
+ // Clean up intermediate items files
1088
+ await itemsStore.cleanup();
1062
1089
 
1063
1090
  // At the end of the function where results are generated, add:
1064
1091
  try {