@govtechsg/oobee 0.10.86 → 0.10.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.github/workflows/image.yml +2 -3
  2. package/dist/cli.js +18 -5
  3. package/dist/combine.js +2 -0
  4. package/dist/constants/cliFunctions.js +2 -2
  5. package/dist/constants/common.js +55 -13
  6. package/dist/crawlers/crawlDomain.js +38 -13
  7. package/dist/crawlers/crawlIntelligentSitemap.js +62 -30
  8. package/dist/crawlers/crawlSitemap.js +44 -5
  9. package/dist/crawlers/custom/utils.js +81 -40
  10. package/dist/generateHtmlReport.js +18 -11
  11. package/dist/mergeAxeResults/itemReferences.js +60 -25
  12. package/dist/mergeAxeResults/sentryTelemetry.js +4 -1
  13. package/dist/mergeAxeResults.js +18 -9
  14. package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  15. package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
  16. package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
  17. package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  18. package/dist/static/ejs/summary.ejs +18 -12
  19. package/dist/utils.js +4 -3
  20. package/fix-summary-html-oom-pr.md +62 -0
  21. package/package.json +5 -5
  22. package/src/cli.ts +19 -5
  23. package/src/combine.ts +2 -0
  24. package/src/constants/cliFunctions.ts +2 -2
  25. package/src/constants/common.ts +65 -12
  26. package/src/crawlers/crawlDomain.ts +39 -13
  27. package/src/crawlers/crawlIntelligentSitemap.ts +63 -30
  28. package/src/crawlers/crawlSitemap.ts +50 -3
  29. package/src/crawlers/custom/utils.ts +99 -43
  30. package/src/generateHtmlReport.ts +21 -11
  31. package/src/mergeAxeResults/itemReferences.ts +70 -26
  32. package/src/mergeAxeResults/sentryTelemetry.ts +4 -1
  33. package/src/mergeAxeResults.ts +21 -11
  34. package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  35. package/src/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
  36. package/src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
  37. package/src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  38. package/src/static/ejs/summary.ejs +18 -12
  39. package/src/utils.ts +4 -3
  40. package/testStaticJSScanner.html +1 -1
@@ -25,6 +25,7 @@ const parseBoolEnv = (val, defaultVal) => {
25
25
  return defaultVal;
26
26
  };
27
27
  const RESTRICT_OVERLAY_TO_ENTRY_DOMAIN = parseBoolEnv(process.env.RESTRICT_OVERLAY_TO_ENTRY_DOMAIN, false);
28
+ const OVERLAY_OPERATION_TIMEOUT_MS = 5000;
28
29
  const isOverlayAllowed = (currentUrl, entryUrl) => {
29
30
  try {
30
31
  const cur = new URL(currentUrl);
@@ -206,7 +207,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
206
207
  inProgress: false,
207
208
  collapsed: false,
208
209
  }) => {
209
- await page.waitForLoadState('domcontentloaded');
210
+ await page.waitForLoadState('domcontentloaded', { timeout: OVERLAY_OPERATION_TIMEOUT_MS });
210
211
  consoleLogger.info(`Overlay menu: adding to ${menuPos}...`);
211
212
  // Add the overlay menu with initial styling
212
213
  return page
@@ -987,6 +988,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
987
988
  })
988
989
  .catch(error => {
989
990
  consoleLogger.error('Overlay menu: failed to add', error);
991
+ throw error;
990
992
  });
991
993
  };
992
994
  export const removeOverlayMenu = async (page) => {
@@ -1007,6 +1009,8 @@ export const removeOverlayMenu = async (page) => {
1007
1009
  };
1008
1010
  export const initNewPage = async (page, pageClosePromises, processPageParams, pagesDict) => {
1009
1011
  let menuPos = MENU_POSITION.right;
1012
+ let overlayRefreshSeq = 0;
1013
+ let overlayRefreshChain = Promise.resolve();
1010
1014
  // eslint-disable-next-line no-underscore-dangle
1011
1015
  const pageId = page._guid;
1012
1016
  page.on('dialog', async (dialog) => {
@@ -1032,6 +1036,68 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1032
1036
  collapsed: false,
1033
1037
  };
1034
1038
  }
1039
/**
 * Brings the overlay menu in line with the page's current URL: removes it when
 * the URL is not allowed to show the overlay, and adds it when the URL is allowed
 * but the `#oobeeShadowHost` element is not found in the document.
 *
 * Calls are queued on `overlayRefreshChain` so they run one at a time, and a call
 * that has been superseded by a newer one (tracked through `overlayRefreshSeq`)
 * returns without touching the overlay. Errors are logged instead of being
 * propagated to the caller.
 *
 * @param {string} trigger - Label of the event that requested the refresh; used only in log messages.
 * @returns {Promise<void>} Settles once this call's queued refresh has run or been skipped.
 */
const reconcileOverlayMenu = async (trigger) => {
    // Mark this as the latest refresh so older ones can stop.
    const refreshSeq = ++overlayRefreshSeq;
    const isStale = () => refreshSeq !== overlayRefreshSeq || page.isClosed();
    // Races an already-started overlay operation against OVERLAY_OPERATION_TIMEOUT_MS.
    // The timer is cleared in `finally` so a finished operation does not leave a
    // pending timeout behind.
    const withOverlayTimeout = async (operation, label) => {
        let timer;
        const timeout = new Promise((_, reject) => {
            timer = setTimeout(() => {
                reject(new Error(`${label} timed out after ${OVERLAY_OPERATION_TIMEOUT_MS}ms`));
            }, OVERLAY_OPERATION_TIMEOUT_MS);
        });
        try {
            return await Promise.race([operation, timeout]);
        }
        finally {
            clearTimeout(timer);
        }
    };
    // Serialize overlay updates so multiple navigation events do not add/remove concurrently.
    overlayRefreshChain = overlayRefreshChain
        // Defensive: keep the queue usable even if an earlier link rejected.
        .catch(() => { })
        .then(async () => {
            if (isStale()) {
                return;
            }
            try {
                // `framenavigated` can fire before the new document is ready for DOM inspection/injection.
                await page.waitForLoadState('domcontentloaded', { timeout: OVERLAY_OPERATION_TIMEOUT_MS });
            }
            catch {
                // Best effort only. The page may still be mid-navigation.
            }
            try {
                // Give fast redirect chains a brief chance to advance before we inject/remove the overlay.
                await page.waitForTimeout(300);
            }
            catch {
                // Best effort only. The page may already be closing.
            }
            // Re-check staleness after waiting because a newer navigation may have happened meanwhile.
            if (isStale()) {
                return;
            }
            if (!isOverlayAllowed(page.url(), processPageParams.entryUrl)) {
                await withOverlayTimeout(removeOverlayMenu(page), 'removeOverlayMenu');
                return;
            }
            const hasOverlay = await page.evaluate(() => Boolean(document.querySelector('#oobeeShadowHost')));
            consoleLogger.info(`Overlay state (${trigger}): ${hasOverlay}`);
            if (hasOverlay) {
                return;
            }
            // Recreate the overlay after allowed redirects while preserving current UI state.
            consoleLogger.info(`Adding overlay menu to page (${trigger}): ${page.url()}`);
            await withOverlayTimeout(addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
                inProgress: !!pagesDict[pageId]?.isScanning,
                collapsed: !!pagesDict[pageId]?.collapsed,
                hideStopInput: !!processPageParams.customFlowLabel,
            }), 'addOverlayMenu');
        })
        .catch((error) => {
            // Keep the original message as a prefix, but include the cause so failures can be diagnosed.
            consoleLogger.info(`Error in adding overlay menu to page: ${error?.message ?? error}`);
        });
    await overlayRefreshChain;
};
1035
1101
  // Window functions exposed in browser
1036
1102
  const handleOnScanClick = async () => {
1037
1103
  consoleLogger.info('Scan: click detected');
@@ -1044,17 +1110,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1044
1110
  pagesDict[pageId].isScanning = false;
1045
1111
  if (page.isClosed())
1046
1112
  return;
1047
- const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1048
- if (allowed) {
1049
- await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
1050
- inProgress: false,
1051
- collapsed: !!pagesDict[pageId]?.collapsed,
1052
- hideStopInput: !!processPageParams.customFlowLabel,
1053
- });
1054
- }
1055
- else {
1056
- await removeOverlayMenu(page);
1057
- }
1113
+ await reconcileOverlayMenu('scan-click');
1058
1114
  }
1059
1115
  catch (error) {
1060
1116
  log(`Scan failed ${error}`);
@@ -1118,37 +1174,21 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1118
1174
  page.on('domcontentloaded', async () => {
1119
1175
  if (page.isClosed())
1120
1176
  return;
1121
- try {
1122
- const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1123
- if (!allowed) {
1124
- await removeOverlayMenu(page);
1125
- return;
1126
- }
1127
- const existingOverlay = await page.evaluate(() => {
1128
- return document.querySelector('#oobeeShadowHost');
1129
- });
1130
- consoleLogger.info(`Overlay state: ${existingOverlay}`);
1131
- if (!existingOverlay) {
1132
- consoleLogger.info(`Adding overlay menu to page: ${page.url()}`);
1133
- await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
1134
- inProgress: !!pagesDict[pageId]?.isScanning,
1135
- collapsed: !!pagesDict[pageId]?.collapsed,
1136
- hideStopInput: !!processPageParams.customFlowLabel,
1137
- });
1177
+ await reconcileOverlayMenu('domcontentloaded');
1178
+ if (isCypressTest) {
1179
+ try {
1180
+ await handleOnScanClick();
1181
+ page.close();
1138
1182
  }
1139
- if (isCypressTest) {
1140
- try {
1141
- await handleOnScanClick();
1142
- page.close();
1143
- }
1144
- catch {
1145
- consoleLogger.info(`Error in calling handleOnScanClick, isCypressTest: ${isCypressTest}`);
1146
- }
1183
+ catch {
1184
+ consoleLogger.info(`Error in calling handleOnScanClick, isCypressTest: ${isCypressTest}`);
1147
1185
  }
1148
1186
  }
1149
- catch {
1150
- consoleLogger.info('Error in adding overlay menu to page');
1151
- }
1187
+ });
1188
+ page.on('framenavigated', async (frame) => {
1189
+ if (frame !== page.mainFrame() || page.isClosed())
1190
+ return;
1191
+ await reconcileOverlayMenu('framenavigated');
1152
1192
  });
1153
1193
  try {
1154
1194
  if (page.isClosed())
@@ -1167,5 +1207,6 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1167
1207
  catch (e) {
1168
1208
  log(`Error exposing functions on page: ${e}`);
1169
1209
  }
1210
+ await reconcileOverlayMenu('init');
1170
1211
  return page;
1171
1212
  };
@@ -1,7 +1,7 @@
1
1
  import fs from 'fs-extra';
2
2
  import path from 'path';
3
3
  import { compressJsonFileStreaming, writeHTML, flattenAndSortResults, populateScanPagesDetail, getWcagPassPercentage, getProgressPercentage, getIssuesPercentage, itemTypeDescription, oobeeAiHtmlETL, oobeeAiRules, formatAboutStartTime, convertItemsToReferences, } from './mergeAxeResults.js';
4
- import constants, { ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, } from './constants/constants.js';
4
+ import constants, { ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, } from './constants/constants.js';
5
5
  import { consoleLogger } from './logs.js';
6
6
  const ensureCategory = (categoryObj, categoryName) => {
7
7
  const rulesRaw = categoryObj?.rules ?? [];
@@ -23,7 +23,12 @@ const ensureCategory = (categoryObj, categoryName) => {
23
23
  rule.pagesAffected = [];
24
24
  }
25
25
  if (typeof rule.totalItems !== 'number') {
26
- rule.totalItems = rule.pagesAffected.reduce((accumulate, page) => accumulate + (Array.isArray(page.items) ? page.items.length : 0), 0);
26
+ rule.totalItems = rule.pagesAffected.reduce((accumulate, page) => accumulate +
27
+ (Array.isArray(page.items)
28
+ ? page.items.length
29
+ : typeof page.itemsCount === 'number'
30
+ ? page.itemsCount
31
+ : 0), 0);
27
32
  }
28
33
  });
29
34
  const totals = {
@@ -38,7 +43,7 @@ const ensureCategory = (categoryObj, categoryName) => {
38
43
  rules,
39
44
  };
40
45
  };
41
- export const generateHtmlReport = async (resultDir) => {
46
+ export const generateHtmlReport = async (resultDir, htmlFilename = 'report') => {
42
47
  try {
43
48
  const storagePath = path.resolve(resultDir);
44
49
  const scanDataJsonPath = path.join(storagePath, 'scanData.json');
@@ -61,17 +66,16 @@ export const generateHtmlReport = async (resultDir) => {
61
66
  }
62
67
  const scanData = JSON.parse(await fs.readFile(scanDataJsonPath, 'utf8'));
63
68
  const scanItemsAll = JSON.parse(await fs.readFile(scanItemsJsonPath, 'utf8'));
64
- // Use convertItemsToReferences to normalize items structure to match scanItemsWithHtmlGroupRefs format
65
- const scanItemsWithHtmlGroupRefs = convertItemsToReferences({
69
+ // Build the lighter scanItems payload used by the HTML report.
70
+ const lightScanItemsPayload = convertItemsToReferences({
66
71
  items: scanItemsAll,
67
- ...scanData
68
72
  });
69
- const { mustFix = {}, goodToFix = {}, needsReview = {}, passed = {}, } = scanItemsWithHtmlGroupRefs;
73
+ const { mustFix = {}, goodToFix = {}, needsReview = {}, } = lightScanItemsPayload;
70
74
  const items = {
71
75
  mustFix: ensureCategory(mustFix, 'mustFix'),
72
76
  goodToFix: ensureCategory(goodToFix, 'goodToFix'),
73
77
  needsReview: ensureCategory(needsReview, 'needsReview'),
74
- passed: ensureCategory(passed, 'passed'),
78
+ passed: ensureCategory(scanItemsAll.passed || {}, 'passed'),
75
79
  };
76
80
  const pagesScanned = Array.isArray(scanData.pagesScanned) ? scanData.pagesScanned : [];
77
81
  const pagesNotScanned = Array.isArray(scanData.pagesNotScanned) ? scanData.pagesNotScanned : [];
@@ -116,6 +120,8 @@ export const generateHtmlReport = async (resultDir) => {
116
120
  a11yRuleShortDescriptionMap,
117
121
  disabilityBadgesMap,
118
122
  a11yRuleLongDescriptionMap,
123
+ a11yRuleStepByStepGuide,
124
+ wcagCriteriaLabels: constants.wcagCriteriaLabels,
119
125
  advancedScanOptionsSummaryItems: {
120
126
  showIncludeScreenshots: !!scanData.advancedScanOptionsSummaryItems?.showIncludeScreenshots,
121
127
  showAllowSubdomains: !!scanData.advancedScanOptionsSummaryItems?.showAllowSubdomains,
@@ -137,9 +143,10 @@ export const generateHtmlReport = async (resultDir) => {
137
143
  allIssues.wcagPassPercentage = getWcagPassPercentage(allIssues.wcagViolations, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa);
138
144
  allIssues.progressPercentage = getProgressPercentage(allIssues.scanPagesDetail, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa);
139
145
  allIssues.issuesPercentage = await getIssuesPercentage(allIssues.scanPagesDetail, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa, allIssues.advancedScanOptionsSummaryItems?.disableOobee);
140
- await writeHTML(allIssues, storagePath, 'report', scanDataB64Path, scanItemsB64Path);
141
- consoleLogger.info(`Report generated at: ${path.join(storagePath, 'report.html')}`);
142
- return path.join(storagePath, 'report.html');
146
+ await writeHTML(allIssues, storagePath, htmlFilename, scanDataB64Path, scanItemsB64Path);
147
+ const outputPath = path.join(storagePath, `${htmlFilename}.html`);
148
+ consoleLogger.info(`Report generated at: ${outputPath}`);
149
+ return outputPath;
143
150
  }
144
151
  catch (err) {
145
152
  consoleLogger.error(`generateHtmlReport failed: ${err?.message || err}`);
@@ -25,31 +25,66 @@ export const buildHtmlGroups = (rule, items, pageUrl) => {
25
25
  }
26
26
  });
27
27
  };
28
+ /*
29
+ // Commenting this out for now as we are not including htmlGroups in the embedded report payload to keep it lean.
30
+ // We can revisit this if we want to include htmlGroups in the future and need a reference builder for it.
31
+ const toHtmlGroupReference = (item: any) => {
32
+ if (typeof item === 'string') {
33
+ return item;
34
+ }
35
+
36
+ return `${item?.html || 'No HTML element'}\x00${item?.xpath || ''}`;
37
+ };
38
+
39
+ const cloneCategoryWithReferenceItems = (category: ScanCategory): ScanCategory =>
40
+ ({
41
+ ...category,
42
+ rules: category.rules.map(
43
+ rule =>
44
+ ({
45
+ ...rule,
46
+ pagesAffected: rule.pagesAffected.map(
47
+ page => {
48
+ const { items, ...pageWithoutItems } = page;
49
+
50
+ return {
51
+ ...pageWithoutItems,
52
+ itemsCount: page.itemsCount ?? (Array.isArray(items) ? items.length : 0),
53
+ items: Array.isArray(items) ? items.map(toHtmlGroupReference) : items,
54
+ } as any;
55
+ },
56
+ ),
57
+ }) as any,
58
+ ),
59
+ }) as ScanCategory;
60
+ */
61
/**
 * Builds a reduced copy of one scan category for embedding in the HTML report.
 *
 * Category-level fields are copied as-is; each rule is reduced to a fixed set of
 * fields and each affected page to `url`, `pageTitle` and `itemsCount`, so the
 * per-page `items` arrays are not carried over. The input is not modified.
 *
 * A missing category, a category without a `rules` array, or a rule without a
 * `pagesAffected` array is treated as empty instead of throwing.
 *
 * @param {object} [category] - Scan category holding a `rules` array.
 * @param {boolean} includeHtmlGroups - When truthy, a rule's `htmlGroups` is kept if it has one.
 * @returns {object} Shallow copy of the category whose `rules` hold the reduced rule objects.
 */
const cloneCategoryLight = (category, includeHtmlGroups) => {
    const rules = Array.isArray(category?.rules) ? category.rules : [];
    return {
        ...category,
        rules: rules.map(rule => {
            const pagesAffected = Array.isArray(rule.pagesAffected) ? rule.pagesAffected : [];
            return {
                rule: rule.rule,
                description: rule.description,
                helpUrl: rule.helpUrl,
                conformance: rule.conformance,
                totalItems: rule.totalItems,
                axeImpact: rule.axeImpact,
                ...(includeHtmlGroups && rule.htmlGroups ? { htmlGroups: rule.htmlGroups } : {}),
                pagesAffected: pagesAffected.map(page => ({
                    url: page.url,
                    pageTitle: page.pageTitle,
                    // Prefer an explicit count; otherwise derive it from the items array when present.
                    itemsCount: page.itemsCount ?? (Array.isArray(page.items) ? page.items.length : 0),
                })),
            };
        }),
    };
};
28
78
  /**
29
- * Converts items in pagesAffected to references (html\x00xpath composite keys) for embedding in HTML report.
30
- * Additionally, it deep-clones allIssues, replaces page.items objects with composite reference keys.
31
- * Those refs are specifically for htmlGroups lookup (html + xpath).
79
+ * Builds the embedded HTML-report payload from the full scan items.
80
+ * Includes htmlGroups for non-passed categories (Group by HTML Element),
81
+ * excludes them from passed to keep payload within browser memory limits.
32
82
  */
33
- export const convertItemsToReferences = (allIssues) => {
34
- const cloned = JSON.parse(JSON.stringify(allIssues));
35
- ['mustFix', 'goodToFix', 'needsReview', 'passed'].forEach(category => {
36
- if (!cloned.items[category]?.rules)
37
- return;
38
- cloned.items[category].rules.forEach((rule) => {
39
- if (!rule.pagesAffected || !rule.htmlGroups)
40
- return;
41
- rule.pagesAffected.forEach((page) => {
42
- if (!page.items)
43
- return;
44
- page.items = page.items.map((item) => {
45
- if (typeof item === 'string')
46
- return item; // Already a reference
47
- // Use composite key matching buildHtmlGroups
48
- const htmlKey = `${item.html || 'No HTML element'}\x00${item.xpath || ''}`;
49
- return htmlKey;
50
- });
51
- });
52
- });
53
- });
54
- return cloned;
83
export const convertItemsToReferences = (source) => {
    // Scan items may come from a JSON file on disk, so a category (or `items`
    // itself) can be absent. Substitute an empty category rather than letting
    // the clone step fail on `undefined`.
    const items = source?.items ?? {};
    const categoryOrEmpty = (name) => items[name] ?? { rules: [] };
    return {
        // htmlGroups are kept for the three issue categories...
        mustFix: cloneCategoryLight(categoryOrEmpty('mustFix'), true),
        goodToFix: cloneCategoryLight(categoryOrEmpty('goodToFix'), true),
        needsReview: cloneCategoryLight(categoryOrEmpty('needsReview'), true),
        // ...and left out of `passed`.
        passed: cloneCategoryLight(categoryOrEmpty('passed'), false),
    };
};
@@ -110,7 +110,10 @@ const sendWcagBreakdownToSentry = async (appVersion, wcagBreakdown, ruleIdJson,
110
110
  event_type: 'accessibility_scan',
111
111
  scanType: scanInfo.scanType,
112
112
  browser: scanInfo.browser,
113
- entryUrl: scanInfo.entryUrl,
113
+ entryUrl: process.env.OOBEE_SCAN_METADATA ?? scanInfo.entryUrl,
114
+ ...(process.env.OOBEE_SCAN_PRODUCT && {
115
+ scanProduct: process.env.OOBEE_SCAN_PRODUCT,
116
+ }),
114
117
  },
115
118
  user: {
116
119
  ...(scanInfo.email && scanInfo.name
@@ -119,10 +119,10 @@ const writeHTML = async (allIssues, storagePath, htmlFilename = 'report', scanDe
119
119
  const { topFilePath, bottomFilePath } = await splitHtmlAndCreateFiles(htmlFilePath, storagePath);
120
120
  const prefixData = fs.readFileSync(path.join(storagePath, 'report-partial-top.htm.txt'), 'utf-8');
121
121
  const suffixData = fs.readFileSync(path.join(storagePath, 'report-partial-bottom.htm.txt'), 'utf-8');
122
- // Create lighter version with item references for embedding in HTML
123
- const scanItemsWithHtmlGroupRefs = convertItemsToReferences(allIssues);
122
+ // Create the lighter scanItems payload for embedding in the HTML report.
123
+ const lightScanItemsPayload = convertItemsToReferences(allIssues);
124
124
  // Write the lighter items to a file and get the base64 path
125
- const { jsonFilePath: scanItemsWithHtmlGroupRefsJsonFilePath, base64FilePath: scanItemsWithHtmlGroupRefsBase64FilePath, } = await writeJsonFileAndCompressedJsonFile(scanItemsWithHtmlGroupRefs.items, storagePath, 'scanItems-light');
125
+ const { jsonFilePath: lightScanItemsPayloadJsonFilePath, base64FilePath: lightScanItemsPayloadBase64FilePath, } = await writeJsonFileAndCompressedJsonFile(lightScanItemsPayload, storagePath, 'scanItems-light');
126
126
  return new Promise((resolve, reject) => {
127
127
  const scanDetailsReadStream = fs.createReadStream(scanDetailsFilePath, {
128
128
  encoding: 'utf8',
@@ -135,8 +135,8 @@ const writeHTML = async (allIssues, storagePath, htmlFilename = 'report', scanDe
135
135
  await Promise.all([
136
136
  fs.promises.unlink(topFilePath),
137
137
  fs.promises.unlink(bottomFilePath),
138
- fs.promises.unlink(scanItemsWithHtmlGroupRefsBase64FilePath),
139
- fs.promises.unlink(scanItemsWithHtmlGroupRefsJsonFilePath),
138
+ fs.promises.unlink(lightScanItemsPayloadBase64FilePath),
139
+ fs.promises.unlink(lightScanItemsPayloadJsonFilePath),
140
140
  ]);
141
141
  }
142
142
  catch (err) {
@@ -172,22 +172,28 @@ const writeHTML = async (allIssues, storagePath, htmlFilename = 'report', scanDe
172
172
  } else {
173
173
  console.warn('Skipping fetch GenAI feature as it is local report');
174
174
  }
175
+
176
+ var scanData = null;
177
+ var scanItems = null;
175
178
  \n`);
176
179
  outputStream.write('</script>\n<script type="text/plain" id="scanDataRaw">');
177
180
  scanDetailsReadStream.pipe(outputStream, { end: false });
178
181
  scanDetailsReadStream.on('end', async () => {
179
182
  outputStream.write('</script>\n<script>\n');
180
- outputStream.write("var scanDataPromise = (async () => { console.log('Loading scanData...'); scanData = await decodeUnzipParse(document.getElementById('scanDataRaw').textContent); })();\n");
183
+ outputStream.write("var scanDataPromise = (async () => { console.log('Loading scanData...'); scanData = await decodeUnzipParse(document.getElementById('scanDataRaw').textContent); console.log('[report] scanData loaded'); })();\n");
181
184
  outputStream.write('</script>\n');
182
185
  // Write scanItems in 2MB chunks using a stream to avoid loading entire file into memory
183
186
  try {
184
187
  let chunkIndex = 1;
185
- const scanItemsStream = fs.createReadStream(scanItemsWithHtmlGroupRefsBase64FilePath, {
188
+ const scanItemsStream = fs.createReadStream(lightScanItemsPayloadBase64FilePath, {
186
189
  encoding: 'utf8',
187
190
  highWaterMark: CHUNK_SIZE,
188
191
  });
189
192
  for await (const chunk of scanItemsStream) {
190
- outputStream.write(`<script type="text/plain" id="scanItemsRaw${chunkIndex}">${chunk}</script>\n`);
193
+ const ok = outputStream.write(`<script type="text/plain" id="scanItemsRaw${chunkIndex}">${chunk}</script>\n`);
194
+ if (!ok) {
195
+ await new Promise(resolve => outputStream.once('drain', resolve));
196
+ }
191
197
  chunkIndex++;
192
198
  }
193
199
  outputStream.write('<script>\n');
@@ -203,6 +209,7 @@ var scanItemsPromise = (async () => {
203
209
  i++;
204
210
  }
205
211
  scanItems = await decodeUnzipParse(chunks);
212
+ console.log('[report] scanItems loaded');
206
213
  })();\n`);
207
214
  outputStream.write(suffixData);
208
215
  outputStream.end();
@@ -722,11 +729,13 @@ generateJsonFiles = false) => {
722
729
  const browserChannel = getBrowserToRun(randomToken, BrowserTypes.CHROME, false).browserToRun;
723
730
  // Should consider refactor constants.userDataDirectory to be a parameter in future
724
731
  await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
732
+ // Brief delay to allow lingering async crawlee storage operations to flush
733
+ await new Promise(resolve => setTimeout(resolve, 3000));
725
734
  try {
726
735
  await fs.promises.rm(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
727
736
  }
728
737
  catch (error) {
729
- consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
738
+ // Silently ignore folder may already be gone or still locked
730
739
  }
731
740
  try {
732
741
  await fs.promises.rm(path.join(storagePath, 'pdfs'), { recursive: true, force: true });
@@ -28,8 +28,11 @@ async function decodeUnzipParse(input) {
28
28
  offset += arr.length;
29
29
  }
30
30
 
31
- // Step 2: Decompress with pako (GZIP)
32
- const decompressed = pako.ungzip(merged, { to: 'string' });
31
+ // Step 2: Decompress with pako (GZIP) to bytes first to avoid large-string
32
+ // construction inside pako for very large payloads.
33
+ const decompressedBytes = pako.ungzip(merged);
34
+
35
+ const decompressed = new TextDecoder().decode(decompressedBytes);
33
36
 
34
37
  // Step 3: Parse JSON
35
38
  return JSON.parse(decompressed);
@@ -37,4 +40,4 @@ async function decodeUnzipParse(input) {
37
40
  throw new Error(`Failed to decode/unzip/parse: ${err.message}`);
38
41
  }
39
42
  }
40
- </script>
43
+ </script>
@@ -1,10 +1,44 @@
1
1
  <script>
2
2
  /**
3
- * Resolves item references (composite "html\x00xpath" strings) to full item data using htmlGroups.
3
+ * Rebuilds the item list for a page from pre-computed htmlGroups when the light report omits page.items.
4
+ */
5
function buildItemsFromHtmlGroupsForPage(page, ruleInCategory) {
  // A rule without htmlGroups (or no rule at all) yields no items instead of throwing.
  const htmlGroups = (ruleInCategory && ruleInCategory.htmlGroups) || {};

  return Object.values(htmlGroups)
    // Keep only groups that list this page's URL; skip empty or malformed entries.
    .filter(groupData =>
      Boolean(groupData) &&
      Array.isArray(groupData.pageUrls) &&
      groupData.pageUrls.includes(page.url))
    // One item per matching group, carrying the group's fields plus this page's URL and title.
    .map(groupData => ({
      html: groupData.html,
      xpath: groupData.xpath,
      message: groupData.message,
      screenshotPath: groupData.screenshotPath,
      displayNeedsReview: groupData.displayNeedsReview,
      pageUrl: page.url,
      pageTitle: page.pageTitle || page.metadata
    }));
}
27
+
28
+ /**
29
+ * The embedded report payload now omits page.items and rebuilds occurrences from
30
+ * htmlGroups + page metadata. Keep the older page.items resolution logic below
31
+ * commented for an easy rollback if we need to restore mixed payload support.
4
32
  */
5
33
  function resolveItemReferencesForPage(page, ruleInCategory) {
34
+ return buildItemsFromHtmlGroupsForPage(page, ruleInCategory);
35
+
36
+ /*
6
37
  const items = page.items || [];
7
- if (items.length === 0) return [];
38
+
39
+ if (items.length === 0) {
40
+ return buildItemsFromHtmlGroupsForPage(page, ruleInCategory);
41
+ }
8
42
 
9
43
  const isReference = typeof items[0] === 'string';
10
44
 
@@ -27,6 +61,7 @@
27
61
  pageTitle: page.pageTitle || page.metadata
28
62
  };
29
63
  }
64
+
30
65
  // Fallback: parse composite key
31
66
  const nullByteIndex = compositeKey.indexOf('\x00');
32
67
  const html = nullByteIndex !== -1 ? compositeKey.slice(0, nullByteIndex) : compositeKey;
@@ -40,6 +75,7 @@
40
75
  pageTitle: page.pageTitle || page.metadata
41
76
  };
42
77
  });
78
+ */
43
79
  }
44
80
 
45
81
  function buildItemCardsWithPagination(accordionId, category, ruleInCategory, page, index) {
@@ -86,7 +86,7 @@
86
86
  // Use pre-computed htmlGroups for count if available, otherwise use pages
87
87
  const count = isHtmlGrouping && selectedCategory.htmlGroups
88
88
  ? Object.keys(selectedCategory.htmlGroups).length
89
- : selectedCategory.pagesAffected.length;
89
+ : (selectedCategory.pagesAffectedCount || selectedCategory.pagesAffected.length);
90
90
  if (isHtmlGrouping) {
91
91
  dropdownTitle.innerText = `HTML elements affected by this issue (${count})`;
92
92
  } else {
@@ -270,8 +270,8 @@ include('./pageAccordionBuilder') %> <%- include('./constants') %>
270
270
  if (!Array.isArray(rule.pagesAffected)) return;
271
271
 
272
272
  rule.pagesAffected.sort((a, b) => {
273
- const lenA = Array.isArray(a.items) ? a.items.length : 0;
274
- const lenB = Array.isArray(b.items) ? b.items.length : 0;
273
+ const lenA = Array.isArray(a.items) ? a.items.length : a.itemsCount || 0;
274
+ const lenB = Array.isArray(b.items) ? b.items.length : b.itemsCount || 0;
275
275
  return lenB - lenA; // DESC
276
276
  });
277
277
  });
@@ -295,10 +295,10 @@ include('./pageAccordionBuilder') %> <%- include('./constants') %>
295
295
  dropdownToggle.innerText = `${ruleInCategory.totalItems} Total occ.`;
296
296
  dropdownToggle.setAttribute('aria-label', occurrencesText);
297
297
  document.getElementById('expandedRuleDropdownTitle').innerText =
298
- `Pages affected by this issue (${ruleInCategory.pagesAffected.length})`;
298
+ `Pages affected by this issue (${(ruleInCategory.pagesAffectedCount || ruleInCategory.pagesAffected.length)})`;
299
299
  buildExpandedRuleCategoryContent(category, ruleInCategory);
300
300
  document.getElementById('expandedRulePageContent').innerText =
301
- `Total ${ruleInCategory.pagesAffected.length} affected pages`;
301
+ `Total ${(ruleInCategory.pagesAffectedCount || ruleInCategory.pagesAffected.length)} affected pages`;
302
302
  }
303
303
  }
304
304
  });
@@ -21,18 +21,24 @@
21
21
  %>
22
22
  <script>
23
23
  const scanItems = <%- JSON.stringify(
24
- {
25
- ...items,
26
- ...['mustFix','goodToFix','needsReview','passed'].reduce((acc, cat) => {
27
- if (items[cat]) {
28
- acc[cat] = {
29
- ...items[cat],
30
- rules: (items[cat].rules || []).map(({ htmlGroups, ...rest }) => rest),
31
- };
32
- }
33
- return acc;
34
- }, {}),
35
- }
24
+ ['mustFix','goodToFix','needsReview','passed'].reduce((acc, cat) => {
25
+ if (items[cat]) {
26
+ acc[cat] = {
27
+ description: items[cat].description,
28
+ totalItems: items[cat].totalItems,
29
+ totalRuleIssues: items[cat].totalRuleIssues,
30
+ rules: (items[cat].rules || []).map(rule => ({
31
+ rule: rule.rule,
32
+ description: rule.description,
33
+ helpUrl: rule.helpUrl,
34
+ conformance: rule.conformance,
35
+ totalItems: rule.totalItems,
36
+ pagesAffected: { length: (rule.pagesAffected || []).length },
37
+ })),
38
+ };
39
+ }
40
+ return acc;
41
+ }, {})
36
42
  ).replace(/<\//g, '<\\/') %>
37
43
  </script>
38
44
  <%- include('partials/scripts/summaryTable') %>
package/dist/utils.js CHANGED
@@ -4,6 +4,7 @@ import fs from 'fs-extra';
4
4
  import axe from 'axe-core';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
  import { getDomain } from 'tldts';
7
+ import { normalizeUrl } from '@apify/utilities';
7
8
  import constants, { destinationPath, getIntermediateScreenshotsPath, } from './constants/constants.js';
8
9
  import { consoleLogger, errorsTxtPath } from './logs.js';
9
10
  import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
@@ -852,13 +853,13 @@ export const randomThreeDigitNumberString = () => {
852
853
  const threeDigitNumber = Math.floor(scaledDecimal) + 100;
853
854
  return String(threeDigitNumber);
854
855
  };
856
// Returns '' for a falsy input; otherwise returns normalizeUrl(u), or the original value when that result is falsy.
+ export const normUrl = (u) => (u ? normalizeUrl(u) || u : '');
855
857
  export const isFollowStrategy = (link1, link2, rule) => {
858
+ if (rule === 'all')
859
+ return true;
856
860
  try {
857
861
  const parsedLink1 = new URL(link1);
858
862
  const parsedLink2 = new URL(link2);
859
- if (rule === 'all') {
860
- return true;
861
- }
862
863
  if (rule === 'same-origin') {
863
864
  return parsedLink1.origin === parsedLink2.origin;
864
865
  }