@govtechsg/oobee 0.10.86 → 0.10.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.github/workflows/docker-push-ghcr.yml +49 -0
  2. package/.github/workflows/image.yml +2 -3
  3. package/DETAILS_OUTPUT_EXAMPLES.md +178 -0
  4. package/Dockerfile +6 -7
  5. package/dist/cli.js +18 -5
  6. package/dist/combine.js +3 -0
  7. package/dist/constants/cliFunctions.js +2 -2
  8. package/dist/constants/common.js +55 -13
  9. package/dist/crawlers/commonCrawlerFunc.js +523 -2
  10. package/dist/crawlers/crawlDomain.js +38 -13
  11. package/dist/crawlers/crawlIntelligentSitemap.js +62 -30
  12. package/dist/crawlers/crawlLocalFile.js +2 -2
  13. package/dist/crawlers/crawlSitemap.js +44 -5
  14. package/dist/crawlers/custom/extractAndGradeText.js +1 -1
  15. package/dist/crawlers/custom/getAxeConfiguration.js +26 -21
  16. package/dist/crawlers/custom/gradeReadability.js +1 -1
  17. package/dist/crawlers/custom/utils.js +81 -40
  18. package/dist/generateHtmlReport.js +18 -11
  19. package/dist/mergeAxeResults/itemReferences.js +60 -25
  20. package/dist/mergeAxeResults/sentryTelemetry.js +4 -1
  21. package/dist/mergeAxeResults.js +18 -9
  22. package/dist/npmIndex.js +16 -12
  23. package/dist/screenshotFunc/htmlScreenshotFunc.js +67 -0
  24. package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  25. package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +45 -6
  26. package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +8 -5
  27. package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  28. package/dist/static/ejs/partials/scripts/ruleModal/utilities.ejs +2 -1
  29. package/dist/static/ejs/summary.ejs +18 -12
  30. package/dist/utils.js +4 -3
  31. package/examples/oobee-test-details-runner.js +214 -0
  32. package/examples/test-violations.html +42 -0
  33. package/fix-summary-html-oom-pr.md +62 -0
  34. package/package.json +5 -5
  35. package/src/cli.ts +19 -5
  36. package/src/combine.ts +3 -0
  37. package/src/constants/cliFunctions.ts +2 -2
  38. package/src/constants/common.ts +65 -12
  39. package/src/crawlers/commonCrawlerFunc.ts +625 -2
  40. package/src/crawlers/crawlDomain.ts +39 -13
  41. package/src/crawlers/crawlIntelligentSitemap.ts +63 -30
  42. package/src/crawlers/crawlLocalFile.ts +4 -1
  43. package/src/crawlers/crawlSitemap.ts +50 -3
  44. package/src/crawlers/custom/extractAndGradeText.ts +1 -1
  45. package/src/crawlers/custom/getAxeConfiguration.ts +25 -23
  46. package/src/crawlers/custom/gradeReadability.ts +1 -1
  47. package/src/crawlers/custom/utils.ts +99 -43
  48. package/src/generateHtmlReport.ts +21 -11
  49. package/src/mergeAxeResults/itemReferences.ts +70 -26
  50. package/src/mergeAxeResults/sentryTelemetry.ts +4 -1
  51. package/src/mergeAxeResults.ts +21 -11
  52. package/src/npmIndex.ts +17 -12
  53. package/src/screenshotFunc/htmlScreenshotFunc.ts +81 -1
  54. package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  55. package/src/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +45 -6
  56. package/src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +8 -5
  57. package/src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  58. package/src/static/ejs/partials/scripts/ruleModal/utilities.ejs +2 -1
  59. package/src/static/ejs/summary.ejs +18 -12
  60. package/src/utils.ts +4 -3
  61. package/testStaticJSScanner.html +1 -1
@@ -3,7 +3,7 @@ import constants, { guiInfoStatusTypes, sitemapPaths } from '../constants/consta
3
3
  import { consoleLogger, guiInfoLog } from '../logs.js';
4
4
  import crawlDomain from './crawlDomain.js';
5
5
  import crawlSitemap from './crawlSitemap.js';
6
- import { getPlaywrightLaunchOptions } from '../constants/common.js';
6
+ import { getPlaywrightLaunchOptions, getSitemapsFromRobotsTxt } from '../constants/common.js';
7
7
  import { register } from '../utils.js';
8
8
  const crawlIntelligentSitemap = async (url, randomToken, host, viewportSettings, maxRequestsPerCrawl, browser, userDataDirectory, strategy, specifiedMaxConcurrency, fileTypes, blacklistedPatterns, includeScreenshots, followRobots, extraHTTPHeaders, safeMode, scanDuration) => {
9
9
  const startTime = Date.now(); // Track start time
@@ -66,12 +66,30 @@ const crawlIntelligentSitemap = async (url, randomToken, host, viewportSettings,
66
66
  return false;
67
67
  }
68
68
  };
69
+ // Discover sitemaps from robots.txt first (supports multiple Sitemap: directives)
70
+ let sitemapUrls = [];
69
71
  try {
70
- sitemapUrl = await findSitemap(url, userDataDirectory, extraHTTPHeaders);
72
+ sitemapUrls = await getSitemapsFromRobotsTxt(url, browser, userDataDirectory, extraHTTPHeaders);
73
+ if (sitemapUrls.length > 0) {
74
+ console.log(`Found ${sitemapUrls.length} sitemap(s) in robots.txt: ${sitemapUrls.join(', ')}`);
75
+ sitemapExist = true;
76
+ }
71
77
  }
72
78
  catch (error) {
73
79
  consoleLogger.error(error);
74
80
  }
81
+ // Fall back to hardcoded path probing if robots.txt had no sitemaps
82
+ if (!sitemapExist) {
83
+ try {
84
+ sitemapUrl = await findSitemap(url, userDataDirectory, extraHTTPHeaders);
85
+ if (sitemapExist) {
86
+ sitemapUrls = [sitemapUrl];
87
+ }
88
+ }
89
+ catch (error) {
90
+ consoleLogger.error(error);
91
+ }
92
+ }
75
93
  if (!sitemapExist) {
76
94
  console.log('Unable to find sitemap. Commencing website crawl instead.');
77
95
  return await crawlDomain({
@@ -90,34 +108,48 @@ const crawlIntelligentSitemap = async (url, randomToken, host, viewportSettings,
90
108
  followRobots,
91
109
  extraHTTPHeaders,
92
110
  safeMode,
93
- scanDuration, // Use full duration since no sitemap
111
+ scanDuration,
112
+ });
113
+ }
114
+ // Process all discovered sitemaps sequentially, sharing dataset and urlsCrawled
115
+ for (const currentSitemapUrl of sitemapUrls) {
116
+ if (urlsCrawled.scanned.length >= maxRequestsPerCrawl)
117
+ break;
118
+ const elapsed = Date.now() - startTime;
119
+ const remainingDuration = scanDuration > 0 ? Math.max(scanDuration - elapsed / 1000, 0) : scanDuration;
120
+ if (scanDuration > 0 && remainingDuration <= 0) {
121
+ durationExceeded = true;
122
+ break;
123
+ }
124
+ console.log(`Processing sitemap: ${currentSitemapUrl}`);
125
+ urlsCrawledFinal = await crawlSitemap({
126
+ sitemapUrl: currentSitemapUrl,
127
+ randomToken,
128
+ host,
129
+ viewportSettings,
130
+ maxRequestsPerCrawl,
131
+ browser,
132
+ userDataDirectory,
133
+ specifiedMaxConcurrency,
134
+ fileTypes,
135
+ blacklistedPatterns,
136
+ includeScreenshots,
137
+ extraHTTPHeaders,
138
+ strategy,
139
+ userUrl: url,
140
+ fromCrawlIntelligentSitemap,
141
+ userUrlInputFromIntelligent: url,
142
+ datasetFromIntelligent: dataset,
143
+ urlsCrawledFromIntelligent: urlsCrawled,
144
+ crawledFromLocalFile: false,
145
+ scanDuration: scanDuration > 0 ? remainingDuration : 0,
94
146
  });
95
147
  }
96
- console.log(`Sitemap found at ${sitemapUrl}`);
97
- urlsCrawledFinal = await crawlSitemap({
98
- sitemapUrl,
99
- randomToken,
100
- host,
101
- viewportSettings,
102
- maxRequestsPerCrawl,
103
- browser,
104
- userDataDirectory,
105
- specifiedMaxConcurrency,
106
- fileTypes,
107
- blacklistedPatterns,
108
- includeScreenshots,
109
- extraHTTPHeaders,
110
- fromCrawlIntelligentSitemap,
111
- userUrlInputFromIntelligent: url,
112
- datasetFromIntelligent: dataset,
113
- urlsCrawledFromIntelligent: urlsCrawled,
114
- crawledFromLocalFile: false,
115
- scanDuration,
116
- });
117
148
  const elapsed = Date.now() - startTime;
118
- const remainingScanDuration = Math.max(scanDuration - elapsed / 1000, 0); // in seconds
119
- if (urlsCrawledFinal.scanned.length < maxRequestsPerCrawl && remainingScanDuration > 0) {
120
- console.log(`Continuing crawl from root website. Remaining scan time: ${remainingScanDuration.toFixed(1)}s`);
149
+ const remainingScanDuration = scanDuration > 0 ? Math.max(scanDuration - elapsed / 1000, 0) : 0;
150
+ const hasDurationRemaining = scanDuration === 0 || remainingScanDuration > 0;
151
+ if (urlsCrawled.scanned.length < maxRequestsPerCrawl && hasDurationRemaining) {
152
+ console.log(`Continuing crawl from root website.${scanDuration > 0 ? ` Remaining scan time: ${remainingScanDuration.toFixed(1)}s` : ''}`);
121
153
  urlsCrawledFinal = await crawlDomain({
122
154
  url,
123
155
  randomToken,
@@ -136,15 +168,15 @@ const crawlIntelligentSitemap = async (url, randomToken, host, viewportSettings,
136
168
  safeMode,
137
169
  fromCrawlIntelligentSitemap,
138
170
  datasetFromIntelligent: dataset,
139
- urlsCrawledFromIntelligent: urlsCrawledFinal,
171
+ urlsCrawledFromIntelligent: urlsCrawled,
140
172
  scanDuration: remainingScanDuration,
141
173
  });
142
174
  }
143
- else if (remainingScanDuration <= 0) {
175
+ else if (!hasDurationRemaining) {
144
176
  console.log(`Crawl duration exceeded before more pages could be found (limit: ${scanDuration}s).`);
145
177
  durationExceeded = true;
146
178
  }
147
179
  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
148
- return { urlsCrawled: urlsCrawledFinal, durationExceeded };
180
+ return { urlsCrawled, durationExceeded };
149
181
  };
150
182
  export default crawlIntelligentSitemap;
@@ -8,7 +8,7 @@ import { runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.j
8
8
  import { guiInfoLog } from '../logs.js';
9
9
  import crawlSitemap from './crawlSitemap.js';
10
10
  import { getPdfStoragePath, register } from '../utils.js';
11
- export const crawlLocalFile = async ({ url, randomToken, host, viewportSettings, maxRequestsPerCrawl, browser, userDataDirectory, specifiedMaxConcurrency, fileTypes, blacklistedPatterns, includeScreenshots, extraHTTPHeaders, scanDuration = 0, fromCrawlIntelligentSitemap = false, userUrlInputFromIntelligent = null, datasetFromIntelligent = null, urlsCrawledFromIntelligent = null, }) => {
11
+ export const crawlLocalFile = async ({ url, randomToken, host, viewportSettings, maxRequestsPerCrawl, browser, userDataDirectory, specifiedMaxConcurrency, fileTypes, blacklistedPatterns, includeScreenshots, extraHTTPHeaders, scanDuration = 0, ruleset = [], fromCrawlIntelligentSitemap = false, userUrlInputFromIntelligent = null, datasetFromIntelligent = null, urlsCrawledFromIntelligent = null, }) => {
12
12
  let dataset;
13
13
  let urlsCrawled;
14
14
  let linksFromSitemap = [];
@@ -105,7 +105,7 @@ export const crawlLocalFile = async ({ url, randomToken, host, viewportSettings,
105
105
  await browserContext.close().catch(() => { });
106
106
  return urlsCrawled;
107
107
  }
108
- const results = await runAxeScript({ includeScreenshots, page, randomToken });
108
+ const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset });
109
109
  const actualUrl = page.url() || request.loadedUrl || url;
110
110
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
111
111
  numScanned: urlsCrawled.scanned.length,
@@ -1,13 +1,13 @@
1
- import crawlee, { RequestList } from 'crawlee';
1
+ import crawlee, { EnqueueStrategy, RequestList } from 'crawlee';
2
2
  import * as path from 'path';
3
3
  import fsp from 'fs/promises';
4
4
  import { createCrawleeSubFolders, preNavigationHooks, runAxeScript, } from './commonCrawlerFunc.js';
5
5
  import constants, { STATUS_CODE_METADATA, guiInfoStatusTypes, disallowedListOfPatterns, FileTypes, } from '../constants/constants.js';
6
6
  import { getLinksFromSitemap, getPlaywrightLaunchOptions, isSkippedUrl, waitForPageLoaded, isFilePath, } from '../constants/common.js';
7
- import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
7
+ import { areLinksEqual, isFollowStrategy, isWhitelistedContentType, normUrl, register } from '../utils.js';
8
8
  import { handlePdfDownload, runPdfScan, mapPdfScanResults, doPdfScreenshots, } from './pdfScanFunc.js';
9
9
  import { guiInfoLog } from '../logs.js';
10
- const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, maxRequestsPerCrawl, browser, userDataDirectory, specifiedMaxConcurrency, fileTypes, blacklistedPatterns, includeScreenshots, extraHTTPHeaders, scanDuration = 0, fromCrawlIntelligentSitemap = false, userUrlInputFromIntelligent = null, datasetFromIntelligent = null, urlsCrawledFromIntelligent = null, crawledFromLocalFile = false, }) => {
10
+ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, maxRequestsPerCrawl, browser, userDataDirectory, specifiedMaxConcurrency, fileTypes, blacklistedPatterns, includeScreenshots, extraHTTPHeaders, strategy = EnqueueStrategy.All, userUrl = '', scanDuration = 0, fromCrawlIntelligentSitemap = false, userUrlInputFromIntelligent = null, datasetFromIntelligent = null, urlsCrawledFromIntelligent = null, crawledFromLocalFile = false, }) => {
11
11
  const crawlStartTime = Date.now();
12
12
  let dataset;
13
13
  let urlsCrawled;
@@ -25,7 +25,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
25
25
  console.log('Local file crawling not supported for sitemap. Please provide a valid URL.');
26
26
  return;
27
27
  }
28
- const linksFromSitemap = await getLinksFromSitemap(sitemapUrl, maxRequestsPerCrawl, browser, userDataDirectory, userUrlInputFromIntelligent, fromCrawlIntelligentSitemap, extraHTTPHeaders);
28
+ const linksFromSitemap = await getLinksFromSitemap(sitemapUrl, maxRequestsPerCrawl, browser, userDataDirectory, userUrlInputFromIntelligent, fromCrawlIntelligentSitemap, extraHTTPHeaders, strategy, userUrl || sitemapUrl);
29
29
  sitemapUrl = encodeURI(sitemapUrl);
30
30
  const pdfDownloads = [];
31
31
  const uuidToPdfMapping = {};
@@ -182,7 +182,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
182
182
  const status = response ? response.status() : 0;
183
183
  if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
184
184
  const isRedirected = !areLinksEqual(page.url(), request.url);
185
- const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
185
+ const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => normUrl(item.actualUrl || item.url) === normUrl(page.url()));
186
186
  if (isRedirected && isLoadedUrlInCrawledUrls) {
187
187
  urlsCrawled.notScannedRedirects.push({
188
188
  fromUrl: request.url,
@@ -205,7 +205,46 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
205
205
  });
206
206
  return;
207
207
  }
208
+ if (isRedirected && !isFollowStrategy(actualUrl, request.url, 'same-hostname')) {
209
+ urlsCrawled.notScannedRedirects.push({
210
+ fromUrl: request.url,
211
+ toUrl: actualUrl,
212
+ });
213
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
214
+ numScanned: urlsCrawled.scanned.length,
215
+ urlScanned: request.url,
216
+ });
217
+ return;
218
+ }
208
219
  const results = await runAxeScript({ includeScreenshots, page, randomToken });
220
+ // Detect JS redirects that fire during/after axe scan.
221
+ // Listen for navigation, then give a brief window for pending redirects to complete.
222
+ try {
223
+ let navigatedToUrl = null;
224
+ const onFrameNavigated = (frame) => {
225
+ if (frame === page.mainFrame()) {
226
+ navigatedToUrl = frame.url();
227
+ }
228
+ };
229
+ page.on('framenavigated', onFrameNavigated);
230
+ await page.waitForTimeout(1000);
231
+ page.off('framenavigated', onFrameNavigated);
232
+ const postScanUrl = navigatedToUrl || page.url();
233
+ if (postScanUrl && postScanUrl !== 'about:blank' && !isFollowStrategy(postScanUrl, request.url, 'same-hostname')) {
234
+ urlsCrawled.notScannedRedirects.push({
235
+ fromUrl: request.url,
236
+ toUrl: postScanUrl,
237
+ });
238
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
239
+ numScanned: urlsCrawled.scanned.length,
240
+ urlScanned: request.url,
241
+ });
242
+ return;
243
+ }
244
+ }
245
+ catch (_) {
246
+ // Page/context was destroyed during navigation — handled by outer catch
247
+ }
209
248
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
210
249
  numScanned: urlsCrawled.scanned.length,
211
250
  urlScanned: request.url,
@@ -34,7 +34,7 @@ export async function extractAndGradeText(page) {
34
34
  const readabilityScore = wordCount >= 20 ? textReadability.fleschReadingEase(filteredText) : 0;
35
35
  // Log details for debugging
36
36
  // Determine the return value
37
- const result = readabilityScore === 0 || readabilityScore > 50 ? '' : readabilityScore.toString(); // Convert readabilityScore to string
37
+ const result = readabilityScore <= 0 || readabilityScore > 50 ? '' : readabilityScore.toString();
38
38
  return result;
39
39
  }
40
40
  catch (error) {
@@ -1,5 +1,13 @@
1
1
  import { evaluateAltText } from "./evaluateAltText.js";
2
2
  export function getAxeConfiguration({ enableWcagAaa = false, gradingReadabilityFlag = '', disableOobee = false, }) {
3
+ function getReadabilityInterpretation(score) {
4
+ const num = parseFloat(score);
5
+ if (Number.isNaN(num))
6
+ return '';
7
+ if (num > 30)
8
+ return 'It is targeted for junior college (JC) level comprehension and above.';
9
+ return 'It is targeted for university graduate level comprehension and above.';
10
+ }
3
11
  return {
4
12
  branding: {
5
13
  application: 'oobee',
@@ -29,7 +37,7 @@ export function getAxeConfiguration({ enableWcagAaa = false, gradingReadabilityF
29
37
  return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
30
38
  },
31
39
  },
32
- ...(enableWcagAaa
40
+ ...((enableWcagAaa && gradingReadabilityFlag !== '')
33
41
  ? [
34
42
  {
35
43
  id: 'oobee-grading-text-contents',
@@ -37,16 +45,11 @@ export function getAxeConfiguration({ enableWcagAaa = false, gradingReadabilityF
37
45
  impact: 'moderate',
38
46
  messages: {
39
47
  pass: 'The text content is easy to understand.',
40
- fail: 'The text content is potentially difficult to understand.',
41
- incomplete: `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${gradingReadabilityFlag}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`,
48
+ fail: `Text content is potentially difficult to read.\n It scored ${gradingReadabilityFlag} out of 50 on the Flesch-Kincaid Readability Test.\n ${getReadabilityInterpretation(gradingReadabilityFlag)}`,
49
+ incomplete: `Text content is potentially difficult to read.\n It scored ${gradingReadabilityFlag} out of 50 on the Flesch-Kincaid Readability Test.\n ${getReadabilityInterpretation(gradingReadabilityFlag)}`,
42
50
  },
43
51
  },
44
- evaluate: (_node) => {
45
- if (gradingReadabilityFlag === '') {
46
- return true; // Pass if no readability issues
47
- }
48
- // Fail if readability issues are detected
49
- },
52
+ evaluate: (_node) => false,
50
53
  },
51
54
  ]
52
55
  : []),
@@ -77,18 +80,20 @@ export function getAxeConfiguration({ enableWcagAaa = false, gradingReadabilityF
77
80
  helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
78
81
  },
79
82
  },
80
- {
81
- id: 'oobee-grading-text-contents',
82
- selector: 'html',
83
- enabled: true,
84
- any: ['oobee-grading-text-contents'],
85
- tags: ['wcag2aaa', 'wcag315'],
86
- metadata: {
87
- description: 'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
88
- help: 'Text content should be clear and plain to ensure that it is easily understood.',
89
- helpUrl: 'https://www.wcag.com/uncategorized/3-1-5-reading-level/',
90
- },
91
- },
83
+ ...((enableWcagAaa && gradingReadabilityFlag !== '')
84
+ ? [{
85
+ id: 'oobee-grading-text-contents',
86
+ selector: 'html',
87
+ enabled: true,
88
+ any: ['oobee-grading-text-contents'],
89
+ tags: ['wcag2aaa', 'wcag315'],
90
+ metadata: {
91
+ description: 'Text content should be easy to understand for individuals with education levels up to university graduates. If the text content is difficult to understand, provide supplemental content or a version that is easy to understand.',
92
+ help: 'Text content should be clear and plain to ensure that it is easily understood.',
93
+ helpUrl: 'https://www.wcag.com/uncategorized/3-1-5-reading-level/',
94
+ },
95
+ }]
96
+ : []),
92
97
  ]
93
98
  .filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
94
99
  .concat(enableWcagAaa
@@ -13,7 +13,7 @@ export function gradeReadability(sentences) {
13
13
  const readabilityScore = wordCount >= 20 ? textReadability.fleschReadingEase(filteredText) : 0;
14
14
  // Log details for debugging
15
15
  // Determine the return value
16
- const result = readabilityScore === 0 || readabilityScore > 50 ? '' : readabilityScore.toString(); // Convert readabilityScore to string
16
+ const result = readabilityScore <= 0 || readabilityScore > 50 ? '' : readabilityScore.toString();
17
17
  return result;
18
18
  }
19
19
  catch (error) {
@@ -25,6 +25,7 @@ const parseBoolEnv = (val, defaultVal) => {
25
25
  return defaultVal;
26
26
  };
27
27
  const RESTRICT_OVERLAY_TO_ENTRY_DOMAIN = parseBoolEnv(process.env.RESTRICT_OVERLAY_TO_ENTRY_DOMAIN, false);
28
+ const OVERLAY_OPERATION_TIMEOUT_MS = 5000;
28
29
  const isOverlayAllowed = (currentUrl, entryUrl) => {
29
30
  try {
30
31
  const cur = new URL(currentUrl);
@@ -206,7 +207,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
206
207
  inProgress: false,
207
208
  collapsed: false,
208
209
  }) => {
209
- await page.waitForLoadState('domcontentloaded');
210
+ await page.waitForLoadState('domcontentloaded', { timeout: OVERLAY_OPERATION_TIMEOUT_MS });
210
211
  consoleLogger.info(`Overlay menu: adding to ${menuPos}...`);
211
212
  // Add the overlay menu with initial styling
212
213
  return page
@@ -987,6 +988,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
987
988
  })
988
989
  .catch(error => {
989
990
  consoleLogger.error('Overlay menu: failed to add', error);
991
+ throw error;
990
992
  });
991
993
  };
992
994
  export const removeOverlayMenu = async (page) => {
@@ -1007,6 +1009,8 @@ export const removeOverlayMenu = async (page) => {
1007
1009
  };
1008
1010
  export const initNewPage = async (page, pageClosePromises, processPageParams, pagesDict) => {
1009
1011
  let menuPos = MENU_POSITION.right;
1012
+ let overlayRefreshSeq = 0;
1013
+ let overlayRefreshChain = Promise.resolve();
1010
1014
  // eslint-disable-next-line no-underscore-dangle
1011
1015
  const pageId = page._guid;
1012
1016
  page.on('dialog', async (dialog) => {
@@ -1032,6 +1036,68 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1032
1036
  collapsed: false,
1033
1037
  };
1034
1038
  }
1039
+ const reconcileOverlayMenu = async (trigger) => {
1040
+ // Mark this as the latest refresh so older ones can stop.
1041
+ const refreshSeq = ++overlayRefreshSeq;
1042
+ // Serialize overlay updates so multiple navigation events do not add/remove concurrently.
1043
+ overlayRefreshChain = overlayRefreshChain
1044
+ .catch(() => { })
1045
+ .then(async () => {
1046
+ if (refreshSeq !== overlayRefreshSeq || page.isClosed())
1047
+ return;
1048
+ try {
1049
+ // `framenavigated` can fire before the new document is ready for DOM inspection/injection.
1050
+ await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
1051
+ }
1052
+ catch {
1053
+ // Best effort only. The page may still be mid-navigation.
1054
+ }
1055
+ try {
1056
+ // Give fast redirect chains a brief chance to advance before we inject/remove the overlay.
1057
+ await page.waitForTimeout(300);
1058
+ }
1059
+ catch {
1060
+ // Best effort only. The page may already be closing.
1061
+ }
1062
+ // Re-check staleness after waiting because a newer navigation may have happened meanwhile.
1063
+ if (refreshSeq !== overlayRefreshSeq || page.isClosed())
1064
+ return;
1065
+ const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1066
+ if (!allowed) {
1067
+ await Promise.race([
1068
+ removeOverlayMenu(page),
1069
+ new Promise((_, reject) => {
1070
+ setTimeout(() => {
1071
+ reject(new Error(`removeOverlayMenu timed out after ${OVERLAY_OPERATION_TIMEOUT_MS}ms`));
1072
+ }, OVERLAY_OPERATION_TIMEOUT_MS);
1073
+ }),
1074
+ ]);
1075
+ return;
1076
+ }
1077
+ const hasOverlay = await page.evaluate(() => Boolean(document.querySelector('#oobeeShadowHost')));
1078
+ consoleLogger.info(`Overlay state (${trigger}): ${hasOverlay}`);
1079
+ if (!hasOverlay) {
1080
+ // Recreate the overlay after allowed redirects while preserving current UI state.
1081
+ consoleLogger.info(`Adding overlay menu to page (${trigger}): ${page.url()}`);
1082
+ await Promise.race([
1083
+ addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
1084
+ inProgress: !!pagesDict[pageId]?.isScanning,
1085
+ collapsed: !!pagesDict[pageId]?.collapsed,
1086
+ hideStopInput: !!processPageParams.customFlowLabel,
1087
+ }),
1088
+ new Promise((_, reject) => {
1089
+ setTimeout(() => {
1090
+ reject(new Error(`addOverlayMenu timed out after ${OVERLAY_OPERATION_TIMEOUT_MS}ms`));
1091
+ }, OVERLAY_OPERATION_TIMEOUT_MS);
1092
+ }),
1093
+ ]);
1094
+ }
1095
+ })
1096
+ .catch(() => {
1097
+ consoleLogger.info('Error in adding overlay menu to page');
1098
+ });
1099
+ await overlayRefreshChain;
1100
+ };
1035
1101
  // Window functions exposed in browser
1036
1102
  const handleOnScanClick = async () => {
1037
1103
  consoleLogger.info('Scan: click detected');
@@ -1044,17 +1110,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1044
1110
  pagesDict[pageId].isScanning = false;
1045
1111
  if (page.isClosed())
1046
1112
  return;
1047
- const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1048
- if (allowed) {
1049
- await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
1050
- inProgress: false,
1051
- collapsed: !!pagesDict[pageId]?.collapsed,
1052
- hideStopInput: !!processPageParams.customFlowLabel,
1053
- });
1054
- }
1055
- else {
1056
- await removeOverlayMenu(page);
1057
- }
1113
+ await reconcileOverlayMenu('scan-click');
1058
1114
  }
1059
1115
  catch (error) {
1060
1116
  log(`Scan failed ${error}`);
@@ -1118,37 +1174,21 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1118
1174
  page.on('domcontentloaded', async () => {
1119
1175
  if (page.isClosed())
1120
1176
  return;
1121
- try {
1122
- const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1123
- if (!allowed) {
1124
- await removeOverlayMenu(page);
1125
- return;
1126
- }
1127
- const existingOverlay = await page.evaluate(() => {
1128
- return document.querySelector('#oobeeShadowHost');
1129
- });
1130
- consoleLogger.info(`Overlay state: ${existingOverlay}`);
1131
- if (!existingOverlay) {
1132
- consoleLogger.info(`Adding overlay menu to page: ${page.url()}`);
1133
- await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
1134
- inProgress: !!pagesDict[pageId]?.isScanning,
1135
- collapsed: !!pagesDict[pageId]?.collapsed,
1136
- hideStopInput: !!processPageParams.customFlowLabel,
1137
- });
1177
+ await reconcileOverlayMenu('domcontentloaded');
1178
+ if (isCypressTest) {
1179
+ try {
1180
+ await handleOnScanClick();
1181
+ page.close();
1138
1182
  }
1139
- if (isCypressTest) {
1140
- try {
1141
- await handleOnScanClick();
1142
- page.close();
1143
- }
1144
- catch {
1145
- consoleLogger.info(`Error in calling handleOnScanClick, isCypressTest: ${isCypressTest}`);
1146
- }
1183
+ catch {
1184
+ consoleLogger.info(`Error in calling handleOnScanClick, isCypressTest: ${isCypressTest}`);
1147
1185
  }
1148
1186
  }
1149
- catch {
1150
- consoleLogger.info('Error in adding overlay menu to page');
1151
- }
1187
+ });
1188
+ page.on('framenavigated', async (frame) => {
1189
+ if (frame !== page.mainFrame() || page.isClosed())
1190
+ return;
1191
+ await reconcileOverlayMenu('framenavigated');
1152
1192
  });
1153
1193
  try {
1154
1194
  if (page.isClosed())
@@ -1167,5 +1207,6 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1167
1207
  catch (e) {
1168
1208
  log(`Error exposing functions on page: ${e}`);
1169
1209
  }
1210
+ await reconcileOverlayMenu('init');
1170
1211
  return page;
1171
1212
  };
@@ -1,7 +1,7 @@
1
1
  import fs from 'fs-extra';
2
2
  import path from 'path';
3
3
  import { compressJsonFileStreaming, writeHTML, flattenAndSortResults, populateScanPagesDetail, getWcagPassPercentage, getProgressPercentage, getIssuesPercentage, itemTypeDescription, oobeeAiHtmlETL, oobeeAiRules, formatAboutStartTime, convertItemsToReferences, } from './mergeAxeResults.js';
4
- import constants, { ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, } from './constants/constants.js';
4
+ import constants, { ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, } from './constants/constants.js';
5
5
  import { consoleLogger } from './logs.js';
6
6
  const ensureCategory = (categoryObj, categoryName) => {
7
7
  const rulesRaw = categoryObj?.rules ?? [];
@@ -23,7 +23,12 @@ const ensureCategory = (categoryObj, categoryName) => {
23
23
  rule.pagesAffected = [];
24
24
  }
25
25
  if (typeof rule.totalItems !== 'number') {
26
- rule.totalItems = rule.pagesAffected.reduce((accumulate, page) => accumulate + (Array.isArray(page.items) ? page.items.length : 0), 0);
26
+ rule.totalItems = rule.pagesAffected.reduce((accumulate, page) => accumulate +
27
+ (Array.isArray(page.items)
28
+ ? page.items.length
29
+ : typeof page.itemsCount === 'number'
30
+ ? page.itemsCount
31
+ : 0), 0);
27
32
  }
28
33
  });
29
34
  const totals = {
@@ -38,7 +43,7 @@ const ensureCategory = (categoryObj, categoryName) => {
38
43
  rules,
39
44
  };
40
45
  };
41
- export const generateHtmlReport = async (resultDir) => {
46
+ export const generateHtmlReport = async (resultDir, htmlFilename = 'report') => {
42
47
  try {
43
48
  const storagePath = path.resolve(resultDir);
44
49
  const scanDataJsonPath = path.join(storagePath, 'scanData.json');
@@ -61,17 +66,16 @@ export const generateHtmlReport = async (resultDir) => {
61
66
  }
62
67
  const scanData = JSON.parse(await fs.readFile(scanDataJsonPath, 'utf8'));
63
68
  const scanItemsAll = JSON.parse(await fs.readFile(scanItemsJsonPath, 'utf8'));
64
- // Use convertItemsToReferences to normalize items structure to match scanItemsWithHtmlGroupRefs format
65
- const scanItemsWithHtmlGroupRefs = convertItemsToReferences({
69
+ // Build the lighter scanItems payload used by the HTML report.
70
+ const lightScanItemsPayload = convertItemsToReferences({
66
71
  items: scanItemsAll,
67
- ...scanData
68
72
  });
69
- const { mustFix = {}, goodToFix = {}, needsReview = {}, passed = {}, } = scanItemsWithHtmlGroupRefs;
73
+ const { mustFix = {}, goodToFix = {}, needsReview = {}, } = lightScanItemsPayload;
70
74
  const items = {
71
75
  mustFix: ensureCategory(mustFix, 'mustFix'),
72
76
  goodToFix: ensureCategory(goodToFix, 'goodToFix'),
73
77
  needsReview: ensureCategory(needsReview, 'needsReview'),
74
- passed: ensureCategory(passed, 'passed'),
78
+ passed: ensureCategory(scanItemsAll.passed || {}, 'passed'),
75
79
  };
76
80
  const pagesScanned = Array.isArray(scanData.pagesScanned) ? scanData.pagesScanned : [];
77
81
  const pagesNotScanned = Array.isArray(scanData.pagesNotScanned) ? scanData.pagesNotScanned : [];
@@ -116,6 +120,8 @@ export const generateHtmlReport = async (resultDir) => {
116
120
  a11yRuleShortDescriptionMap,
117
121
  disabilityBadgesMap,
118
122
  a11yRuleLongDescriptionMap,
123
+ a11yRuleStepByStepGuide,
124
+ wcagCriteriaLabels: constants.wcagCriteriaLabels,
119
125
  advancedScanOptionsSummaryItems: {
120
126
  showIncludeScreenshots: !!scanData.advancedScanOptionsSummaryItems?.showIncludeScreenshots,
121
127
  showAllowSubdomains: !!scanData.advancedScanOptionsSummaryItems?.showAllowSubdomains,
@@ -137,9 +143,10 @@ export const generateHtmlReport = async (resultDir) => {
137
143
  allIssues.wcagPassPercentage = getWcagPassPercentage(allIssues.wcagViolations, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa);
138
144
  allIssues.progressPercentage = getProgressPercentage(allIssues.scanPagesDetail, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa);
139
145
  allIssues.issuesPercentage = await getIssuesPercentage(allIssues.scanPagesDetail, allIssues.advancedScanOptionsSummaryItems.showEnableWcagAaa, allIssues.advancedScanOptionsSummaryItems?.disableOobee);
140
- await writeHTML(allIssues, storagePath, 'report', scanDataB64Path, scanItemsB64Path);
141
- consoleLogger.info(`Report generated at: ${path.join(storagePath, 'report.html')}`);
142
- return path.join(storagePath, 'report.html');
146
+ await writeHTML(allIssues, storagePath, htmlFilename, scanDataB64Path, scanItemsB64Path);
147
+ const outputPath = path.join(storagePath, `${htmlFilename}.html`);
148
+ consoleLogger.info(`Report generated at: ${outputPath}`);
149
+ return outputPath;
143
150
  }
144
151
  catch (err) {
145
152
  consoleLogger.error(`generateHtmlReport failed: ${err?.message || err}`);