@govtechsg/oobee 0.10.91 → 0.10.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/AGENTS.md +303 -0
  2. package/README.md +22 -0
  3. package/dist/cli.js +3 -0
  4. package/dist/combine.js +15 -3
  5. package/dist/constants/cliFunctions.js +7 -0
  6. package/dist/constants/common.js +149 -80
  7. package/dist/constants/constants.js +1 -0
  8. package/dist/crawlers/commonCrawlerFunc.js +136 -15
  9. package/dist/crawlers/crawlDomain.js +55 -58
  10. package/dist/crawlers/crawlIntelligentSitemap.js +21 -11
  11. package/dist/crawlers/crawlRateController.js +47 -0
  12. package/dist/crawlers/crawlSitemap.js +51 -62
  13. package/dist/crawlers/runCustom.js +8 -2
  14. package/dist/generateOobeeClientScanner.js +32 -1
  15. package/dist/mergeAxeResults/itemsStore.js +32 -3
  16. package/dist/mergeAxeResults/sentryTelemetry.js +3 -0
  17. package/dist/mergeAxeResults.js +120 -92
  18. package/dist/npmIndex.js +1 -0
  19. package/dist/utils.js +23 -28
  20. package/oobee-client-scanner.js +35 -4
  21. package/package.json +3 -3
  22. package/src/cli.ts +4 -0
  23. package/src/combine.ts +16 -1
  24. package/src/constants/cliFunctions.ts +7 -0
  25. package/src/constants/common.ts +162 -90
  26. package/src/constants/constants.ts +1 -0
  27. package/src/crawlers/commonCrawlerFunc.ts +148 -14
  28. package/src/crawlers/crawlDomain.ts +64 -66
  29. package/src/crawlers/crawlIntelligentSitemap.ts +23 -11
  30. package/src/crawlers/crawlRateController.ts +63 -0
  31. package/src/crawlers/crawlSitemap.ts +57 -70
  32. package/src/crawlers/runCustom.ts +10 -1
  33. package/src/generateOobeeClientScanner.ts +32 -1
  34. package/src/index.ts +1 -0
  35. package/src/mergeAxeResults/itemsStore.ts +37 -3
  36. package/src/mergeAxeResults/sentryTelemetry.ts +3 -0
  37. package/src/mergeAxeResults.ts +139 -99
  38. package/src/npmIndex.ts +1 -0
  39. package/src/utils.ts +25 -33
  40. /package/{bf04540e-0894-4d00-98ec-c1be74c6f199.txt → 7339fae5-e8ed-4b50-af13-317847620dbf.txt} +0 -0
@@ -1,9 +1,11 @@
1
1
  import fs from 'fs-extra';
2
2
  import path from 'path';
3
3
  import readline from 'readline';
4
+ import { consoleLogger } from '../logs.js';
4
5
  export class ItemsStore {
5
6
  constructor(storagePath) {
6
7
  this.ensuredDirs = new Set();
8
+ this.fileWriteQueues = new Map();
7
9
  this.basePath = path.join(storagePath, 'tmp-items');
8
10
  }
9
11
  sanitizeRuleId(ruleId) {
@@ -22,8 +24,25 @@ export class ItemsStore {
22
24
  async appendPageItems(category, ruleId, entry) {
23
25
  await this.ensureDir(category);
24
26
  const filePath = this.getRuleFilePath(category, ruleId);
25
- const line = JSON.stringify(entry) + '\n';
26
- await fs.appendFile(filePath, line, 'utf8');
27
+ let line = JSON.stringify(entry);
28
+ // JSON.stringify should never produce literal newlines inside strings, but HTML content
29
+ // from page evaluation may contain edge-case characters (e.g. unescaped control chars in
30
+ // non-spec-compliant innerHTML). Escape any embedded \r or \n (rather than dropping them) so they cannot break line-based JSONL parsing via readline.
31
+ line = line.replace(/[\n\r]/g, (match) => {
32
+ if (match === '\n')
33
+ return '\\n';
34
+ if (match === '\r')
35
+ return '\\r';
36
+ return match;
37
+ });
38
+ line += '\n';
39
+ // Serialize writes per rule file to avoid concurrent append interleaving/truncation.
40
+ const previous = this.fileWriteQueues.get(filePath) ?? Promise.resolve();
41
+ const next = previous.then(() => fs.appendFile(filePath, line, 'utf8'));
42
+ this.fileWriteQueues.set(filePath, next.catch(() => {
43
+ // Keep queue alive for subsequent writes.
44
+ }));
45
+ await next;
27
46
  }
28
47
  async *readRuleItems(category, ruleId) {
29
48
  const filePath = this.getRuleFilePath(category, ruleId);
@@ -31,10 +50,19 @@ export class ItemsStore {
31
50
  return;
32
51
  const fileStream = fs.createReadStream(filePath, { encoding: 'utf8' });
33
52
  const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
53
+ let lineNumber = 0;
34
54
  for await (const line of rl) {
35
- if (line.trim()) {
55
+ lineNumber += 1;
56
+ if (!line.trim())
57
+ continue;
58
+ try {
36
59
  yield JSON.parse(line);
37
60
  }
61
+ catch (error) {
62
+ // Tolerate malformed/truncated JSONL lines (e.g. interrupted append) so report generation can continue.
63
+ const preview = line.slice(0, 200);
64
+ consoleLogger.warn(`Skipping malformed itemsStore JSONL line ${lineNumber} in ${filePath}: ${error.message}. Content preview: ${preview}`);
65
+ }
38
66
  }
39
67
  }
40
68
  async readRuleItemsMap(category, ruleId) {
@@ -46,6 +74,7 @@ export class ItemsStore {
46
74
  return map;
47
75
  }
48
76
  async cleanup() {
77
+ await Promise.all(this.fileWriteQueues.values());
49
78
  await fs.rm(this.basePath, { recursive: true, force: true });
50
79
  }
51
80
  }
@@ -114,6 +114,9 @@ const sendWcagBreakdownToSentry = async (appVersion, wcagBreakdown, ruleIdJson,
114
114
  ...(process.env.OOBEE_SCAN_PRODUCT && {
115
115
  scanProduct: process.env.OOBEE_SCAN_PRODUCT,
116
116
  }),
117
+ ...(process.env.OOBEE_TAGGED_WEBSITE && {
118
+ websiteTag: process.env.OOBEE_TAGGED_WEBSITE,
119
+ }),
117
120
  },
118
121
  user: {
119
122
  ...(scanInfo.email && scanInfo.name
@@ -5,6 +5,7 @@ import printMessage from 'print-message';
5
5
  import path from 'path';
6
6
  import ejs from 'ejs';
7
7
  import { fileURLToPath } from 'url';
8
+ import { Dataset, RequestQueue, Configuration } from 'crawlee';
8
9
  import constants, { BrowserTypes, ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, } from './constants/constants.js';
9
10
  import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
10
11
  import { createScreenshotsFolder, getStoragePath, getVersion, getWcagPassPercentage, getProgressPercentage, retryFunction, zipResults, getIssuesPercentage, register, } from './utils.js';
@@ -255,54 +256,84 @@ const cleanUpJsonFiles = async (filesToDelete) => {
255
256
  });
256
257
  };
257
258
  const writeSummaryPdf = async (storagePath, pagesScanned, filename = 'summary', browser, _userDataDirectory) => {
258
- let browserInstance;
259
- let context;
260
- let page;
261
- try {
262
- const htmlFilePath = path.join(storagePath, `${filename}.html`);
263
- const fileDestinationPath = path.join(storagePath, `${filename}.pdf`);
264
- const htmlFileUrl = `file://${htmlFilePath}`;
265
- const launchOptions = getPlaywrightLaunchOptions(browser);
266
- browserInstance = await constants.launcher.launch({
267
- ...launchOptions,
268
- headless: true,
269
- });
270
- register(browserInstance);
271
- context = await browserInstance.newContext();
272
- page = await context.newPage();
273
- await page.goto(htmlFileUrl, {
274
- waitUntil: 'domcontentloaded',
275
- timeout: 120000,
276
- });
277
- await page.emulateMedia({ media: 'print' });
278
- await page.pdf({
279
- margin: { bottom: '32px' },
280
- path: fileDestinationPath,
281
- format: 'A4',
282
- displayHeaderFooter: true,
283
- footerTemplate: `
259
+ const renderPdfWithBrowser = async (browserToUse) => {
260
+ let browserInstance;
261
+ let context;
262
+ let page;
263
+ try {
264
+ const htmlFilePath = path.join(storagePath, `${filename}.html`);
265
+ const fileDestinationPath = path.join(storagePath, `${filename}.pdf`);
266
+ const htmlFileUrl = `file://${htmlFilePath}`;
267
+ const launchOptions = getPlaywrightLaunchOptions(browserToUse);
268
+ browserInstance = await constants.launcher.launch({
269
+ ...launchOptions,
270
+ headless: true,
271
+ });
272
+ register(browserInstance);
273
+ context = await browserInstance.newContext();
274
+ page = await context.newPage();
275
+ await page.goto(htmlFileUrl, {
276
+ waitUntil: 'domcontentloaded',
277
+ timeout: 120000,
278
+ });
279
+ await page.emulateMedia({ media: 'print' });
280
+ await page.pdf({
281
+ margin: { bottom: '32px' },
282
+ path: fileDestinationPath,
283
+ format: 'A4',
284
+ displayHeaderFooter: true,
285
+ footerTemplate: `
284
286
  <div style="margin-top:50px;color:#26241b;font-family:Open Sans;text-align: center;width: 100%;font-weight:400">
285
287
  <span style="color:#26241b;font-size: 14px;font-weight:400">Page <span class="pageNumber"></span> of <span class="totalPages"></span></span>
286
288
  </div>
287
289
  `,
288
- });
289
- if (pagesScanned < 2000) {
290
+ });
290
291
  fs.unlinkSync(htmlFilePath);
291
292
  }
293
+ finally {
294
+ try {
295
+ await page?.close();
296
+ }
297
+ catch (err) {
298
+ consoleLogger.info(`Error at page close writeSummaryPDF ${err}`);
299
+ }
300
+ try {
301
+ await context?.close();
302
+ }
303
+ catch (err) {
304
+ consoleLogger.info(`Error at context close writeSummaryPDF ${err}`);
305
+ }
306
+ try {
307
+ await browserInstance?.close();
308
+ }
309
+ catch (err) {
310
+ consoleLogger.info(`Error at browserInstance close writeSummaryPDF ${err}`);
311
+ }
312
+ }
313
+ };
314
+ const browserAttempts = [browser];
315
+ // Runtime fallback: if Chrome launch fails on Windows, try Edge once for PDF generation.
316
+ if (process.platform === 'win32' && browser === BrowserTypes.CHROME) {
317
+ browserAttempts.push(BrowserTypes.EDGE);
292
318
  }
293
- catch (err) {
294
- consoleLogger.info(`Error at writeSummaryPDF ${err instanceof Error ? err.stack : err}`);
295
- }
296
- finally {
297
- await page?.close().catch(err => {
298
- consoleLogger.info(`Error at page close writeSummaryPDF ${err}`);
299
- });
300
- await context?.close().catch(err => {
301
- consoleLogger.info(`Error at context close writeSummaryPDF ${err}`);
302
- });
303
- await browserInstance?.close().catch(err => {
304
- consoleLogger.info(`Error at browserInstance close writeSummaryPDF ${err}`);
305
- });
319
+ for (let i = 0; i < browserAttempts.length; i++) {
320
+ const currentBrowser = browserAttempts[i];
321
+ try {
322
+ await renderPdfWithBrowser(currentBrowser);
323
+ if (i > 0) {
324
+ consoleLogger.warn(`writeSummaryPDF succeeded with fallback browser '${currentBrowser}' after '${browser}' failed.`);
325
+ }
326
+ return;
327
+ }
328
+ catch (err) {
329
+ const isLastAttempt = i === browserAttempts.length - 1;
330
+ consoleLogger.info(`Error at writeSummaryPDF using browser '${currentBrowser}': ${err instanceof Error ? err.stack : err}`);
331
+ if (isLastAttempt) {
332
+ return;
333
+ }
334
+ const nextBrowser = browserAttempts[i + 1];
335
+ consoleLogger.warn(`writeSummaryPDF failed using browser '${currentBrowser}', retrying with '${nextBrowser}'.`);
336
+ }
306
337
  }
307
338
  };
308
339
  // Tracking WCAG occurrences
@@ -489,6 +520,7 @@ const extractRuleAiData = (ruleId, totalItems, items, callback) => {
489
520
  export const createRuleIdJson = async (allIssues, itemsStore) => {
490
521
  const compiledRuleJson = {};
491
522
  for (const category of ['mustFix', 'goodToFix', 'needsReview']) {
523
+ compiledRuleJson[category] = {};
492
524
  for (const rule of allIssues.items[category].rules) {
493
525
  let allItems = [];
494
526
  if (itemsStore) {
@@ -499,7 +531,7 @@ export const createRuleIdJson = async (allIssues, itemsStore) => {
499
531
  else {
500
532
  allItems = rule.pagesAffected.flatMap(page => page.items || []);
501
533
  }
502
- compiledRuleJson[rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems);
534
+ compiledRuleJson[category][rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems);
503
535
  }
504
536
  }
505
537
  return compiledRuleJson;
@@ -508,9 +540,10 @@ export const createRuleIdJson = async (allIssues, itemsStore) => {
508
540
  export const createBasicFormHTMLSnippet = filteredResults => {
509
541
  const compiledRuleJson = {};
510
542
  ['mustFix', 'goodToFix', 'needsReview'].forEach(category => {
543
+ compiledRuleJson[category] = {};
511
544
  if (filteredResults[category] && filteredResults[category].rules) {
512
545
  Object.entries(filteredResults[category].rules).forEach(([ruleId, ruleVal]) => {
513
- compiledRuleJson[ruleId] = extractRuleAiData(ruleId, ruleVal.totalItems, ruleVal.items);
546
+ compiledRuleJson[category][ruleId] = extractRuleAiData(ruleId, ruleVal.totalItems, ruleVal.items);
514
547
  });
515
548
  }
516
549
  });
@@ -547,7 +580,7 @@ const formatAboutStartTime = (dateString) => {
547
580
  return htmlFormattedStartTime;
548
581
  };
549
582
  const generateArtifacts = async (randomToken, urlScanned, scanType, viewport, pagesScanned, pagesNotScanned, customFlowLabel, cypressScanAboutMetadata, scanDetails, zip = undefined, // optional
550
- generateJsonFiles = false) => {
583
+ generateJsonFiles = false, preferredBrowser) => {
551
584
  consoleLogger.info('Generating report artifacts');
552
585
  const storagePath = getStoragePath(randomToken);
553
586
  const intermediateDatasetsPath = `${storagePath}/crawlee`;
@@ -564,6 +597,8 @@ generateJsonFiles = false) => {
564
597
  endTime: scanDetails.endTime ? scanDetails.endTime : new Date(),
565
598
  urlScanned,
566
599
  scanType,
600
+ totalLinksFetchedFromSitemaps: constants.sitemapFetchedLinks?.totalLinksFetchedFromSitemaps ?? 0,
601
+ fetchedSitemaps: constants.sitemapFetchedLinks?.fetchedSitemaps ?? [],
567
602
  deviceChosen: scanDetails.deviceChosen || 'Desktop',
568
603
  formatAboutStartTime,
569
604
  isCustomFlow,
@@ -752,39 +787,37 @@ generateJsonFiles = false) => {
752
787
  scanPagesSummaryBase64FilePath,
753
788
  ]);
754
789
  }
755
- const browserChannel = getBrowserToRun(randomToken, BrowserTypes.CHROME, false).browserToRun;
790
+ const browserChannel = getBrowserToRun(randomToken, preferredBrowser || BrowserTypes.CHROME, false).browserToRun;
756
791
  // Should consider refactor constants.userDataDirectory to be a parameter in future
757
792
  await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
758
- // Suppress uncaught EPERM errors from lingering Crawlee async lock-file operations
759
- // (Windows holds mandatory file locks; Crawlee may still attempt mkdir on .json.lock
760
- // files after the crawl has finished). Without this, Node crashes with uncaughtException.
761
- const crawleeEpermHandler = (err) => {
762
- if (err.code === 'EPERM' && err.message?.includes('crawlee')) {
763
- consoleLogger.info(`Suppressed lingering Crawlee storage error: ${err.message}`);
764
- return;
765
- }
766
- // Re-throw non-crawlee EPERM errors so they aren't silently swallowed
767
- throw err;
768
- };
769
- process.on('uncaughtException', crawleeEpermHandler);
770
- process.on('unhandledRejection', crawleeEpermHandler);
771
- // Brief delay to allow lingering async crawlee storage operations to flush
772
- await new Promise(resolve => setTimeout(resolve, process.platform === 'win32' ? 5000 : 3000));
793
+ // Flush pending background storage operations (metadata writes, lock-file ops)
794
+ const storageClient = Configuration.getStorageClient();
795
+ if (storageClient.teardown) {
796
+ await storageClient.teardown();
797
+ }
798
+ // Gracefully drop Dataset and RequestQueue — releases locks and removes files
799
+ const crawleeDir = path.join(storagePath, 'crawlee');
800
+ try {
801
+ const dataset = await Dataset.open(crawleeDir);
802
+ await dataset.drop();
803
+ }
804
+ catch (error) {
805
+ consoleLogger.info(`Dataset drop: ${error.message}`);
806
+ }
807
+ try {
808
+ const requestQueue = await RequestQueue.open(crawleeDir);
809
+ await requestQueue.drop();
810
+ }
811
+ catch (error) {
812
+ consoleLogger.info(`RequestQueue drop: ${error.message}`);
813
+ }
814
+ // Fallback rm for any leftover files not managed by Crawlee's storage API
773
815
  const crawleePath = path.join(storagePath, 'crawlee');
774
816
  try {
775
817
  await fs.promises.rm(crawleePath, { recursive: true, force: true });
776
818
  }
777
- catch (error) {
778
- // On Windows, retry once after a delay if the folder is still locked
779
- if (process.platform === 'win32') {
780
- await new Promise(resolve => setTimeout(resolve, 3000));
781
- try {
782
- await fs.promises.rm(crawleePath, { recursive: true, force: true });
783
- }
784
- catch {
785
- // Best-effort cleanup — leave the folder; report generation continues
786
- }
787
- }
819
+ catch {
820
+ // Best-effort; storage was already dropped via API
788
821
  }
789
822
  try {
790
823
  await fs.promises.rm(path.join(storagePath, 'pdfs'), { recursive: true, force: true });
@@ -792,6 +825,22 @@ generateJsonFiles = false) => {
792
825
  catch (error) {
793
826
  consoleLogger.warn(`Unable to force remove pdfs folder: ${error.message}`);
794
827
  }
828
+ // Generate scrubbed HTML Code Snippets
829
+ const ruleIdJson = await createRuleIdJson(allIssues, itemsStore);
830
+ // Clean up intermediate items files before zipping
831
+ await itemsStore.cleanup();
832
+ try {
833
+ await sendWcagBreakdownToSentry(oobeeAppVersion, wcagOccurrencesMap, ruleIdJson, {
834
+ entryUrl: urlScanned,
835
+ scanType,
836
+ browser: scanDetails.deviceChosen,
837
+ email: scanDetails.nameEmail?.email,
838
+ name: scanDetails.nameEmail?.name,
839
+ }, allIssues, pagesScanned.length);
840
+ }
841
+ catch (error) {
842
+ console.error('Error sending WCAG data to Sentry:', error);
843
+ }
795
844
  // Take option if set
796
845
  if (typeof zip === 'string') {
797
846
  constants.cliZipFileName = zip;
@@ -827,29 +876,8 @@ generateJsonFiles = false) => {
827
876
  catch (error) {
828
877
  printMessage([`Error in zipping results: ${error}`]);
829
878
  }
830
- // Generate scrubbed HTML Code Snippets
831
- const ruleIdJson = await createRuleIdJson(allIssues, itemsStore);
832
- // Clean up intermediate items files
833
- await itemsStore.cleanup();
834
- // At the end of the function where results are generated, add:
835
- try {
836
- // Always send WCAG breakdown to Sentry, even if no violations were found
837
- // This ensures that all criteria are reported, including those with 0 occurrences
838
- await sendWcagBreakdownToSentry(oobeeAppVersion, wcagOccurrencesMap, ruleIdJson, {
839
- entryUrl: urlScanned,
840
- scanType,
841
- browser: scanDetails.deviceChosen,
842
- email: scanDetails.nameEmail?.email,
843
- name: scanDetails.nameEmail?.name,
844
- }, allIssues, pagesScanned.length);
845
- }
846
- catch (error) {
847
- console.error('Error sending WCAG data to Sentry:', error);
848
- }
849
879
  if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE)
850
880
  console.log('Report generated successfully');
851
- process.removeListener('uncaughtException', crawleeEpermHandler);
852
- process.removeListener('unhandledRejection', crawleeEpermHandler);
853
881
  return ruleIdJson;
854
882
  };
855
883
  export { writeHTML, compressJsonFileStreaming, convertItemsToReferences, flattenAndSortResults, populateScanPagesDetail, sendWcagBreakdownToSentry, getWcagPassPercentage, getProgressPercentage, getIssuesPercentage, itemTypeDescription, oobeeAiHtmlETL, oobeeAiRules, formatAboutStartTime, };
package/dist/npmIndex.js CHANGED
@@ -290,6 +290,7 @@ thresholds = { mustFix: undefined, goodToFix: undefined }, scanAboutMetadata = u
290
290
  // max numbers of mustFix/goodToFix occurrences before test returns a fail
291
291
  const { mustFix: mustFixThreshold, goodToFix: goodToFixThreshold } = thresholds;
292
292
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
293
+ constants.sitemapFetchedLinks = null;
293
294
  const scanDetails = {
294
295
  startTime: new Date(),
295
296
  endTime: new Date(),
package/dist/utils.js CHANGED
@@ -5,6 +5,7 @@ import axe from 'axe-core';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
  import { getDomain } from 'tldts';
7
7
  import { normalizeUrl } from '@apify/utilities';
8
+ import { Dataset, RequestQueue, Configuration } from 'crawlee';
8
9
  import constants, { destinationPath, getIntermediateScreenshotsPath, } from './constants/constants.js';
9
10
  import { consoleLogger, errorsTxtPath } from './logs.js';
10
11
  import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
@@ -346,6 +347,20 @@ export const cleanUp = async (randomToken, isError = false) => {
346
347
  }
347
348
  if (randomToken !== undefined) {
348
349
  const storagePath = getStoragePath(randomToken);
350
+ try {
351
+ const storageClient = Configuration.getStorageClient();
352
+ if (storageClient.teardown) {
353
+ await storageClient.teardown();
354
+ }
355
+ const crawleeDir = path.join(storagePath, 'crawlee');
356
+ const dataset = await Dataset.open(crawleeDir);
357
+ await dataset.drop();
358
+ const requestQueue = await RequestQueue.open(crawleeDir);
359
+ await requestQueue.drop();
360
+ }
361
+ catch (error) {
362
+ consoleLogger.info(`Crawlee storage drop in cleanUp: ${error.message}`);
363
+ }
349
364
  try {
350
365
  fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
351
366
  }
@@ -358,32 +373,8 @@ export const cleanUp = async (randomToken, isError = false) => {
358
373
  catch (error) {
359
374
  consoleLogger.warn(`Unable to force remove pdfs folder: ${error.message}`);
360
375
  }
361
- let deleteErrorLogFile = true;
362
- if (isError) {
363
- let logsPath = storagePath;
364
- if (process.env.OOBEE_LOGS_PATH) {
365
- logsPath = process.env.OOBEE_LOGS_PATH;
366
- }
367
- if (fs.existsSync(errorsTxtPath)) {
368
- try {
369
- const logFilePath = path.join(logsPath, `logs-${randomToken}.txt`);
370
- fs.copyFileSync(errorsTxtPath, logFilePath);
371
- console.log(`An error occured. Log file is located at: ${logFilePath}`);
372
- }
373
- catch (copyError) {
374
- consoleLogger.error(`Error copying errors file during cleanup: ${copyError.message}`);
375
- console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
376
- deleteErrorLogFile = false; // Do not delete the log file if copy failed
377
- }
378
- if (deleteErrorLogFile && fs.existsSync(errorsTxtPath)) {
379
- try {
380
- fs.unlinkSync(errorsTxtPath);
381
- }
382
- catch (error) {
383
- consoleLogger.warn(`Unable to delete log file ${errorsTxtPath}: ${error.message}`);
384
- }
385
- }
386
- }
376
+ if (isError && fs.existsSync(errorsTxtPath)) {
377
+ console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
387
378
  }
388
379
  if (fs.existsSync(storagePath) && fs.readdirSync(storagePath).length === 0) {
389
380
  try {
@@ -854,6 +845,8 @@ export const randomThreeDigitNumberString = () => {
854
845
  return String(threeDigitNumber);
855
846
  };
856
847
  export const normUrl = (u) => (u ? normalizeUrl(u) || u : '');
848
+ export const stripWwwPrefix = (hostname) => hostname.replace(/^www\./, '');
849
+ export const isSameHostname = (hostname1, hostname2) => stripWwwPrefix(hostname1) === stripWwwPrefix(hostname2);
857
850
  export const isFollowStrategy = (link1, link2, rule) => {
858
851
  if (rule === 'all')
859
852
  return true;
@@ -861,7 +854,9 @@ export const isFollowStrategy = (link1, link2, rule) => {
861
854
  const parsedLink1 = new URL(link1);
862
855
  const parsedLink2 = new URL(link2);
863
856
  if (rule === 'same-origin') {
864
- return parsedLink1.origin === parsedLink2.origin;
857
+ return parsedLink1.protocol === parsedLink2.protocol &&
858
+ isSameHostname(parsedLink1.hostname, parsedLink2.hostname) &&
859
+ parsedLink1.port === parsedLink2.port;
865
860
  }
866
861
  if (rule === 'same-domain') {
867
862
  const link1Domain = getDomain(parsedLink1.hostname, { allowPrivateDomains: true }) || parsedLink1.hostname;
@@ -869,7 +864,7 @@ export const isFollowStrategy = (link1, link2, rule) => {
869
864
  return link1Domain.toLowerCase() === link2Domain.toLowerCase();
870
865
  }
871
866
  // default: same-hostname
872
- return parsedLink1.hostname === parsedLink2.hostname;
867
+ return isSameHostname(parsedLink1.hostname, parsedLink2.hostname);
873
868
  }
874
869
  catch {
875
870
  return false;
@@ -3,9 +3,9 @@
3
3
  * DO NOT EDIT MANUALLY. Re-generate with: node dist/generateOobeeClientScanner.js
4
4
  *
5
5
  * Embedded at generation time:
6
- * App version : 0.10.91
6
+ * App version : 0.10.93
7
7
  * Sentry DSN : (from OOBEE_SENTRY_DSN env var or constants.ts default)
8
- * Sentry SDK : @sentry/browser 9.47.1 (loaded from CDN at runtime)
8
+ * Sentry SDK : @sentry/browser 10.58.0 (loaded from CDN at runtime)
9
9
  *
10
10
  * Usage:
11
11
  * <script src="oobee-client-scanner.js"></script>
@@ -34883,8 +34883,8 @@
34883
34883
  // ── Sentry browser telemetry (Sentry JS SDK, loaded from CDN) ────────────
34884
34884
 
34885
34885
  var _oobeeSentryDsn = "https://3b8c7ee46b06f33815a1301b6713ebc3@o4509047624761344.ingest.us.sentry.io/4509327783559168";
34886
- var _oobeeAppVersion = "0.10.91";
34887
- var _oobeeSentryVersion = "9.47.1";
34886
+ var _oobeeAppVersion = "0.10.93";
34887
+ var _oobeeSentryVersion = "10.58.0";
34888
34888
  var _oobeeSentryInitialized = false;
34889
34889
  var _oobeeSentryLoadPromise = null;
34890
34890
 
@@ -35091,6 +35091,37 @@
35091
35091
  // Run axe-core + oobee custom checks
35092
35092
  var scanResult = await window.runA11yScan(elementsToScan, '');
35093
35093
 
35094
+ // Re-verify aria-hidden-focus violations against the live DOM to handle
35095
+ // race conditions with JS that sets tabindex="-1" after aria-hidden
35096
+ var axeViolations = scanResult.axeScanResults.violations || [];
35097
+ var ariaHiddenViolation = axeViolations.find(function(v) { return v.id === 'aria-hidden-focus'; });
35098
+ if (ariaHiddenViolation) {
35099
+ await new Promise(function(resolve) { setTimeout(resolve, 0); });
35100
+ ariaHiddenViolation.nodes = ariaHiddenViolation.nodes.filter(function(node) {
35101
+ var selector = node.target && node.target[0];
35102
+ if (typeof selector !== 'string') return true;
35103
+ try {
35104
+ var el = document.querySelector(selector);
35105
+ if (!el) return true;
35106
+ var focusables = el.querySelectorAll(
35107
+ 'a[href], area[href], button:not([disabled]), input:not([disabled]):not([type="hidden"]), select:not([disabled]), textarea:not([disabled]), [tabindex]'
35108
+ );
35109
+ if (focusables.length === 0) return false;
35110
+ return Array.from(focusables).some(function(child) {
35111
+ var tabindex = child.getAttribute('tabindex');
35112
+ if (tabindex === null) return true;
35113
+ var parsed = parseInt(tabindex, 10);
35114
+ return isNaN(parsed) || parsed >= 0;
35115
+ });
35116
+ } catch (e) { return true; }
35117
+ });
35118
+ if (ariaHiddenViolation.nodes.length === 0) {
35119
+ scanResult.axeScanResults.violations = axeViolations.filter(function(v) {
35120
+ return v.id !== 'aria-hidden-focus';
35121
+ });
35122
+ }
35123
+ }
35124
+
35094
35125
  // Convert raw axe results into oobee category structure
35095
35126
  var filtered = _oobeeFilterAxeResults(scanResult.axeScanResults, scanResult.pageTitle);
35096
35127
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.91",
4
+ "version": "0.10.93",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "bin": {
@@ -11,7 +11,7 @@
11
11
  "@aws-sdk/client-s3": "^3.1049.0",
12
12
  "@json2csv/node": "^7.0.3",
13
13
  "@napi-rs/canvas": "^0.1.53",
14
- "@sentry/node": "^9.13.0",
14
+ "@sentry/node": "^10.58.0",
15
15
  "@types/aws-sdk": "^0.0.42",
16
16
  "axe-core": "^4.11.4",
17
17
  "axios": "^1.8.2",
@@ -26,7 +26,7 @@
26
26
  "inquirer": "^9.2.12",
27
27
  "jsdom": "^29.0.0",
28
28
  "jszip": "^3.10.1",
29
- "lodash": "^4.17.21",
29
+ "lodash": "^4.18.1",
30
30
  "mime": "^4.0.7",
31
31
  "mime-types": "^2.1.35",
32
32
  "minimatch": "^10.2.4",
package/src/cli.ts CHANGED
@@ -228,6 +228,10 @@ if (!options.strategy) {
228
228
  options.strategy = options.scanner === ScannerTypes.SITEMAP ? 'ignore' : 'same-domain';
229
229
  }
230
230
 
231
+ if (options.websiteTag) {
232
+ process.env.OOBEE_TAGGED_WEBSITE = options.websiteTag;
233
+ }
234
+
231
235
  const scanInit = async (argvs: Answers): Promise<string> => {
232
236
  const updatedArgvs = { ...argvs };
233
237
 
package/src/combine.ts CHANGED
@@ -6,7 +6,7 @@ import crawlLocalFile from './crawlers/crawlLocalFile.js';
6
6
  import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
7
7
  import generateArtifacts from './mergeAxeResults.js';
8
8
  import { getHost, createAndUpdateResultsFolders, cleanUpAndExit, getStoragePath } from './utils.js';
9
- import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
9
+ import constants, { ScannerTypes, UrlsCrawled } from './constants/constants.js';
10
10
  import { getBlackListedPatterns, submitForm } from './constants/common.js';
11
11
  import { consoleLogger, silentLogger } from './logs.js';
12
12
  import runCustom from './crawlers/runCustom.js';
@@ -72,6 +72,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
72
72
 
73
73
  process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
74
74
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
75
+ constants.sitemapFetchedLinks = null;
75
76
 
76
77
  if (process.env.CRAWLEE_SYSTEM_INFO_V2 === undefined) {
77
78
  // Set the environment variable to enable system info v2
@@ -79,6 +80,18 @@ const combineRun = async (details: Data, deviceToScan: string) => {
79
80
  process.env.CRAWLEE_SYSTEM_INFO_V2 = '1';
80
81
  }
81
82
 
83
+ // Suppress non-fatal Crawlee ps-tree errors on Windows with non-English locales.
84
+ // The system info module tries to parse process listing headers and crashes when
85
+ // headers are in a different language (e.g. "Wo" instead of "PID").
86
+ const psTreeHandler = (err: Error) => {
87
+ if (err.message?.includes('Unknown process listing header')) {
88
+ consoleLogger.info(`Suppressed Crawlee ps-tree locale error: ${err.message}`);
89
+ return;
90
+ }
91
+ throw err;
92
+ };
93
+ process.on('uncaughtException', psTreeHandler);
94
+
82
95
  const host = type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE ? '' : getHost(url);
83
96
 
84
97
  let blacklistedPatterns: string[] | null = null;
@@ -141,6 +154,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
141
154
  blacklistedPatterns,
142
155
  includeScreenshots,
143
156
  customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '',
157
+ extraHTTPHeaders,
144
158
  );
145
159
 
146
160
  urlsCrawledObj = res.urlsCrawled;
@@ -274,6 +288,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
274
288
  scanDetails,
275
289
  zip,
276
290
  generateJsonFiles,
291
+ browser,
277
292
  );
278
293
  const [name, email] = nameEmail.split(':');
279
294
 
@@ -341,5 +341,12 @@ To obtain the JSON files, you need to base64-decode the file followed by gunzip.
341
341
  demandOption: false,
342
342
  coerce: val => Number(val),
343
343
  },
344
+ z: {
345
+ alias: 'websiteTag',
346
+ describe: 'Tag to identify the website in telemetry. Overrides OOBEE_TAGGED_WEBSITE env var.',
347
+ type: 'string',
348
+ requiresArg: true,
349
+ demandOption: false,
350
+ },
344
351
  };
345
352