@govtechsg/oobee 0.10.84 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/cli.js +7 -6
  5. package/dist/combine.js +1 -1
  6. package/dist/constants/common.js +15 -4
  7. package/dist/constants/constants.js +604 -1
  8. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  9. package/dist/crawlers/crawlSitemap.js +98 -80
  10. package/dist/crawlers/custom/utils.js +218 -71
  11. package/dist/crawlers/guards/urlGuard.js +8 -15
  12. package/dist/crawlers/runCustom.js +24 -15
  13. package/dist/generateOobeeClientScanner.js +570 -0
  14. package/dist/mergeAxeResults.js +49 -29
  15. package/dist/npmIndex.js +10 -2
  16. package/dist/proxyService.js +18 -3
  17. package/dist/services/s3Uploader.js +21 -10
  18. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  19. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  20. package/dist/static/ejs/summary.ejs +10 -5
  21. package/oobee-client-scanner.js +34992 -0
  22. package/package.json +3 -3
  23. package/src/cli.ts +20 -15
  24. package/src/combine.ts +3 -1
  25. package/src/constants/common.ts +22 -10
  26. package/src/constants/constants.ts +602 -1
  27. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  28. package/src/crawlers/crawlSitemap.ts +116 -98
  29. package/src/crawlers/custom/utils.ts +244 -84
  30. package/src/crawlers/guards/urlGuard.ts +24 -31
  31. package/src/crawlers/runCustom.ts +38 -15
  32. package/src/generateOobeeClientScanner.ts +591 -0
  33. package/src/mergeAxeResults.ts +48 -29
  34. package/src/npmIndex.ts +12 -2
  35. package/src/proxyService.ts +25 -4
  36. package/src/services/s3Uploader.ts +23 -11
  37. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  38. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  39. package/src/static/ejs/summary.ejs +10 -5
  40. package/testStaticJSScanner.html +534 -0
@@ -12,6 +12,7 @@ import constants, {
12
12
  a11yRuleShortDescriptionMap,
13
13
  disabilityBadgesMap,
14
14
  a11yRuleLongDescriptionMap,
15
+ a11yRuleStepByStepGuide,
15
16
  } from './constants/constants.js';
16
17
  import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
17
18
 
@@ -349,44 +350,61 @@ const writeSummaryPdf = async (
349
350
  browser: string,
350
351
  _userDataDirectory: string,
351
352
  ) => {
352
- const htmlFilePath = `${storagePath}/${filename}.html`;
353
- const fileDestinationPath = `${storagePath}/${filename}.pdf`;
353
+ let browserInstance;
354
+ let context;
355
+ let page;
354
356
 
355
- const launchOptions = getPlaywrightLaunchOptions(browser);
357
+ try {
358
+ const htmlFilePath = path.join(storagePath, `${filename}.html`);
359
+ const fileDestinationPath = path.join(storagePath, `${filename}.pdf`);
360
+ const htmlFileUrl = `file://${htmlFilePath}`;
356
361
 
357
- const browserInstance = await constants.launcher.launch({
358
- ...launchOptions,
359
- headless: true, // force headless for PDF
360
- });
362
+ const launchOptions = getPlaywrightLaunchOptions(browser);
363
+
364
+ browserInstance = await constants.launcher.launch({
365
+ ...launchOptions,
366
+ headless: true,
367
+ });
361
368
 
362
- register(browserInstance as unknown as { close: () => Promise<void> });
369
+ register(browserInstance as unknown as { close: () => Promise<void> });
363
370
 
364
- const context = await browserInstance.newContext();
365
- const page = await context.newPage();
371
+ context = await browserInstance.newContext();
372
+ page = await context.newPage();
366
373
 
367
- const data = fs.readFileSync(htmlFilePath, { encoding: 'utf-8' });
368
- await page.setContent(data, { waitUntil: 'domcontentloaded' });
374
+ await page.goto(htmlFileUrl, {
375
+ waitUntil: 'domcontentloaded',
376
+ timeout: 120000,
377
+ });
369
378
 
370
- await page.emulateMedia({ media: 'print' });
379
+ await page.emulateMedia({ media: 'print' });
371
380
 
372
- await page.pdf({
373
- margin: { bottom: '32px' },
374
- path: fileDestinationPath,
375
- format: 'A4',
376
- displayHeaderFooter: true,
377
- footerTemplate: `
381
+ await page.pdf({
382
+ margin: { bottom: '32px' },
383
+ path: fileDestinationPath,
384
+ format: 'A4',
385
+ displayHeaderFooter: true,
386
+ footerTemplate: `
378
387
  <div style="margin-top:50px;color:#26241b;font-family:Open Sans;text-align: center;width: 100%;font-weight:400">
379
388
  <span style="color:#26241b;font-size: 14px;font-weight:400">Page <span class="pageNumber"></span> of <span class="totalPages"></span></span>
380
389
  </div>
381
390
  `,
382
- });
383
-
384
- await page.close();
385
- await context.close().catch(() => {});
386
- await browserInstance.close().catch(() => {});
391
+ });
387
392
 
388
- if (pagesScanned < 2000) {
389
- fs.unlinkSync(htmlFilePath);
393
+ if (pagesScanned < 2000) {
394
+ fs.unlinkSync(htmlFilePath);
395
+ }
396
+ } catch (err) {
397
+ consoleLogger.info(`Error at writeSummaryPDF ${err instanceof Error ? err.stack : err}`);
398
+ } finally {
399
+ await page?.close().catch(err => {
400
+ consoleLogger.info(`Error at page close writeSummaryPDF ${err}`);
401
+ });
402
+ await context?.close().catch(err => {
403
+ consoleLogger.info(`Error at context close writeSummaryPDF ${err}`);
404
+ });
405
+ await browserInstance?.close().catch(err => {
406
+ consoleLogger.info(`Error at browserInstance close writeSummaryPDF ${err}`);
407
+ });
390
408
  }
391
409
  };
392
410
 
@@ -397,9 +415,9 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
397
415
  const { url, pageTitle, filePath } = pageResults;
398
416
 
399
417
  const totalIssuesInPage = new Set();
400
- Object.keys(pageResults.mustFix.rules).forEach(k => totalIssuesInPage.add(k));
401
- Object.keys(pageResults.goodToFix.rules).forEach(k => totalIssuesInPage.add(k));
402
- Object.keys(pageResults.needsReview.rules).forEach(k => totalIssuesInPage.add(k));
418
+ Object.keys(pageResults.mustFix?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
419
+ Object.keys(pageResults.goodToFix?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
420
+ Object.keys(pageResults.needsReview?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
403
421
 
404
422
  allIssues.topFiveMostIssues.push({
405
423
  url,
@@ -767,6 +785,7 @@ const generateArtifacts = async (
767
785
  a11yRuleShortDescriptionMap,
768
786
  disabilityBadgesMap,
769
787
  a11yRuleLongDescriptionMap,
788
+ a11yRuleStepByStepGuide,
770
789
  wcagCriteriaLabels: constants.wcagCriteriaLabels,
771
790
  scanPagesDetail: {
772
791
  pagesAffected: [],
package/src/npmIndex.ts CHANGED
@@ -5,7 +5,7 @@ import axe, { AxeResults, ImpactValue } from 'axe-core';
5
5
  import { JSDOM } from 'jsdom';
6
6
  import { fileURLToPath } from 'url';
7
7
  import { EnqueueStrategy } from 'crawlee';
8
- import constants, { BrowserTypes, RuleFlags, ScannerTypes } from './constants/constants.js';
8
+ import constants, { BrowserTypes, RuleFlags, ScannerTypes, a11yRuleShortDescriptionMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide } from './constants/constants.js';
9
9
  import {
10
10
  deleteClonedProfiles,
11
11
  getBrowserToRun,
@@ -638,6 +638,11 @@ const processAndSubmitResults = async (
638
638
  if (constants.a11yRuleShortDescriptionMap[ruleId]) {
639
639
  mergedResults[category].rules[ruleId].description = constants.a11yRuleShortDescriptionMap[ruleId];
640
640
  }
641
+
642
+ // Add short description, long description and step-by-step guide
643
+ mergedResults[category].rules[ruleId].shortDescription = a11yRuleShortDescriptionMap[ruleId];
644
+ mergedResults[category].rules[ruleId].longDescription = a11yRuleLongDescriptionMap[ruleId];
645
+ mergedResults[category].rules[ruleId].stepByStepGuide = a11yRuleStepByStepGuide[ruleId];
641
646
 
642
647
  // Add url to items
643
648
  mergedResults[category].rules[ruleId].items.forEach((item: any) => {
@@ -733,6 +738,11 @@ const processAndSubmitResults = async (
733
738
  rule.description = constants.a11yRuleShortDescriptionMap[rule.rule];
734
739
  }
735
740
 
741
+ // Add short description, long description and step-by-step guide
742
+ rule.shortDescription = a11yRuleShortDescriptionMap[rule.rule];
743
+ rule.longDescription = a11yRuleLongDescriptionMap[rule.rule];
744
+ rule.stepByStepGuide = a11yRuleStepByStepGuide[rule.rule];
745
+
736
746
  if (rule.items) {
737
747
  rule.items.forEach((item: any) => {
738
748
  // Ensure item URL matches the result URL
@@ -877,5 +887,5 @@ export const scanPage = async (
877
887
  );
878
888
  };
879
889
 
880
- export { RuleFlags };
890
+ export { RuleFlags, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, getOobeeFunctionsScript };
881
891
 
@@ -17,7 +17,9 @@ import path from 'path';
17
17
  import { spawnSync } from 'child_process';
18
18
 
19
19
  export interface ProxyInfo {
20
- // host:port OR user:pass@host:port (no scheme)
20
+ // http/https: host:port OR user:pass@host:port (no scheme)
21
+ // socks: scheme://host:port OR scheme://user:pass@host:port (scheme preserved from ALL_PROXY)
22
+ // OR bare host:port when sourced from scutil (defaults to socks5:// on use)
21
23
  http?: string;
22
24
  https?: string;
23
25
  socks?: string;
@@ -88,7 +90,7 @@ function parseEnvProxyCommon(): ProxyInfo | null {
88
90
  const info: ProxyInfo = {};
89
91
  if (http) info.http = stripScheme(http);
90
92
  if (https) info.https = stripScheme(https);
91
- if (socks) info.socks = stripScheme(socks);
93
+ if (socks) info.socks = socks; // keep original scheme so proxyInfoToResolution can use the right protocol
92
94
  if (noProxy) info.bypassList = semiJoin(noProxy.split(/[,;]/));
93
95
 
94
96
  const { username, password } = readCredsFromEnv();
@@ -435,6 +437,15 @@ function buildIncludeOnlyPac(proxyServer: string, includeList: string[]): string
435
437
  return pac;
436
438
  }
437
439
 
440
+ /**
441
+ * Convert an info.socks value to a full proxy server URL.
442
+ * When the value already carries a scheme (e.g. ALL_PROXY=http://..., socks4://...),
443
+ * it is used as-is. Bare host:port values (from scutil) default to socks5://.
444
+ */
445
+ function toSocksServer(socks: string): string {
446
+ return /^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(socks) ? socks : `socks5://${socks}`;
447
+ }
448
+
438
449
  export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
439
450
  if (!info) return { kind: 'none' };
440
451
 
@@ -444,7 +455,7 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
444
455
  let proxyServer: string | undefined;
445
456
  if (info.http) proxyServer = `http://${info.http}`;
446
457
  else if (info.https) proxyServer = `http://${info.https}`;
447
- else if (info.socks) proxyServer = `socks5://${info.socks}`;
458
+ else if (info.socks) proxyServer = toSocksServer(info.socks);
448
459
 
449
460
  if (proxyServer) {
450
461
  // If credentials exist, embed them for the manual proxy auth
@@ -457,6 +468,16 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
457
468
  const pacDataUrl = `data:application/x-ns-proxy-autoconfig;base64,${Buffer.from(pac).toString('base64')}`;
458
469
  return { kind: 'pac', pacUrl: pacDataUrl, bypass: info.bypassList };
459
470
  }
471
+
472
+ // No direct proxy server was found — the configured proxy is PAC-based or auto-detect only.
473
+ // INCLUDE_PROXY needs a concrete server address to build a routing PAC script, so it cannot
474
+ // be applied here. Warn and fall through to use the existing PAC/autodetect as-is.
475
+ console.warn(
476
+ 'INCLUDE_PROXY is set but no direct proxy server address was found. ' +
477
+ 'INCLUDE_PROXY requires HTTP_PROXY, HTTPS_PROXY, or ALL_PROXY to be set with a direct ' +
478
+ 'server address; it cannot be applied to a PAC URL or auto-detect proxy. ' +
479
+ 'INCLUDE_PROXY will be ignored.',
480
+ );
460
481
  }
461
482
 
462
483
  // Prefer manual proxies first (these work with Playwright's proxy option)
@@ -478,7 +499,7 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
478
499
  }
479
500
  if (info.socks) {
480
501
  return { kind: 'manual', settings: {
481
- server: `socks5://${info.socks}`,
502
+ server: toSocksServer(info.socks),
482
503
  username: info.username,
483
504
  password: info.password,
484
505
  bypass: info.bypassList,
@@ -7,6 +7,18 @@ import { consoleLogger } from '../logs.js';
7
7
  const REGION = process.env.AWS_REGION || 'ap-southeast-1';
8
8
  const s3Client = new S3Client({ region: REGION });
9
9
 
10
+ // S3 user metadata is sent over REST as x-amz-meta-* HTTP headers.
11
+ // To avoid request-header validation failures in the Node/AWS SDK path,
12
+ // normalize to printable ASCII before attaching metadata values.
13
+ const sanitizeS3MetadataValue = (value: string): string => {
14
+ return value
15
+ .normalize('NFKD') // e.g. "é" -> "e" + combining accent, fullwidth "Ａ" -> "A"
16
+ .replace(/[\u0300-\u036f]/g, '') // e.g. remove the combining accent from the decomposed "é"
17
+ .replace(/[^\x20-\x7E]+/g, ' ') // e.g. "公益金" or emoji -> " "
18
+ .replace(/\s+/g, ' ') // e.g. "Community Chest \n" -> "Community Chest "
19
+ .trim(); // e.g. " Homepage | Community Chest " -> "Homepage | Community Chest"
20
+ };
21
+
10
22
  export interface UploadedFileInfo {
11
23
  filename: string;
12
24
  s3Path: string;
@@ -75,32 +87,32 @@ export const uploadFolderToS3 = async (
75
87
  const allowedFileExtRegex = /\.(html|csv|pdf|zip)$/;
76
88
 
77
89
  const metadata: Record<string, string> = {
78
- scanid: scanMetadata.scanId,
79
- userid: scanMetadata.userId,
80
- useremail: scanMetadata.email,
90
+ scanid: sanitizeS3MetadataValue(scanMetadata.scanId),
91
+ userid: sanitizeS3MetadataValue(scanMetadata.userId),
92
+ useremail: sanitizeS3MetadataValue(scanMetadata.email),
81
93
  };
82
94
 
83
95
  // Add optional metadata fields if present
84
96
  if (scanMetadata.messageId) {
85
- metadata.messageid = scanMetadata.messageId;
97
+ metadata.messageid = sanitizeS3MetadataValue(scanMetadata.messageId);
86
98
  }
87
99
  if (scanMetadata.amplitudeUserId) {
88
- metadata.amplitudeuserid = scanMetadata.amplitudeUserId;
100
+ metadata.amplitudeuserid = sanitizeS3MetadataValue(scanMetadata.amplitudeUserId);
89
101
  }
90
102
  if (scanMetadata.deviceId) {
91
- metadata.deviceid = scanMetadata.deviceId;
103
+ metadata.deviceid = sanitizeS3MetadataValue(scanMetadata.deviceId);
92
104
  }
93
105
  if (scanMetadata.orgId) {
94
- metadata.orgid = scanMetadata.orgId;
106
+ metadata.orgid = sanitizeS3MetadataValue(scanMetadata.orgId);
95
107
  }
96
108
  if (scanMetadata.userRole) {
97
- metadata.userrole = scanMetadata.userRole;
109
+ metadata.userrole = sanitizeS3MetadataValue(scanMetadata.userRole);
98
110
  }
99
111
  if (scanMetadata.siteName) {
100
- metadata.sitename = scanMetadata.siteName;
112
+ metadata.sitename = sanitizeS3MetadataValue(scanMetadata.siteName);
101
113
  }
102
114
  if (scanMetadata.durationExceeded !== undefined) {
103
- metadata.durationexceeded = scanMetadata.durationExceeded;
115
+ metadata.durationexceeded = sanitizeS3MetadataValue(scanMetadata.durationExceeded);
104
116
  }
105
117
 
106
118
  consoleLogger.info(`Uploading ${files.length} files to S3...`);
@@ -181,4 +193,4 @@ export const getS3UploadPrefix = (): string | null => {
181
193
  }
182
194
 
183
195
  return `users/${userId}/scans/${scanId}`;
184
- };
196
+ };
@@ -21,7 +21,7 @@
21
21
  >
22
22
  </div>
23
23
  <div class="display-url-container">
24
- <a href="${page.url}" target="_blank">${page.pageTitle.length > 0 ? page.pageTitle : page.url}</a>
24
+ <a href="${page.url}" target="_blank">${page.pageTitle?.length > 0 ? page.pageTitle : page.url}</a>
25
25
  <p>${page.url}</p>
26
26
  </div>
27
27
  </div>
@@ -29,7 +29,7 @@
29
29
  } else {
30
30
  listItem.innerHTML = `
31
31
  <a href="${page.url}" target="_blank">
32
- ${page.pageTitle.length > 0 ? page.pageTitle : page.url}
32
+ ${page.pageTitle?.length > 0 ? page.pageTitle : page.url}
33
33
  <svg class="link-external-icon" width="16" height="12" viewBox="0 0 8 8" aria-hidden="true" focusable="false">
34
34
  <path d="M7.11111 7.11111H0.888889V0.888889H4V0H0.888889C0.395556 0 0 0.4 0 0.888889V7.11111C0 7.6 0.395556 8 0.888889 8H7.11111C7.6 8 8 7.6 8 7.11111V4H7.11111V7.11111ZM4.88889 0V0.888889H6.48444L2.11556 5.25778L2.74222 5.88444L7.11111 1.51556V3.11111H8V0H4.88889Z" fill="#5735DF"/>
35
35
  </svg>