@govtechsg/oobee 0.10.85 → 0.10.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.github/workflows/publish.yml +10 -0
  2. package/DETAILS.md +29 -0
  3. package/dist/cli.js +18 -5
  4. package/dist/combine.js +3 -1
  5. package/dist/constants/cliFunctions.js +2 -2
  6. package/dist/constants/common.js +70 -17
  7. package/dist/constants/constants.js +604 -1
  8. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  9. package/dist/crawlers/crawlDomain.js +38 -13
  10. package/dist/crawlers/crawlIntelligentSitemap.js +62 -30
  11. package/dist/crawlers/crawlSitemap.js +141 -84
  12. package/dist/crawlers/custom/utils.js +218 -71
  13. package/dist/crawlers/guards/urlGuard.js +8 -15
  14. package/dist/crawlers/runCustom.js +18 -11
  15. package/dist/generateHtmlReport.js +18 -11
  16. package/dist/generateOobeeClientScanner.js +570 -0
  17. package/dist/mergeAxeResults/itemReferences.js +60 -25
  18. package/dist/mergeAxeResults/sentryTelemetry.js +4 -1
  19. package/dist/mergeAxeResults.js +23 -13
  20. package/dist/npmIndex.js +10 -2
  21. package/dist/proxyService.js +18 -3
  22. package/dist/services/s3Uploader.js +21 -10
  23. package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  24. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  25. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  26. package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
  27. package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
  28. package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  29. package/dist/static/ejs/summary.ejs +19 -8
  30. package/dist/utils.js +4 -3
  31. package/fix-summary-html-oom-pr.md +62 -0
  32. package/oobee-client-scanner.js +34992 -0
  33. package/package.json +5 -5
  34. package/src/cli.ts +19 -5
  35. package/src/combine.ts +5 -1
  36. package/src/constants/cliFunctions.ts +2 -2
  37. package/src/constants/common.ts +87 -22
  38. package/src/constants/constants.ts +602 -1
  39. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  40. package/src/crawlers/crawlDomain.ts +39 -13
  41. package/src/crawlers/crawlIntelligentSitemap.ts +63 -30
  42. package/src/crawlers/crawlSitemap.ts +165 -100
  43. package/src/crawlers/custom/utils.ts +241 -80
  44. package/src/crawlers/guards/urlGuard.ts +24 -31
  45. package/src/crawlers/runCustom.ts +29 -11
  46. package/src/generateHtmlReport.ts +21 -11
  47. package/src/generateOobeeClientScanner.ts +591 -0
  48. package/src/mergeAxeResults/itemReferences.ts +70 -26
  49. package/src/mergeAxeResults/sentryTelemetry.ts +4 -1
  50. package/src/mergeAxeResults.ts +26 -14
  51. package/src/npmIndex.ts +12 -2
  52. package/src/proxyService.ts +25 -4
  53. package/src/services/s3Uploader.ts +23 -11
  54. package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
  55. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  56. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  57. package/src/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
  58. package/src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
  59. package/src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
  60. package/src/static/ejs/summary.ejs +19 -8
  61. package/src/utils.ts +4 -3
  62. package/testStaticJSScanner.html +534 -0
@@ -1,5 +1,9 @@
1
1
  import type { AllIssues, ItemsInfo, RuleInfo } from './types.js';
2
2
 
3
+ type ScanItems = AllIssues['items'];
4
+ type ScanCategory = ScanItems[keyof ScanItems];
5
+ type ScanItemsLight = Pick<ScanItems, 'mustFix' | 'goodToFix' | 'needsReview' | 'passed'>;
6
+
3
7
  /**
4
8
  * Builds pre-computed HTML groups to optimize Group by HTML Element functionality.
5
9
  * Keys are composite "html\x00xpath" strings to ensure unique matching per element instance.
@@ -31,32 +35,72 @@ export const buildHtmlGroups = (rule: RuleInfo, items: ItemsInfo[], pageUrl: str
31
35
  });
32
36
  };
33
37
 
38
+ /*
39
+ // Commenting this out for now: per-page item references (the html\x00xpath keys built below) are not included in the embedded report payload, to keep it lean.
40
+ // We can revisit this if we want to embed per-page item references in the future and need a reference builder for them.
41
+ const toHtmlGroupReference = (item: any) => {
42
+ if (typeof item === 'string') {
43
+ return item;
44
+ }
45
+
46
+ return `${item?.html || 'No HTML element'}\x00${item?.xpath || ''}`;
47
+ };
48
+
49
+ const cloneCategoryWithReferenceItems = (category: ScanCategory): ScanCategory =>
50
+ ({
51
+ ...category,
52
+ rules: category.rules.map(
53
+ rule =>
54
+ ({
55
+ ...rule,
56
+ pagesAffected: rule.pagesAffected.map(
57
+ page => {
58
+ const { items, ...pageWithoutItems } = page;
59
+
60
+ return {
61
+ ...pageWithoutItems,
62
+ itemsCount: page.itemsCount ?? (Array.isArray(items) ? items.length : 0),
63
+ items: Array.isArray(items) ? items.map(toHtmlGroupReference) : items,
64
+ } as any;
65
+ },
66
+ ),
67
+ }) as any,
68
+ ),
69
+ }) as ScanCategory;
70
+ */
71
+
72
+ const cloneCategoryLight = (category: ScanCategory, includeHtmlGroups: boolean): ScanCategory =>
73
+ ({
74
+ ...category,
75
+ rules: category.rules.map(
76
+ rule =>
77
+ ({
78
+ rule: rule.rule,
79
+ description: rule.description,
80
+ helpUrl: rule.helpUrl,
81
+ conformance: rule.conformance,
82
+ totalItems: rule.totalItems,
83
+ axeImpact: rule.axeImpact,
84
+ ...(includeHtmlGroups && rule.htmlGroups ? { htmlGroups: rule.htmlGroups } : {}),
85
+ pagesAffected: rule.pagesAffected.map(page => ({
86
+ url: page.url,
87
+ pageTitle: page.pageTitle,
88
+ itemsCount: page.itemsCount ?? (Array.isArray((page as any).items) ? (page as any).items.length : 0),
89
+ })),
90
+ }) as any,
91
+ ),
92
+ }) as ScanCategory;
93
+
34
94
  /**
35
- * Converts items in pagesAffected to references (html\x00xpath composite keys) for embedding in HTML report.
36
- * Additionally, it deep-clones allIssues, replaces page.items objects with composite reference keys.
37
- * Those refs are specifically for htmlGroups lookup (html + xpath).
95
+ * Builds the embedded HTML-report payload from the full scan items.
96
+ * Includes htmlGroups for non-passed categories (Group by HTML Element),
97
+ * excludes them from passed to keep payload within browser memory limits.
38
98
  */
39
- export const convertItemsToReferences = (allIssues: AllIssues): AllIssues => {
40
- const cloned = JSON.parse(JSON.stringify(allIssues));
41
-
42
- ['mustFix', 'goodToFix', 'needsReview', 'passed'].forEach(category => {
43
- if (!cloned.items[category]?.rules) return;
44
-
45
- cloned.items[category].rules.forEach((rule: any) => {
46
- if (!rule.pagesAffected || !rule.htmlGroups) return;
47
-
48
- rule.pagesAffected.forEach((page: any) => {
49
- if (!page.items) return;
50
-
51
- page.items = page.items.map((item: any) => {
52
- if (typeof item === 'string') return item; // Already a reference
53
- // Use composite key matching buildHtmlGroups
54
- const htmlKey = `${item.html || 'No HTML element'}\x00${item.xpath || ''}`;
55
- return htmlKey;
56
- });
57
- });
58
- });
59
- });
60
-
61
- return cloned;
99
+ export const convertItemsToReferences = (source: Pick<AllIssues, 'items'>): ScanItemsLight => {
100
+ return {
101
+ mustFix: cloneCategoryLight(source.items.mustFix, true),
102
+ goodToFix: cloneCategoryLight(source.items.goodToFix, true),
103
+ needsReview: cloneCategoryLight(source.items.needsReview, true),
104
+ passed: cloneCategoryLight(source.items.passed, false),
105
+ };
62
106
  };
@@ -140,7 +140,10 @@ const sendWcagBreakdownToSentry = async (
140
140
  event_type: 'accessibility_scan',
141
141
  scanType: scanInfo.scanType,
142
142
  browser: scanInfo.browser,
143
- entryUrl: scanInfo.entryUrl,
143
+ entryUrl: process.env.OOBEE_SCAN_METADATA ?? scanInfo.entryUrl,
144
+ ...(process.env.OOBEE_SCAN_PRODUCT && {
145
+ scanProduct: process.env.OOBEE_SCAN_PRODUCT,
146
+ }),
144
147
  },
145
148
  user: {
146
149
  ...(scanInfo.email && scanInfo.name
@@ -12,6 +12,7 @@ import constants, {
12
12
  a11yRuleShortDescriptionMap,
13
13
  disabilityBadgesMap,
14
14
  a11yRuleLongDescriptionMap,
15
+ a11yRuleStepByStepGuide,
15
16
  } from './constants/constants.js';
16
17
  import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
17
18
 
@@ -184,15 +185,15 @@ const writeHTML = async (
184
185
  'utf-8',
185
186
  );
186
187
 
187
- // Create lighter version with item references for embedding in HTML
188
- const scanItemsWithHtmlGroupRefs = convertItemsToReferences(allIssues);
188
+ // Create the lighter scanItems payload for embedding in the HTML report.
189
+ const lightScanItemsPayload = convertItemsToReferences(allIssues);
189
190
 
190
191
  // Write the lighter items to a file and get the base64 path
191
192
  const {
192
- jsonFilePath: scanItemsWithHtmlGroupRefsJsonFilePath,
193
- base64FilePath: scanItemsWithHtmlGroupRefsBase64FilePath,
193
+ jsonFilePath: lightScanItemsPayloadJsonFilePath,
194
+ base64FilePath: lightScanItemsPayloadBase64FilePath,
194
195
  } = await writeJsonFileAndCompressedJsonFile(
195
- scanItemsWithHtmlGroupRefs.items,
196
+ lightScanItemsPayload,
196
197
  storagePath,
197
198
  'scanItems-light',
198
199
  );
@@ -211,8 +212,8 @@ const writeHTML = async (
211
212
  await Promise.all([
212
213
  fs.promises.unlink(topFilePath),
213
214
  fs.promises.unlink(bottomFilePath),
214
- fs.promises.unlink(scanItemsWithHtmlGroupRefsBase64FilePath),
215
- fs.promises.unlink(scanItemsWithHtmlGroupRefsJsonFilePath),
215
+ fs.promises.unlink(lightScanItemsPayloadBase64FilePath),
216
+ fs.promises.unlink(lightScanItemsPayloadJsonFilePath),
216
217
  ]);
217
218
  } catch (err) {
218
219
  console.error('Error cleaning up temporary files:', err);
@@ -250,6 +251,9 @@ const writeHTML = async (
250
251
  } else {
251
252
  console.warn('Skipping fetch GenAI feature as it is local report');
252
253
  }
254
+
255
+ var scanData = null;
256
+ var scanItems = null;
253
257
  \n`);
254
258
 
255
259
  outputStream.write('</script>\n<script type="text/plain" id="scanDataRaw">');
@@ -258,22 +262,25 @@ const writeHTML = async (
258
262
  scanDetailsReadStream.on('end', async () => {
259
263
  outputStream.write('</script>\n<script>\n');
260
264
  outputStream.write(
261
- "var scanDataPromise = (async () => { console.log('Loading scanData...'); scanData = await decodeUnzipParse(document.getElementById('scanDataRaw').textContent); })();\n",
265
+ "var scanDataPromise = (async () => { console.log('Loading scanData...'); scanData = await decodeUnzipParse(document.getElementById('scanDataRaw').textContent); console.log('[report] scanData loaded'); })();\n",
262
266
  );
263
267
  outputStream.write('</script>\n');
264
268
 
265
269
  // Write scanItems in 2MB chunks using a stream to avoid loading entire file into memory
266
270
  try {
267
271
  let chunkIndex = 1;
268
- const scanItemsStream = fs.createReadStream(scanItemsWithHtmlGroupRefsBase64FilePath, {
272
+ const scanItemsStream = fs.createReadStream(lightScanItemsPayloadBase64FilePath, {
269
273
  encoding: 'utf8',
270
274
  highWaterMark: CHUNK_SIZE,
271
275
  });
272
276
 
273
277
  for await (const chunk of scanItemsStream) {
274
- outputStream.write(
278
+ const ok = outputStream.write(
275
279
  `<script type="text/plain" id="scanItemsRaw${chunkIndex}">${chunk}</script>\n`,
276
280
  );
281
+ if (!ok) {
282
+ await new Promise<void>(resolve => outputStream.once('drain', resolve));
283
+ }
277
284
  chunkIndex++;
278
285
  }
279
286
 
@@ -290,6 +297,7 @@ var scanItemsPromise = (async () => {
290
297
  i++;
291
298
  }
292
299
  scanItems = await decodeUnzipParse(chunks);
300
+ console.log('[report] scanItems loaded');
293
301
  })();\n`);
294
302
  outputStream.write(suffixData);
295
303
  outputStream.end();
@@ -414,9 +422,9 @@ const pushResults = async (pageResults, allIssues, isCustomFlow) => {
414
422
  const { url, pageTitle, filePath } = pageResults;
415
423
 
416
424
  const totalIssuesInPage = new Set();
417
- Object.keys(pageResults.mustFix.rules).forEach(k => totalIssuesInPage.add(k));
418
- Object.keys(pageResults.goodToFix.rules).forEach(k => totalIssuesInPage.add(k));
419
- Object.keys(pageResults.needsReview.rules).forEach(k => totalIssuesInPage.add(k));
425
+ Object.keys(pageResults.mustFix?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
426
+ Object.keys(pageResults.goodToFix?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
427
+ Object.keys(pageResults.needsReview?.rules ?? {}).forEach(k => totalIssuesInPage.add(k));
420
428
 
421
429
  allIssues.topFiveMostIssues.push({
422
430
  url,
@@ -784,6 +792,7 @@ const generateArtifacts = async (
784
792
  a11yRuleShortDescriptionMap,
785
793
  disabilityBadgesMap,
786
794
  a11yRuleLongDescriptionMap,
795
+ a11yRuleStepByStepGuide,
787
796
  wcagCriteriaLabels: constants.wcagCriteriaLabels,
788
797
  scanPagesDetail: {
789
798
  pagesAffected: [],
@@ -987,10 +996,13 @@ const generateArtifacts = async (
987
996
  1,
988
997
  );
989
998
 
999
+ // Brief delay to allow lingering async crawlee storage operations to flush
1000
+ await new Promise(resolve => setTimeout(resolve, 3000));
1001
+
990
1002
  try {
991
1003
  await fs.promises.rm(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
992
1004
  } catch (error) {
993
- consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
1005
+ // Silently ignore: the folder may already be gone or may still be locked.
994
1006
  }
995
1007
 
996
1008
  try {
package/src/npmIndex.ts CHANGED
@@ -5,7 +5,7 @@ import axe, { AxeResults, ImpactValue } from 'axe-core';
5
5
  import { JSDOM } from 'jsdom';
6
6
  import { fileURLToPath } from 'url';
7
7
  import { EnqueueStrategy } from 'crawlee';
8
- import constants, { BrowserTypes, RuleFlags, ScannerTypes } from './constants/constants.js';
8
+ import constants, { BrowserTypes, RuleFlags, ScannerTypes, a11yRuleShortDescriptionMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide } from './constants/constants.js';
9
9
  import {
10
10
  deleteClonedProfiles,
11
11
  getBrowserToRun,
@@ -638,6 +638,11 @@ const processAndSubmitResults = async (
638
638
  if (constants.a11yRuleShortDescriptionMap[ruleId]) {
639
639
  mergedResults[category].rules[ruleId].description = constants.a11yRuleShortDescriptionMap[ruleId];
640
640
  }
641
+
642
+ // Add short description, long description and step-by-step guide
643
+ mergedResults[category].rules[ruleId].shortDescription = a11yRuleShortDescriptionMap[ruleId];
644
+ mergedResults[category].rules[ruleId].longDescription = a11yRuleLongDescriptionMap[ruleId];
645
+ mergedResults[category].rules[ruleId].stepByStepGuide = a11yRuleStepByStepGuide[ruleId];
641
646
 
642
647
  // Add url to items
643
648
  mergedResults[category].rules[ruleId].items.forEach((item: any) => {
@@ -733,6 +738,11 @@ const processAndSubmitResults = async (
733
738
  rule.description = constants.a11yRuleShortDescriptionMap[rule.rule];
734
739
  }
735
740
 
741
+ // Add short description, long description and step-by-step guide
742
+ rule.shortDescription = a11yRuleShortDescriptionMap[rule.rule];
743
+ rule.longDescription = a11yRuleLongDescriptionMap[rule.rule];
744
+ rule.stepByStepGuide = a11yRuleStepByStepGuide[rule.rule];
745
+
736
746
  if (rule.items) {
737
747
  rule.items.forEach((item: any) => {
738
748
  // Ensure item URL matches the result URL
@@ -877,5 +887,5 @@ export const scanPage = async (
877
887
  );
878
888
  };
879
889
 
880
- export { RuleFlags };
890
+ export { RuleFlags, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, getOobeeFunctionsScript };
881
891
 
@@ -17,7 +17,9 @@ import path from 'path';
17
17
  import { spawnSync } from 'child_process';
18
18
 
19
19
  export interface ProxyInfo {
20
- // host:port OR user:pass@host:port (no scheme)
20
+ // http/https: host:port OR user:pass@host:port (no scheme)
21
+ // socks: scheme://host:port OR scheme://user:pass@host:port (scheme preserved from ALL_PROXY)
22
+ // OR bare host:port when sourced from scutil (defaults to socks5:// on use)
21
23
  http?: string;
22
24
  https?: string;
23
25
  socks?: string;
@@ -88,7 +90,7 @@ function parseEnvProxyCommon(): ProxyInfo | null {
88
90
  const info: ProxyInfo = {};
89
91
  if (http) info.http = stripScheme(http);
90
92
  if (https) info.https = stripScheme(https);
91
- if (socks) info.socks = stripScheme(socks);
93
+ if (socks) info.socks = socks; // keep original scheme so proxyInfoToResolution can use the right protocol
92
94
  if (noProxy) info.bypassList = semiJoin(noProxy.split(/[,;]/));
93
95
 
94
96
  const { username, password } = readCredsFromEnv();
@@ -435,6 +437,15 @@ function buildIncludeOnlyPac(proxyServer: string, includeList: string[]): string
435
437
  return pac;
436
438
  }
437
439
 
440
+ /**
441
+ * Convert an info.socks value to a full proxy server URL.
442
+ * When the value already carries a scheme (e.g. ALL_PROXY=http://..., socks4://...),
443
+ * it is used as-is. Bare host:port values (from scutil) default to socks5://.
444
+ */
445
+ function toSocksServer(socks: string): string {
446
+ return /^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(socks) ? socks : `socks5://${socks}`;
447
+ }
448
+
438
449
  export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
439
450
  if (!info) return { kind: 'none' };
440
451
 
@@ -444,7 +455,7 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
444
455
  let proxyServer: string | undefined;
445
456
  if (info.http) proxyServer = `http://${info.http}`;
446
457
  else if (info.https) proxyServer = `http://${info.https}`;
447
- else if (info.socks) proxyServer = `socks5://${info.socks}`;
458
+ else if (info.socks) proxyServer = toSocksServer(info.socks);
448
459
 
449
460
  if (proxyServer) {
450
461
  // If credentials exist, embed them for the manual proxy auth
@@ -457,6 +468,16 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
457
468
  const pacDataUrl = `data:application/x-ns-proxy-autoconfig;base64,${Buffer.from(pac).toString('base64')}`;
458
469
  return { kind: 'pac', pacUrl: pacDataUrl, bypass: info.bypassList };
459
470
  }
471
+
472
+ // No direct proxy server was found — the configured proxy is PAC-based or auto-detect only.
473
+ // INCLUDE_PROXY needs a concrete server address to build a routing PAC script, so it cannot
474
+ // be applied here. Warn and fall through to use the existing PAC/autodetect as-is.
475
+ console.warn(
476
+ 'INCLUDE_PROXY is set but no direct proxy server address was found. ' +
477
+ 'INCLUDE_PROXY requires HTTP_PROXY, HTTPS_PROXY, or ALL_PROXY to be set with a direct ' +
478
+ 'server address; it cannot be applied to a PAC URL or auto-detect proxy. ' +
479
+ 'INCLUDE_PROXY will be ignored.',
480
+ );
460
481
  }
461
482
 
462
483
  // Prefer manual proxies first (these work with Playwright's proxy option)
@@ -478,7 +499,7 @@ export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
478
499
  }
479
500
  if (info.socks) {
480
501
  return { kind: 'manual', settings: {
481
- server: `socks5://${info.socks}`,
502
+ server: toSocksServer(info.socks),
482
503
  username: info.username,
483
504
  password: info.password,
484
505
  bypass: info.bypassList,
@@ -7,6 +7,18 @@ import { consoleLogger } from '../logs.js';
7
7
  const REGION = process.env.AWS_REGION || 'ap-southeast-1';
8
8
  const s3Client = new S3Client({ region: REGION });
9
9
 
10
+ // S3 user metadata is sent over REST as x-amz-meta-* HTTP headers.
11
+ // To avoid request-header validation failures in the Node/AWS SDK path,
12
+ // normalize to printable ASCII before attaching metadata values.
13
+ const sanitizeS3MetadataValue = (value: string): string => {
14
+ return value
15
+ .normalize('NFKD') // e.g. "é" -> "e" + combining accent, full-width "Ａ" -> "A"
16
+ .replace(/[\u0300-\u036f]/g, '') // e.g. remove the combining accent from the decomposed "é"
17
+ .replace(/[^\x20-\x7E]+/g, ' ') // e.g. "公益金" or emoji -> " "
18
+ .replace(/\s+/g, ' ') // e.g. "Community Chest \n" -> "Community Chest "
19
+ .trim(); // e.g. " Homepage | Community Chest " -> "Homepage | Community Chest"
20
+ };
21
+
10
22
  export interface UploadedFileInfo {
11
23
  filename: string;
12
24
  s3Path: string;
@@ -75,32 +87,32 @@ export const uploadFolderToS3 = async (
75
87
  const allowedFileExtRegex = /\.(html|csv|pdf|zip)$/;
76
88
 
77
89
  const metadata: Record<string, string> = {
78
- scanid: scanMetadata.scanId,
79
- userid: scanMetadata.userId,
80
- useremail: scanMetadata.email,
90
+ scanid: sanitizeS3MetadataValue(scanMetadata.scanId),
91
+ userid: sanitizeS3MetadataValue(scanMetadata.userId),
92
+ useremail: sanitizeS3MetadataValue(scanMetadata.email),
81
93
  };
82
94
 
83
95
  // Add optional metadata fields if present
84
96
  if (scanMetadata.messageId) {
85
- metadata.messageid = scanMetadata.messageId;
97
+ metadata.messageid = sanitizeS3MetadataValue(scanMetadata.messageId);
86
98
  }
87
99
  if (scanMetadata.amplitudeUserId) {
88
- metadata.amplitudeuserid = scanMetadata.amplitudeUserId;
100
+ metadata.amplitudeuserid = sanitizeS3MetadataValue(scanMetadata.amplitudeUserId);
89
101
  }
90
102
  if (scanMetadata.deviceId) {
91
- metadata.deviceid = scanMetadata.deviceId;
103
+ metadata.deviceid = sanitizeS3MetadataValue(scanMetadata.deviceId);
92
104
  }
93
105
  if (scanMetadata.orgId) {
94
- metadata.orgid = scanMetadata.orgId;
106
+ metadata.orgid = sanitizeS3MetadataValue(scanMetadata.orgId);
95
107
  }
96
108
  if (scanMetadata.userRole) {
97
- metadata.userrole = scanMetadata.userRole;
109
+ metadata.userrole = sanitizeS3MetadataValue(scanMetadata.userRole);
98
110
  }
99
111
  if (scanMetadata.siteName) {
100
- metadata.sitename = scanMetadata.siteName;
112
+ metadata.sitename = sanitizeS3MetadataValue(scanMetadata.siteName);
101
113
  }
102
114
  if (scanMetadata.durationExceeded !== undefined) {
103
- metadata.durationexceeded = scanMetadata.durationExceeded;
115
+ metadata.durationexceeded = sanitizeS3MetadataValue(scanMetadata.durationExceeded);
104
116
  }
105
117
 
106
118
  consoleLogger.info(`Uploading ${files.length} files to S3...`);
@@ -181,4 +193,4 @@ export const getS3UploadPrefix = (): string | null => {
181
193
  }
182
194
 
183
195
  return `users/${userId}/scans/${scanId}`;
184
- };
196
+ };
@@ -28,8 +28,11 @@ async function decodeUnzipParse(input) {
28
28
  offset += arr.length;
29
29
  }
30
30
 
31
- // Step 2: Decompress with pako (GZIP)
32
- const decompressed = pako.ungzip(merged, { to: 'string' });
31
+ // Step 2: Decompress with pako (GZIP) to bytes first to avoid large-string
32
+ // construction inside pako for very large payloads.
33
+ const decompressedBytes = pako.ungzip(merged);
34
+
35
+ const decompressed = new TextDecoder().decode(decompressedBytes);
33
36
 
34
37
  // Step 3: Parse JSON
35
38
  return JSON.parse(decompressed);
@@ -37,4 +40,4 @@ async function decodeUnzipParse(input) {
37
40
  throw new Error(`Failed to decode/unzip/parse: ${err.message}`);
38
41
  }
39
42
  }
40
- </script>
43
+ </script>
@@ -21,7 +21,7 @@
21
21
  >
22
22
  </div>
23
23
  <div class="display-url-container">
24
- <a href="${page.url}" target="_blank">${page.pageTitle.length > 0 ? page.pageTitle : page.url}</a>
24
+ <a href="${page.url}" target="_blank">${page.pageTitle?.length > 0 ? page.pageTitle : page.url}</a>
25
25
  <p>${page.url}</p>
26
26
  </div>
27
27
  </div>
@@ -29,7 +29,7 @@
29
29
  } else {
30
30
  listItem.innerHTML = `
31
31
  <a href="${page.url}" target="_blank">
32
- ${page.pageTitle.length > 0 ? page.pageTitle : page.url}
32
+ ${page.pageTitle?.length > 0 ? page.pageTitle : page.url}
33
33
  <svg class="link-external-icon" width="16" height="12" viewBox="0 0 8 8" aria-hidden="true" focusable="false">
34
34
  <path d="M7.11111 7.11111H0.888889V0.888889H4V0H0.888889C0.395556 0 0 0.4 0 0.888889V7.11111C0 7.6 0.395556 8 0.888889 8H7.11111C7.6 8 8 7.6 8 7.11111V4H7.11111V7.11111ZM4.88889 0V0.888889H6.48444L2.11556 5.25778L2.74222 5.88444L7.11111 1.51556V3.11111H8V0H4.88889Z" fill="#5735DF"/>
35
35
  </svg>