@govtechsg/oobee 0.10.85 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/combine.js +1 -1
  5. package/dist/constants/common.js +15 -4
  6. package/dist/constants/constants.js +604 -1
  7. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  8. package/dist/crawlers/crawlSitemap.js +98 -80
  9. package/dist/crawlers/custom/utils.js +137 -31
  10. package/dist/crawlers/guards/urlGuard.js +8 -15
  11. package/dist/crawlers/runCustom.js +18 -11
  12. package/dist/generateOobeeClientScanner.js +570 -0
  13. package/dist/mergeAxeResults.js +5 -4
  14. package/dist/npmIndex.js +10 -2
  15. package/dist/proxyService.js +18 -3
  16. package/dist/services/s3Uploader.js +21 -10
  17. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  18. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  19. package/dist/static/ejs/summary.ejs +10 -5
  20. package/oobee-client-scanner.js +34992 -0
  21. package/package.json +2 -2
  22. package/src/combine.ts +3 -1
  23. package/src/constants/common.ts +22 -10
  24. package/src/constants/constants.ts +602 -1
  25. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  26. package/src/crawlers/crawlSitemap.ts +116 -98
  27. package/src/crawlers/custom/utils.ts +143 -38
  28. package/src/crawlers/guards/urlGuard.ts +24 -31
  29. package/src/crawlers/runCustom.ts +29 -11
  30. package/src/generateOobeeClientScanner.ts +591 -0
  31. package/src/mergeAxeResults.ts +5 -3
  32. package/src/npmIndex.ts +12 -2
  33. package/src/proxyService.ts +25 -4
  34. package/src/services/s3Uploader.ts +23 -11
  35. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  36. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  37. package/src/static/ejs/summary.ejs +10 -5
  38. package/testStaticJSScanner.html +534 -0
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.85",
4
+ "version": "0.10.86",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "bin": {
@@ -86,7 +86,7 @@
86
86
  "fast-xml-parser": ">=5.3.8",
87
87
  "js-yaml": "^4.1.1",
88
88
  "minimatch": "^10.2.4",
89
- "brace-expansion": "^5.0.4",
89
+ "brace-expansion": "^5.0.5",
90
90
  "glob": "^13.0.6",
91
91
  "flatted": "^3.4.1",
92
92
  "file-type": "^21.3.3"
package/src/combine.ts CHANGED
@@ -135,6 +135,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
135
135
  const res = await runCustom(
136
136
  url,
137
137
  randomToken,
138
+ browser,
139
+ userDataDirectory,
138
140
  viewportSettings,
139
141
  blacklistedPatterns,
140
142
  includeScreenshots,
@@ -328,4 +330,4 @@ const combineRun = async (details: Data, deviceToScan: string) => {
328
330
  }
329
331
  };
330
332
 
331
- export default combineRun;
333
+ export default combineRun;
package/src/constants/common.ts CHANGED
@@ -213,6 +213,15 @@ export const validateXML = (content: string): { isValid: boolean; parsedContent:
213
213
  return { isValid, parsedContent };
214
214
  };
215
215
 
216
+ export const validateTXT = (content: string): { isValid: boolean } => {
217
+ // Strip HTML tags first — browsers wrap .txt files in HTML when fetched via Playwright
218
+ const plainText = content.replace(/<[^>]+>/g, '\n');
219
+ const lines = plainText.split(/\r?\n/).map(l => l.trim()).filter(l => l.length > 0);
220
+ // Allow http, https and relative paths (starting with /) for txt sitemaps, as some sitemaps use relative paths and some txt sitemaps are fetched as HTML by Playwright
221
+ const urlPattern = /^(https?:\/\/|\/)[^\s]+$/i;
222
+ return { isValid: lines.some(line => urlPattern.test(line)) };
223
+ };
224
+
216
225
  export const isSkippedUrl = (pageUrl: string, whitelistedDomains: string[]) => {
217
226
  const matched =
218
227
  whitelistedDomains.filter(p => {
@@ -541,14 +550,13 @@ export const isSitemapContent = (content: string) => {
541
550
 
542
551
  const regexForHtml = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
543
552
  const regexForXmlSitemap = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
544
- const regexForUrl = new RegExp('^.*(http|https):/{2}.*$', 'gmi');
545
-
546
553
  if (content.match(regexForHtml) && content.match(regexForXmlSitemap)) {
547
554
  // is an XML sitemap wrapped in a HTML document
548
555
  return true;
549
556
  }
550
- if (!content.match(regexForHtml) && content.match(regexForUrl)) {
551
- // treat this as a txt sitemap where all URLs will be extracted for crawling
557
+ const { isValid: isTxtSitemap } = validateTXT(content);
558
+ if (isTxtSitemap) {
559
+ // treat this as a txt sitemap (plain text or browser-wrapped with HTML)
552
560
  return true;
553
561
  }
554
562
  // is HTML webpage
@@ -1924,14 +1932,16 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
1924
1932
  const channel = browser || undefined;
1925
1933
 
1926
1934
  const resolution = proxyInfoToResolution(cacheProxyInfo);
1935
+ const shouldIgnoreMuteAudio =
1936
+ process.env.OOBEE_PLAYWRIGHT_IGNORE_DEFAULT_ARGS === '--mute-audio';
1927
1937
 
1928
1938
  // Start with your base args and sanitise
1929
1939
  const finalArgs = [...constants.launchOptionsArgs].filter(
1930
- arg =>
1931
- !arg.startsWith('--headless') &&
1932
- !arg.startsWith('--user-agent=') &&
1933
- arg !== '--mute-audio' &&
1934
- !(browser === BrowserTypes.CHROME && arg === '--edge-skip-compat-layer-relaunch'),
1940
+ arg =>
1941
+ !arg.startsWith('--headless') &&
1942
+ !arg.startsWith('--user-agent=') &&
1943
+ arg !== '--mute-audio' &&
1944
+ !(browser === BrowserTypes.CHROME && arg === '--edge-skip-compat-layer-relaunch'),
1935
1945
  );
1936
1946
 
1937
1947
  // Headless flags (unchanged)
@@ -1956,7 +1966,9 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
1956
1966
  }
1957
1967
 
1958
1968
  const options: LaunchOptions = {
1959
- ignoreDefaultArgs: ['--use-mock-keychain'],
1969
+ ignoreDefaultArgs: shouldIgnoreMuteAudio
1970
+ ? ['--use-mock-keychain', '--mute-audio']
1971
+ : ['--use-mock-keychain'],
1960
1972
  args: finalArgs,
1961
1973
  headless: process.env.CRAWLEE_HEADLESS === '1',
1962
1974
  ...(channel && { channel }),