@govtechsg/oobee 0.10.85 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +137 -31
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +18 -11
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +5 -4
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +2 -2
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +143 -38
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +29 -11
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +5 -3
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@govtechsg/oobee",
|
|
3
3
|
"main": "dist/npmIndex.js",
|
|
4
|
-
"version": "0.10.85",
|
|
4
|
+
"version": "0.10.86",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
|
7
7
|
"bin": {
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
"fast-xml-parser": ">=5.3.8",
|
|
87
87
|
"js-yaml": "^4.1.1",
|
|
88
88
|
"minimatch": "^10.2.4",
|
|
89
|
-
"brace-expansion": "^5.0.
|
|
89
|
+
"brace-expansion": "^5.0.5",
|
|
90
90
|
"glob": "^13.0.6",
|
|
91
91
|
"flatted": "^3.4.1",
|
|
92
92
|
"file-type": "^21.3.3"
|
package/src/combine.ts
CHANGED
|
@@ -135,6 +135,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
135
135
|
const res = await runCustom(
|
|
136
136
|
url,
|
|
137
137
|
randomToken,
|
|
138
|
+
browser,
|
|
139
|
+
userDataDirectory,
|
|
138
140
|
viewportSettings,
|
|
139
141
|
blacklistedPatterns,
|
|
140
142
|
includeScreenshots,
|
|
@@ -328,4 +330,4 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
328
330
|
}
|
|
329
331
|
};
|
|
330
332
|
|
|
331
|
-
export default combineRun;
|
|
333
|
+
export default combineRun;
|
package/src/constants/common.ts
CHANGED
|
@@ -213,6 +213,15 @@ export const validateXML = (content: string): { isValid: boolean; parsedContent:
|
|
|
213
213
|
return { isValid, parsedContent };
|
|
214
214
|
};
|
|
215
215
|
|
|
216
|
+
export const validateTXT = (content: string): { isValid: boolean } => {
|
|
217
|
+
// Strip HTML tags first — browsers wrap .txt files in HTML when fetched via Playwright
|
|
218
|
+
const plainText = content.replace(/<[^>]+>/g, '\n');
|
|
219
|
+
const lines = plainText.split(/\r?\n/).map(l => l.trim()).filter(l => l.length > 0);
|
|
220
|
+
// Allow http, https and relative paths (starting with /) for txt sitemaps, as some sitemaps use relative paths and some txt sitemaps are fetched as HTML by Playwright
|
|
221
|
+
const urlPattern = /^(https?:\/\/|\/)[^\s]+$/i;
|
|
222
|
+
return { isValid: lines.some(line => urlPattern.test(line)) };
|
|
223
|
+
};
|
|
224
|
+
|
|
216
225
|
export const isSkippedUrl = (pageUrl: string, whitelistedDomains: string[]) => {
|
|
217
226
|
const matched =
|
|
218
227
|
whitelistedDomains.filter(p => {
|
|
@@ -541,14 +550,13 @@ export const isSitemapContent = (content: string) => {
|
|
|
541
550
|
|
|
542
551
|
const regexForHtml = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
|
|
543
552
|
const regexForXmlSitemap = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
|
|
544
|
-
const regexForUrl = new RegExp('^.*(http|https):/{2}.*$', 'gmi');
|
|
545
|
-
|
|
546
553
|
if (content.match(regexForHtml) && content.match(regexForXmlSitemap)) {
|
|
547
554
|
// is an XML sitemap wrapped in a HTML document
|
|
548
555
|
return true;
|
|
549
556
|
}
|
|
550
|
-
|
|
551
|
-
|
|
557
|
+
const { isValid: isTxtSitemap } = validateTXT(content);
|
|
558
|
+
if (isTxtSitemap) {
|
|
559
|
+
// treat this as a txt sitemap (plain text or browser-wrapped with HTML)
|
|
552
560
|
return true;
|
|
553
561
|
}
|
|
554
562
|
// is HTML webpage
|
|
@@ -1924,14 +1932,16 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
|
|
|
1924
1932
|
const channel = browser || undefined;
|
|
1925
1933
|
|
|
1926
1934
|
const resolution = proxyInfoToResolution(cacheProxyInfo);
|
|
1935
|
+
const shouldIgnoreMuteAudio =
|
|
1936
|
+
process.env.OOBEE_PLAYWRIGHT_IGNORE_DEFAULT_ARGS === '--mute-audio';
|
|
1927
1937
|
|
|
1928
1938
|
// Start with your base args and sanitise
|
|
1929
1939
|
const finalArgs = [...constants.launchOptionsArgs].filter(
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1940
|
+
arg =>
|
|
1941
|
+
!arg.startsWith('--headless') &&
|
|
1942
|
+
!arg.startsWith('--user-agent=') &&
|
|
1943
|
+
arg !== '--mute-audio' &&
|
|
1944
|
+
!(browser === BrowserTypes.CHROME && arg === '--edge-skip-compat-layer-relaunch'),
|
|
1935
1945
|
);
|
|
1936
1946
|
|
|
1937
1947
|
// Headless flags (unchanged)
|
|
@@ -1956,7 +1966,9 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
|
|
|
1956
1966
|
}
|
|
1957
1967
|
|
|
1958
1968
|
const options: LaunchOptions = {
|
|
1959
|
-
ignoreDefaultArgs:
|
|
1969
|
+
ignoreDefaultArgs: shouldIgnoreMuteAudio
|
|
1970
|
+
? ['--use-mock-keychain', '--mute-audio']
|
|
1971
|
+
: ['--use-mock-keychain'],
|
|
1960
1972
|
args: finalArgs,
|
|
1961
1973
|
headless: process.env.CRAWLEE_HEADLESS === '1',
|
|
1962
1974
|
...(channel && { channel }),
|