@govtechsg/oobee 0.10.84 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/cli.js +7 -6
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +218 -71
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +24 -15
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +49 -29
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +3 -3
- package/src/cli.ts +20 -15
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +244 -84
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +38 -15
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +48 -29
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@govtechsg/oobee",
|
|
3
3
|
"main": "dist/npmIndex.js",
|
|
4
|
-
"version": "0.10.84",
|
|
4
|
+
"version": "0.10.86",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
|
7
7
|
"bin": {
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"print-message": "^3.0.1",
|
|
37
37
|
"safe-regex": "^2.1.1",
|
|
38
38
|
"text-readability": "^1.1.0",
|
|
39
|
-
"tldts": "^7.0.
|
|
39
|
+
"tldts": "^7.0.27",
|
|
40
40
|
"typescript": "^5.4.5",
|
|
41
41
|
"url": "^0.11.3",
|
|
42
42
|
"uuid": "^11.0.3",
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
"fast-xml-parser": ">=5.3.8",
|
|
87
87
|
"js-yaml": "^4.1.1",
|
|
88
88
|
"minimatch": "^10.2.4",
|
|
89
|
-
"brace-expansion": "^5.0.
|
|
89
|
+
"brace-expansion": "^5.0.5",
|
|
90
90
|
"glob": "^13.0.6",
|
|
91
91
|
"flatted": "^3.4.1",
|
|
92
92
|
"file-type": "^21.3.3"
|
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,13 @@ import printMessage from 'print-message';
|
|
|
5
5
|
import { devices } from 'playwright';
|
|
6
6
|
import { fileURLToPath } from 'url';
|
|
7
7
|
import path from 'path';
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
setHeadlessMode,
|
|
10
|
+
getVersion,
|
|
11
|
+
getStoragePath,
|
|
12
|
+
listenForCleanUp,
|
|
13
|
+
cleanUpAndExit,
|
|
14
|
+
} from './utils.js';
|
|
9
15
|
import {
|
|
10
16
|
checkUrl,
|
|
11
17
|
prepareData,
|
|
@@ -185,12 +191,14 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
185
191
|
return true;
|
|
186
192
|
})
|
|
187
193
|
.check(argvs => {
|
|
188
|
-
|
|
189
|
-
|
|
194
|
+
const scanner = String(argvs.scanner ?? '');
|
|
195
|
+
|
|
196
|
+
if (argvs.strategy && scanner !== ScannerTypes.WEBSITE && scanner !== ScannerTypes.CUSTOM) {
|
|
197
|
+
throw new Error('-s or --strategy is only available in website and custom flow scans.');
|
|
190
198
|
}
|
|
191
199
|
return true;
|
|
192
200
|
})
|
|
193
|
-
.coerce('l',
|
|
201
|
+
.coerce('l', option => {
|
|
194
202
|
const duration = Number(option);
|
|
195
203
|
if (isNaN(duration) || duration < 0) {
|
|
196
204
|
printMessage(
|
|
@@ -202,8 +210,8 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
|
|
|
202
210
|
return duration;
|
|
203
211
|
})
|
|
204
212
|
.check(argvs => {
|
|
205
|
-
if (argvs.scanner
|
|
206
|
-
throw new Error('-
|
|
213
|
+
if (argvs.scanner !== ScannerTypes.WEBSITE && argvs.strategy) {
|
|
214
|
+
throw new Error('-s or --strategy is only available in website scans.');
|
|
207
215
|
}
|
|
208
216
|
return true;
|
|
209
217
|
})
|
|
@@ -235,10 +243,11 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
235
243
|
data.userDataDirectory,
|
|
236
244
|
data.playwrightDeviceDetailsObject,
|
|
237
245
|
data.extraHTTPHeaders,
|
|
238
|
-
data.fileTypes
|
|
246
|
+
data.fileTypes,
|
|
239
247
|
);
|
|
240
248
|
|
|
241
|
-
if (res.httpStatus)
|
|
249
|
+
if (res.httpStatus)
|
|
250
|
+
consoleLogger.info(`Connectivity Check HTTP Response Code: ${res.httpStatus}`);
|
|
242
251
|
|
|
243
252
|
if (res.status === statuses.success.code) {
|
|
244
253
|
data.url = res.url;
|
|
@@ -267,15 +276,11 @@ const scanInit = async (argvs: Answers): Promise<string> => {
|
|
|
267
276
|
}
|
|
268
277
|
}
|
|
269
278
|
|
|
270
|
-
const screenToScan = getScreenToScan(
|
|
271
|
-
data.deviceChosen,
|
|
272
|
-
data.customDevice,
|
|
273
|
-
data.viewportWidth,
|
|
274
|
-
);
|
|
279
|
+
const screenToScan = getScreenToScan(data.deviceChosen, data.customDevice, data.viewportWidth);
|
|
275
280
|
|
|
276
281
|
printMessage([`Oobee version: ${appVersion}`, 'Starting scan...'], messageOptions);
|
|
277
|
-
consoleLogger.info(`Oobee version: ${appVersion}`);
|
|
278
|
-
|
|
282
|
+
consoleLogger.info(`Oobee version: ${appVersion}`);
|
|
283
|
+
|
|
279
284
|
await combineRun(data, screenToScan);
|
|
280
285
|
|
|
281
286
|
return getStoragePath(data.randomToken);
|
package/src/combine.ts
CHANGED
|
@@ -135,6 +135,8 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
135
135
|
const res = await runCustom(
|
|
136
136
|
url,
|
|
137
137
|
randomToken,
|
|
138
|
+
browser,
|
|
139
|
+
userDataDirectory,
|
|
138
140
|
viewportSettings,
|
|
139
141
|
blacklistedPatterns,
|
|
140
142
|
includeScreenshots,
|
|
@@ -328,4 +330,4 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
328
330
|
}
|
|
329
331
|
};
|
|
330
332
|
|
|
331
|
-
export default combineRun;
|
|
333
|
+
export default combineRun;
|
package/src/constants/common.ts
CHANGED
|
@@ -213,6 +213,15 @@ export const validateXML = (content: string): { isValid: boolean; parsedContent:
|
|
|
213
213
|
return { isValid, parsedContent };
|
|
214
214
|
};
|
|
215
215
|
|
|
216
|
+
export const validateTXT = (content: string): { isValid: boolean } => {
|
|
217
|
+
// Strip HTML tags first — browsers wrap .txt files in HTML when fetched via Playwright
|
|
218
|
+
const plainText = content.replace(/<[^>]+>/g, '\n');
|
|
219
|
+
const lines = plainText.split(/\r?\n/).map(l => l.trim()).filter(l => l.length > 0);
|
|
220
|
+
// Allow http, https and relative paths (starting with /) for txt sitemaps, as some sitemaps use relative paths and some txt sitemaps are fetched as HTML by Playwright
|
|
221
|
+
const urlPattern = /^(https?:\/\/|\/)[^\s]+$/i;
|
|
222
|
+
return { isValid: lines.some(line => urlPattern.test(line)) };
|
|
223
|
+
};
|
|
224
|
+
|
|
216
225
|
export const isSkippedUrl = (pageUrl: string, whitelistedDomains: string[]) => {
|
|
217
226
|
const matched =
|
|
218
227
|
whitelistedDomains.filter(p => {
|
|
@@ -541,14 +550,13 @@ export const isSitemapContent = (content: string) => {
|
|
|
541
550
|
|
|
542
551
|
const regexForHtml = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
|
|
543
552
|
const regexForXmlSitemap = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
|
|
544
|
-
const regexForUrl = new RegExp('^.*(http|https):/{2}.*$', 'gmi');
|
|
545
|
-
|
|
546
553
|
if (content.match(regexForHtml) && content.match(regexForXmlSitemap)) {
|
|
547
554
|
// is an XML sitemap wrapped in a HTML document
|
|
548
555
|
return true;
|
|
549
556
|
}
|
|
550
|
-
|
|
551
|
-
|
|
557
|
+
const { isValid: isTxtSitemap } = validateTXT(content);
|
|
558
|
+
if (isTxtSitemap) {
|
|
559
|
+
// treat this as a txt sitemap (plain text or browser-wrapped with HTML)
|
|
552
560
|
return true;
|
|
553
561
|
}
|
|
554
562
|
// is HTML webpage
|
|
@@ -1924,14 +1932,16 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
|
|
|
1924
1932
|
const channel = browser || undefined;
|
|
1925
1933
|
|
|
1926
1934
|
const resolution = proxyInfoToResolution(cacheProxyInfo);
|
|
1935
|
+
const shouldIgnoreMuteAudio =
|
|
1936
|
+
process.env.OOBEE_PLAYWRIGHT_IGNORE_DEFAULT_ARGS === '--mute-audio';
|
|
1927
1937
|
|
|
1928
1938
|
// Start with your base args and sanitise
|
|
1929
1939
|
const finalArgs = [...constants.launchOptionsArgs].filter(
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1940
|
+
arg =>
|
|
1941
|
+
!arg.startsWith('--headless') &&
|
|
1942
|
+
!arg.startsWith('--user-agent=') &&
|
|
1943
|
+
arg !== '--mute-audio' &&
|
|
1944
|
+
!(browser === BrowserTypes.CHROME && arg === '--edge-skip-compat-layer-relaunch'),
|
|
1935
1945
|
);
|
|
1936
1946
|
|
|
1937
1947
|
// Headless flags (unchanged)
|
|
@@ -1956,7 +1966,9 @@ export const getPlaywrightLaunchOptions = (browser?: string): LaunchOptions => {
|
|
|
1956
1966
|
}
|
|
1957
1967
|
|
|
1958
1968
|
const options: LaunchOptions = {
|
|
1959
|
-
ignoreDefaultArgs:
|
|
1969
|
+
ignoreDefaultArgs: shouldIgnoreMuteAudio
|
|
1970
|
+
? ['--use-mock-keychain', '--mute-audio']
|
|
1971
|
+
: ['--use-mock-keychain'],
|
|
1960
1972
|
args: finalArgs,
|
|
1961
1973
|
headless: process.env.CRAWLEE_HEADLESS === '1',
|
|
1962
1974
|
...(channel && { channel }),
|