@govtechsg/oobee 0.10.84 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/cli.js +7 -6
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +218 -71
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +24 -15
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +49 -29
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +3 -3
- package/src/cli.ts +20 -15
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +244 -84
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +38 -15
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +48 -29
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
|
@@ -5,41 +5,34 @@ export function addUrlGuardScript(context, opts = {}) {
|
|
|
5
5
|
|
|
6
6
|
const lastAllowedUrlByPage = new WeakMap();
|
|
7
7
|
|
|
8
|
-
const attachGuardsToPage =
|
|
8
|
+
const attachGuardsToPage = page => {
|
|
9
9
|
if (!lastAllowedUrlByPage.has(page) && fallbackUrl) {
|
|
10
10
|
lastAllowedUrlByPage.set(page, String(fallbackUrl));
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
page
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
13
|
+
page
|
|
14
|
+
.addInitScript(() => {
|
|
15
|
+
const isAllowedProtocol = value => {
|
|
16
|
+
try {
|
|
17
|
+
const s = value instanceof URL ? value.toString() : String(value);
|
|
18
|
+
const { protocol } = new URL(s, window.location.href);
|
|
19
|
+
return protocol === 'http:' || protocol === 'https:';
|
|
20
|
+
} catch {
|
|
21
|
+
return false;
|
|
22
|
+
}
|
|
23
|
+
};
|
|
23
24
|
|
|
24
|
-
|
|
25
|
+
const win = window;
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
win.location.assign = (nextUrl) => { if (isAllowedProtocol(nextUrl)) assignOriginal(nextUrl); };
|
|
36
|
-
win.location.replace = (nextUrl) => { if (isAllowedProtocol(nextUrl)) replaceOriginal(nextUrl); };
|
|
37
|
-
|
|
38
|
-
Object.defineProperty(win.location, 'href', {
|
|
39
|
-
get() { return String(win.location.toString()); },
|
|
40
|
-
set(nextUrl) { if (isAllowedProtocol(nextUrl)) assignOriginal(nextUrl); },
|
|
27
|
+
const openOriginal = win.open;
|
|
28
|
+
win.open = function (targetUrl, ...args) {
|
|
29
|
+
if (!isAllowedProtocol(targetUrl)) return null;
|
|
30
|
+
return openOriginal.call(this, targetUrl, ...args);
|
|
31
|
+
};
|
|
32
|
+
})
|
|
33
|
+
.catch(() => {
|
|
34
|
+
// page may have closed before addInitScript completed; safe to ignore
|
|
41
35
|
});
|
|
42
|
-
});
|
|
43
36
|
|
|
44
37
|
const restoreToSafeUrl = async (page, attemptedUrl) => {
|
|
45
38
|
try {
|
|
@@ -50,15 +43,15 @@ export function addUrlGuardScript(context, opts = {}) {
|
|
|
50
43
|
}
|
|
51
44
|
};
|
|
52
45
|
|
|
53
|
-
page.on('framenavigated', async
|
|
46
|
+
page.on('framenavigated', async frame => {
|
|
54
47
|
if (frame !== page.mainFrame()) return;
|
|
55
48
|
|
|
56
49
|
const urlStr = frame.url();
|
|
57
50
|
let urlObj;
|
|
58
51
|
try {
|
|
59
|
-
|
|
52
|
+
urlObj = new URL(urlStr);
|
|
60
53
|
} catch {
|
|
61
|
-
|
|
54
|
+
return restoreToSafeUrl(page, urlStr);
|
|
62
55
|
}
|
|
63
56
|
|
|
64
57
|
if (ALLOWED_PROTOCOLS.has(urlObj.protocol)) {
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
/* eslint-env browser */
|
|
2
|
-
import { chromium } from 'playwright';
|
|
3
2
|
import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
|
|
4
3
|
import { cleanUpAndExit, register, registerSoftClose } from '../utils.js';
|
|
5
4
|
import constants, {
|
|
@@ -11,7 +10,12 @@ import { DEBUG, initNewPage, log } from './custom/utils.js';
|
|
|
11
10
|
import { guiInfoLog } from '../logs.js';
|
|
12
11
|
import { ViewportSettingsClass } from '../combine.js';
|
|
13
12
|
import { addUrlGuardScript } from './guards/urlGuard.js';
|
|
14
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
getBrowserToRun,
|
|
15
|
+
getPlaywrightLaunchOptions,
|
|
16
|
+
initModifiedUserAgent,
|
|
17
|
+
} from '../constants/common.js';
|
|
18
|
+
import { BrowserTypes } from '../constants/constants.js';
|
|
15
19
|
|
|
16
20
|
// Export of classes
|
|
17
21
|
|
|
@@ -25,6 +29,8 @@ export class ProcessPageParams {
|
|
|
25
29
|
randomToken: string;
|
|
26
30
|
customFlowLabel?: string;
|
|
27
31
|
stopAll?: () => Promise<void>;
|
|
32
|
+
entryUrl!: string;
|
|
33
|
+
strategy: string;
|
|
28
34
|
|
|
29
35
|
constructor(
|
|
30
36
|
scannedIdx: number,
|
|
@@ -48,6 +54,8 @@ export class ProcessPageParams {
|
|
|
48
54
|
const runCustom = async (
|
|
49
55
|
url: string,
|
|
50
56
|
randomToken: string,
|
|
57
|
+
browserToRun: string,
|
|
58
|
+
userDataDirectory: string,
|
|
51
59
|
viewportSettings: ViewportSettingsClass,
|
|
52
60
|
blacklistedPatterns: string[] | null,
|
|
53
61
|
includeScreenshots: boolean,
|
|
@@ -69,6 +77,8 @@ const runCustom = async (
|
|
|
69
77
|
randomToken,
|
|
70
78
|
);
|
|
71
79
|
|
|
80
|
+
processPageParams.entryUrl = url;
|
|
81
|
+
|
|
72
82
|
if (initialCustomFlowLabel && initialCustomFlowLabel.trim()) {
|
|
73
83
|
processPageParams.customFlowLabel = initialCustomFlowLabel.trim();
|
|
74
84
|
}
|
|
@@ -77,28 +87,37 @@ const runCustom = async (
|
|
|
77
87
|
const pageClosePromises = [];
|
|
78
88
|
|
|
79
89
|
try {
|
|
90
|
+
const { browserToRun: resolvedBrowserToRun } = getBrowserToRun(
|
|
91
|
+
randomToken,
|
|
92
|
+
browserToRun as BrowserTypes,
|
|
93
|
+
false,
|
|
94
|
+
);
|
|
80
95
|
const deviceConfig = viewportSettings.playwrightDeviceDetailsObject;
|
|
81
96
|
const hasCustomViewport = !!deviceConfig;
|
|
97
|
+
const rawDevice = (deviceConfig || {}) as Record<string, unknown>;
|
|
98
|
+
const { userAgent: deviceUserAgent, ...contextDeviceOptions } = rawDevice;
|
|
99
|
+
|
|
100
|
+
await initModifiedUserAgent(resolvedBrowserToRun, viewportSettings.playwrightDeviceDetailsObject);
|
|
82
101
|
|
|
83
|
-
const baseLaunchOptions = getPlaywrightLaunchOptions(
|
|
102
|
+
const baseLaunchOptions = getPlaywrightLaunchOptions(resolvedBrowserToRun);
|
|
84
103
|
|
|
85
104
|
// Merge base args with custom flow specific args
|
|
86
105
|
const baseArgs = baseLaunchOptions.args || [];
|
|
87
106
|
const customArgs = hasCustomViewport ? ['--window-size=1920,1040'] : ['--start-maximized'];
|
|
88
|
-
const mergedArgs = [
|
|
107
|
+
const mergedArgs = [
|
|
108
|
+
...baseArgs.filter(a => !a.startsWith('--window-size') && a !== '--start-maximized'),
|
|
109
|
+
...customArgs,
|
|
110
|
+
];
|
|
89
111
|
|
|
90
|
-
const
|
|
112
|
+
const context = await constants.launcher.launchPersistentContext(userDataDirectory, {
|
|
91
113
|
...baseLaunchOptions,
|
|
92
114
|
args: mergedArgs,
|
|
93
115
|
headless: false,
|
|
94
|
-
channel: 'chrome',
|
|
95
|
-
});
|
|
96
|
-
|
|
97
|
-
const context = await browser.newContext({
|
|
98
116
|
ignoreHTTPSErrors: true,
|
|
99
117
|
serviceWorkers: 'block',
|
|
100
118
|
viewport: null,
|
|
101
|
-
...(hasCustomViewport ?
|
|
119
|
+
...(hasCustomViewport ? contextDeviceOptions : {}),
|
|
120
|
+
userAgent: process.env.OOBEE_USER_AGENT || (deviceUserAgent as string | undefined),
|
|
102
121
|
});
|
|
103
122
|
|
|
104
123
|
register(context);
|
|
@@ -106,9 +125,7 @@ const runCustom = async (
|
|
|
106
125
|
processPageParams.stopAll = async () => {
|
|
107
126
|
try {
|
|
108
127
|
await context.close().catch(() => {});
|
|
109
|
-
|
|
110
|
-
} catch {
|
|
111
|
-
}
|
|
128
|
+
} catch {}
|
|
112
129
|
};
|
|
113
130
|
|
|
114
131
|
// For handling closing playwright browser and continue generate artifacts etc
|
|
@@ -116,12 +133,18 @@ const runCustom = async (
|
|
|
116
133
|
|
|
117
134
|
addUrlGuardScript(context, { fallbackUrl: url });
|
|
118
135
|
|
|
136
|
+
const page = context.pages().find(existingPage => !existingPage.isClosed()) || (await context.newPage());
|
|
137
|
+
await initNewPage(page, pageClosePromises, processPageParams, pagesDict);
|
|
138
|
+
|
|
119
139
|
// Detection of new page
|
|
120
140
|
context.on('page', async newPage => {
|
|
121
|
-
|
|
141
|
+
try {
|
|
142
|
+
await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
|
|
143
|
+
} catch (e) {
|
|
144
|
+
log(`Error initializing new page: ${e}`);
|
|
145
|
+
}
|
|
122
146
|
});
|
|
123
147
|
|
|
124
|
-
const page = await context.newPage();
|
|
125
148
|
await page.goto(url, { timeout: 0 });
|
|
126
149
|
|
|
127
150
|
// to execute and wait for all pages to close
|