@govtechsg/oobee 0.10.58 → 0.10.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DETAILS.md +1 -1
- package/README.md +1 -0
- package/package.json +3 -2
- package/src/cli.ts +46 -99
- package/src/combine.ts +18 -6
- package/src/constants/cliFunctions.ts +5 -4
- package/src/constants/common.ts +207 -295
- package/src/constants/constants.ts +65 -32
- package/src/constants/questions.ts +11 -5
- package/src/crawlers/commonCrawlerFunc.ts +11 -5
- package/src/crawlers/crawlDomain.ts +34 -86
- package/src/crawlers/crawlIntelligentSitemap.ts +18 -11
- package/src/crawlers/crawlLocalFile.ts +9 -17
- package/src/crawlers/crawlSitemap.ts +30 -96
- package/src/crawlers/custom/utils.ts +5 -5
- package/src/crawlers/pdfScanFunc.ts +3 -2
- package/src/crawlers/runCustom.ts +4 -3
- package/src/index.ts +8 -9
- package/src/logs.ts +36 -11
- package/src/mergeAxeResults.ts +37 -31
- package/src/npmIndex.ts +4 -4
- package/src/screenshotFunc/htmlScreenshotFunc.ts +4 -4
- package/src/static/ejs/partials/scripts/utils.ejs +8 -11
- package/src/utils.ts +304 -15
@@ -17,15 +17,15 @@ import {
|
|
17
17
|
getLinksFromSitemap,
|
18
18
|
getPlaywrightLaunchOptions,
|
19
19
|
isSkippedUrl,
|
20
|
-
urlWithoutAuth,
|
21
20
|
waitForPageLoaded,
|
22
21
|
isFilePath,
|
23
|
-
initModifiedUserAgent,
|
24
22
|
} from '../constants/common.js';
|
25
|
-
import { areLinksEqual, isWhitelistedContentType,
|
23
|
+
import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
|
26
24
|
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
|
27
25
|
import { guiInfoLog } from '../logs.js';
|
28
26
|
import { ViewportSettingsClass } from '../combine.js';
|
27
|
+
import * as path from 'path';
|
28
|
+
import fsp from 'fs/promises';
|
29
29
|
|
30
30
|
const crawlSitemap = async ({
|
31
31
|
sitemapUrl,
|
@@ -70,50 +70,19 @@ const crawlSitemap = async ({
|
|
70
70
|
let dataset: crawlee.Dataset;
|
71
71
|
let urlsCrawled: UrlsCrawled;
|
72
72
|
|
73
|
-
// Boolean to omit axe scan for basic auth URL
|
74
|
-
let isBasicAuth: boolean;
|
75
|
-
let basicAuthPage = 0;
|
76
|
-
let finalLinks = [];
|
77
|
-
let authHeader = '';
|
78
|
-
|
79
73
|
if (fromCrawlIntelligentSitemap) {
|
80
74
|
dataset = datasetFromIntelligent;
|
81
75
|
urlsCrawled = urlsCrawledFromIntelligent;
|
82
76
|
} else {
|
83
77
|
({ dataset } = await createCrawleeSubFolders(randomToken));
|
84
78
|
urlsCrawled = { ...constants.urlsCrawledObj };
|
85
|
-
|
86
|
-
if (!fs.existsSync(randomToken)) {
|
87
|
-
fs.mkdirSync(randomToken);
|
88
|
-
}
|
89
79
|
}
|
90
80
|
|
91
|
-
let parsedUrl;
|
92
|
-
let username = '';
|
93
|
-
let password = '';
|
94
|
-
|
95
81
|
if (!crawledFromLocalFile && isFilePath(sitemapUrl)) {
|
96
82
|
console.log('Local file crawling not supported for sitemap. Please provide a valid URL.');
|
97
83
|
return;
|
98
84
|
}
|
99
85
|
|
100
|
-
if (isFilePath(sitemapUrl)) {
|
101
|
-
parsedUrl = sitemapUrl;
|
102
|
-
} else {
|
103
|
-
parsedUrl = new URL(sitemapUrl);
|
104
|
-
if (parsedUrl.username !== '' && parsedUrl.password !== '') {
|
105
|
-
isBasicAuth = true;
|
106
|
-
username = decodeURIComponent(parsedUrl.username);
|
107
|
-
password = decodeURIComponent(parsedUrl.password);
|
108
|
-
|
109
|
-
// Create auth header
|
110
|
-
authHeader = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`;
|
111
|
-
|
112
|
-
parsedUrl.username = '';
|
113
|
-
parsedUrl.password = '';
|
114
|
-
}
|
115
|
-
}
|
116
|
-
|
117
86
|
const linksFromSitemap = await getLinksFromSitemap(
|
118
87
|
sitemapUrl,
|
119
88
|
maxRequestsPerCrawl,
|
@@ -121,29 +90,11 @@ const crawlSitemap = async ({
|
|
121
90
|
userDataDirectory,
|
122
91
|
userUrlInputFromIntelligent,
|
123
92
|
fromCrawlIntelligentSitemap,
|
124
|
-
|
125
|
-
password,
|
93
|
+
extraHTTPHeaders,
|
126
94
|
);
|
127
|
-
/**
|
128
|
-
* Regex to match http://username:password@hostname.com
|
129
|
-
* utilised in scan strategy to ensure subsequent URLs within the same domain are scanned.
|
130
|
-
* First time scan with original `url` containing credentials is strictly to authenticate for browser session
|
131
|
-
* subsequent URLs are without credentials.
|
132
|
-
* basicAuthPage is set to -1 for basic auth URL to ensure it is not counted towards maxRequestsPerCrawl
|
133
|
-
*/
|
134
95
|
|
135
96
|
sitemapUrl = encodeURI(sitemapUrl);
|
136
97
|
|
137
|
-
if (isBasicAuth) {
|
138
|
-
// request to basic auth URL to authenticate for browser session
|
139
|
-
finalLinks.push(new Request({ url: sitemapUrl, uniqueKey: `auth:${sitemapUrl}` }));
|
140
|
-
const finalUrl = `${sitemapUrl.split('://')[0]}://${sitemapUrl.split('@')[1]}`;
|
141
|
-
|
142
|
-
// obtain base URL without credentials so that subsequent URLs within the same domain can be scanned
|
143
|
-
finalLinks.push(new Request({ url: finalUrl }));
|
144
|
-
basicAuthPage = -2;
|
145
|
-
}
|
146
|
-
|
147
98
|
const pdfDownloads: Promise<void>[] = [];
|
148
99
|
const uuidToPdfMapping: Record<string, string> = {};
|
149
100
|
const isScanHtml = ['all', 'html-only'].includes(fileTypes);
|
@@ -151,36 +102,43 @@ const crawlSitemap = async ({
|
|
151
102
|
const { playwrightDeviceDetailsObject } = viewportSettings;
|
152
103
|
const { maxConcurrency } = constants;
|
153
104
|
|
154
|
-
finalLinks = [...finalLinks, ...linksFromSitemap];
|
155
|
-
|
156
105
|
const requestList = await RequestList.open({
|
157
|
-
sources:
|
106
|
+
sources: linksFromSitemap,
|
158
107
|
});
|
159
108
|
|
160
|
-
|
161
|
-
if (userDataDirectory) {
|
162
|
-
userDataDir = process.env.CRAWLEE_HEADLESS !== '0' ? userDataDirectory : '';
|
163
|
-
}
|
164
|
-
|
165
|
-
await initModifiedUserAgent(browser, playwrightDeviceDetailsObject);
|
166
|
-
const crawler = new crawlee.PlaywrightCrawler({
|
109
|
+
const crawler = register(new crawlee.PlaywrightCrawler({
|
167
110
|
launchContext: {
|
168
111
|
launcher: constants.launcher,
|
169
112
|
launchOptions: getPlaywrightLaunchOptions(browser),
|
170
113
|
// Bug in Chrome which causes browser pool crash when userDataDirectory is set in non-headless mode
|
171
|
-
...(process.env.CRAWLEE_HEADLESS === '
|
114
|
+
...(process.env.CRAWLEE_HEADLESS === '1' && { userDataDir: userDataDirectory }),
|
172
115
|
},
|
173
116
|
retryOnBlocked: true,
|
174
117
|
browserPoolOptions: {
|
175
118
|
useFingerprints: false,
|
176
119
|
preLaunchHooks: [
|
177
|
-
async (_pageId
|
120
|
+
async (_pageId, launchContext) => {
|
121
|
+
const baseDir = userDataDirectory; // e.g., /Users/young/.../Chrome/oobee-...
|
122
|
+
|
123
|
+
// Ensure base exists
|
124
|
+
await fsp.mkdir(baseDir, { recursive: true });
|
125
|
+
|
126
|
+
// Create a unique subdir per browser
|
127
|
+
const subProfileDir = path.join(baseDir, `profile-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
|
128
|
+
await fsp.mkdir(subProfileDir, { recursive: true });
|
129
|
+
|
130
|
+
// Assign to Crawlee's launcher
|
131
|
+
launchContext.userDataDir = subProfileDir;
|
132
|
+
|
133
|
+
// Safely extend launchOptions
|
178
134
|
launchContext.launchOptions = {
|
179
135
|
...launchContext.launchOptions,
|
180
|
-
bypassCSP: true,
|
181
136
|
ignoreHTTPSErrors: true,
|
182
137
|
...playwrightDeviceDetailsObject,
|
183
138
|
};
|
139
|
+
|
140
|
+
// Optionally log for debugging
|
141
|
+
// console.log(`[HOOK] Using userDataDir: ${subProfileDir}`);
|
184
142
|
},
|
185
143
|
],
|
186
144
|
},
|
@@ -193,7 +151,7 @@ const crawlSitemap = async ({
|
|
193
151
|
return new Promise(resolve => {
|
194
152
|
let timeout;
|
195
153
|
let mutationCount = 0;
|
196
|
-
const MAX_MUTATIONS =
|
154
|
+
const MAX_MUTATIONS = 500; // stop if things never quiet down
|
197
155
|
const OBSERVER_TIMEOUT = 5000; // hard cap on total wait
|
198
156
|
|
199
157
|
const observer = new MutationObserver(() => {
|
@@ -252,15 +210,7 @@ const crawlSitemap = async ({
|
|
252
210
|
return;
|
253
211
|
}
|
254
212
|
|
255
|
-
|
256
|
-
if (isBasicAuth) {
|
257
|
-
await page.setExtraHTTPHeaders({
|
258
|
-
Authorization: authHeader,
|
259
|
-
...extraHTTPHeaders,
|
260
|
-
});
|
261
|
-
} else {
|
262
|
-
preNavigationHooks(extraHTTPHeaders);
|
263
|
-
}
|
213
|
+
preNavigationHooks(extraHTTPHeaders);
|
264
214
|
},
|
265
215
|
],
|
266
216
|
requestHandlerTimeoutSecs: 90,
|
@@ -282,17 +232,6 @@ const crawlSitemap = async ({
|
|
282
232
|
return;
|
283
233
|
}
|
284
234
|
|
285
|
-
// Set basic auth header if needed
|
286
|
-
if (isBasicAuth) {
|
287
|
-
await page.setExtraHTTPHeaders({
|
288
|
-
Authorization: authHeader,
|
289
|
-
});
|
290
|
-
const currentUrl = new URL(request.url);
|
291
|
-
currentUrl.username = username;
|
292
|
-
currentUrl.password = password;
|
293
|
-
request.url = currentUrl.href;
|
294
|
-
}
|
295
|
-
|
296
235
|
await waitForPageLoaded(page, 10000);
|
297
236
|
|
298
237
|
const actualUrl = page.url() || request.loadedUrl || request.url;
|
@@ -341,9 +280,7 @@ const crawlSitemap = async ({
|
|
341
280
|
const contentType = response?.headers?.()['content-type'] || '';
|
342
281
|
const status = response ? response.status() : 0;
|
343
282
|
|
344
|
-
if (
|
345
|
-
basicAuthPage += 1;
|
346
|
-
} else if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
283
|
+
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
347
284
|
const isRedirected = !areLinksEqual(page.url(), request.url);
|
348
285
|
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
|
349
286
|
item => (item.actualUrl || item.url) === page.url(),
|
@@ -382,13 +319,13 @@ const crawlSitemap = async ({
|
|
382
319
|
});
|
383
320
|
|
384
321
|
urlsCrawled.scanned.push({
|
385
|
-
url:
|
322
|
+
url: request.url,
|
386
323
|
pageTitle: results.pageTitle,
|
387
324
|
actualUrl, // i.e. actualUrl
|
388
325
|
});
|
389
326
|
|
390
327
|
urlsCrawled.scannedRedirects.push({
|
391
|
-
fromUrl:
|
328
|
+
fromUrl: request.url,
|
392
329
|
toUrl: actualUrl,
|
393
330
|
});
|
394
331
|
|
@@ -421,9 +358,6 @@ const crawlSitemap = async ({
|
|
421
358
|
}
|
422
359
|
},
|
423
360
|
failedRequestHandler: async ({ request, response, error }) => {
|
424
|
-
if (isBasicAuth && request.url) {
|
425
|
-
request.url = `${request.url.split('://')[0]}://${request.url.split('@')[1]}`;
|
426
|
-
}
|
427
361
|
|
428
362
|
// check if scanned pages have reached limit due to multi-instances of handler running
|
429
363
|
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
|
@@ -461,7 +395,7 @@ const crawlSitemap = async ({
|
|
461
395
|
scaleDownStepRatio: 0.1, // Scale down slower
|
462
396
|
},
|
463
397
|
}),
|
464
|
-
});
|
398
|
+
}));
|
465
399
|
|
466
400
|
await crawler.run();
|
467
401
|
|
@@ -6,7 +6,7 @@ import path from 'path';
|
|
6
6
|
import { runAxeScript } from '../commonCrawlerFunc.js';
|
7
7
|
import { consoleLogger, guiInfoLog, silentLogger } from '../../logs.js';
|
8
8
|
import { guiInfoStatusTypes } from '../../constants/constants.js';
|
9
|
-
import { isSkippedUrl
|
9
|
+
import { isSkippedUrl } from '../../constants/common.js';
|
10
10
|
|
11
11
|
//! For Cypress Test
|
12
12
|
// env to check if Cypress test is running
|
@@ -77,8 +77,8 @@ export const screenshotFullPage = async (page, screenshotsDir: string, screensho
|
|
77
77
|
window.scrollTo(0, 0);
|
78
78
|
});
|
79
79
|
|
80
|
-
consoleLogger.info(`Screenshot page at: ${
|
81
|
-
|
80
|
+
consoleLogger.info(`Screenshot page at: ${page.url()}`);
|
81
|
+
consoleLogger.info(`Screenshot page at: ${page.url()}`);
|
82
82
|
|
83
83
|
await page.screenshot({
|
84
84
|
timeout: 5000,
|
@@ -116,7 +116,7 @@ export const runAxeScan = async (
|
|
116
116
|
await dataset.pushData(result);
|
117
117
|
|
118
118
|
urlsCrawled.scanned.push({
|
119
|
-
url:
|
119
|
+
url: page.url(),
|
120
120
|
pageTitle: result.pageTitle,
|
121
121
|
pageImagePath: customFlowDetails.pageImagePath,
|
122
122
|
});
|
@@ -469,7 +469,7 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
469
469
|
consoleLogger.info(`Overlay state: ${existingOverlay}`);
|
470
470
|
} catch {
|
471
471
|
consoleLogger.info('Error in adding overlay menu to page');
|
472
|
-
|
472
|
+
consoleLogger.info('Error in adding overlay menu to page');
|
473
473
|
}
|
474
474
|
});
|
475
475
|
|
@@ -15,6 +15,7 @@ import constants, {
|
|
15
15
|
STATUS_CODE_METADATA,
|
16
16
|
UrlsCrawled,
|
17
17
|
} from '../constants/constants.js';
|
18
|
+
import { cleanUpAndExit } from '../utils.js';
|
18
19
|
|
19
20
|
const require = createRequire(import.meta.url);
|
20
21
|
|
@@ -233,7 +234,7 @@ const getVeraExecutable = () => {
|
|
233
234
|
const veraPdfExeNotFoundError =
|
234
235
|
'Could not find veraPDF executable. Please ensure veraPDF is installed at current directory.';
|
235
236
|
consoleLogger.error(veraPdfExeNotFoundError);
|
236
|
-
|
237
|
+
consoleLogger.error(veraPdfExeNotFoundError);
|
237
238
|
}
|
238
239
|
return veraPdfExe;
|
239
240
|
};
|
@@ -355,7 +356,7 @@ export const runPdfScan = async (randomToken: string) => {
|
|
355
356
|
'profiles/veraPDF-validation-profiles-rel-1.26/PDF_UA/WCAG-2-2.xml',
|
356
357
|
)}"`;
|
357
358
|
if (!veraPdfExe || !veraPdfProfile) {
|
358
|
-
|
359
|
+
cleanUpAndExit(1);
|
359
360
|
}
|
360
361
|
|
361
362
|
const intermediateFolder = randomToken; // NOTE: assumes this folder is already created for crawlee
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/* eslint-env browser */
|
2
2
|
import { chromium } from 'playwright';
|
3
3
|
import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
|
4
|
-
import {
|
4
|
+
import { cleanUpAndExit, register} from '../utils.js';
|
5
5
|
import constants, {
|
6
6
|
getIntermediateScreenshotsPath,
|
7
7
|
guiInfoStatusTypes,
|
@@ -48,7 +48,6 @@ const runCustom = async (
|
|
48
48
|
includeScreenshots: boolean,
|
49
49
|
) => {
|
50
50
|
// checks and delete datasets path if it already exists
|
51
|
-
cleanUp(randomToken);
|
52
51
|
process.env.CRAWLEE_STORAGE_DIR = randomToken;
|
53
52
|
|
54
53
|
const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
|
@@ -83,6 +82,8 @@ const runCustom = async (
|
|
83
82
|
...viewportSettings.playwrightDeviceDetailsObject,
|
84
83
|
});
|
85
84
|
|
85
|
+
register(context);
|
86
|
+
|
86
87
|
// Detection of new page
|
87
88
|
context.on('page', async newPage => {
|
88
89
|
await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
|
@@ -107,7 +108,7 @@ const runCustom = async (
|
|
107
108
|
await allPagesClosedPromise(pageClosePromises);
|
108
109
|
} catch (error) {
|
109
110
|
log(`PLAYWRIGHT EXECUTION ERROR ${error}`);
|
110
|
-
|
111
|
+
cleanUpAndExit(1, randomToken, true);
|
111
112
|
}
|
112
113
|
|
113
114
|
guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
|
package/src/index.ts
CHANGED
@@ -7,6 +7,8 @@ import {
|
|
7
7
|
cleanUp,
|
8
8
|
getUserDataTxt,
|
9
9
|
writeToUserDataTxt,
|
10
|
+
listenForCleanUp,
|
11
|
+
cleanUpAndExit,
|
10
12
|
} from './utils.js';
|
11
13
|
import {
|
12
14
|
prepareData,
|
@@ -97,10 +99,7 @@ const runScan = async (answers: Answers) => {
|
|
97
99
|
answers.customDevice,
|
98
100
|
answers.viewportWidth,
|
99
101
|
);
|
100
|
-
|
101
|
-
deleteClonedProfiles(browserToRun);
|
102
|
-
answers.browserToRun = browserToRun;
|
103
|
-
|
102
|
+
|
104
103
|
if (!answers.nameEmail) {
|
105
104
|
answers.nameEmail = `${userData.name}:${userData.email}`;
|
106
105
|
}
|
@@ -109,19 +108,19 @@ const runScan = async (answers: Answers) => {
|
|
109
108
|
answers.metadata = '{}';
|
110
109
|
|
111
110
|
const data: Data = await prepareData(answers);
|
111
|
+
|
112
|
+
// Executes cleanUp script if error encountered
|
113
|
+
listenForCleanUp(data.randomToken);
|
114
|
+
|
112
115
|
data.userDataDirectory = getClonedProfilesWithRandomToken(data.browser, data.randomToken);
|
113
116
|
|
114
117
|
printMessage(['Scanning website...'], messageOptions);
|
115
118
|
|
116
119
|
await combineRun(data, screenToScan);
|
117
120
|
|
118
|
-
// Delete cloned directory
|
119
|
-
deleteClonedProfiles(data.browser);
|
120
|
-
|
121
121
|
// Delete dataset and request queues
|
122
|
-
|
122
|
+
cleanUpAndExit(0, data.randomToken);
|
123
123
|
|
124
|
-
process.exit(0);
|
125
124
|
};
|
126
125
|
|
127
126
|
if (userData) {
|
package/src/logs.ts
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
/* eslint-disable no-shadow */
|
3
3
|
import { createLogger, format, transports } from 'winston';
|
4
4
|
import { guiInfoStatusTypes } from './constants/constants.js';
|
5
|
-
import
|
5
|
+
import path from 'path';
|
6
|
+
import { randomUUID } from 'crypto';
|
6
7
|
|
7
8
|
const { combine, timestamp, printf } = format;
|
8
9
|
|
@@ -21,12 +22,32 @@ const logFormat = printf(({ timestamp, level, message }) => {
|
|
21
22
|
// transport: storage device for logs
|
22
23
|
// Enabled for console and storing into files; Files are overwritten each time
|
23
24
|
// All logs in combined.txt, error in errors.txt
|
25
|
+
const uuid = randomUUID();
|
26
|
+
let basePath: string;
|
27
|
+
|
28
|
+
if (process.env.OOBEE_LOGS_PATH) {
|
29
|
+
basePath = process.env.OOBEE_LOGS_PATH;
|
30
|
+
} else if (process.platform === 'win32') {
|
31
|
+
basePath = path.join(process.env.APPDATA, 'Oobee');
|
32
|
+
} else if (process.platform === 'darwin') {
|
33
|
+
basePath = path.join(process.env.HOME, 'Library', 'Application Support', 'Oobee');
|
34
|
+
} else {
|
35
|
+
basePath = path.join(process.cwd());
|
36
|
+
}
|
37
|
+
|
38
|
+
export const errorsTxtPath = path.join(basePath, `${uuid}.txt`);
|
24
39
|
|
25
40
|
const consoleLogger = createLogger({
|
26
41
|
silent: !(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE),
|
27
42
|
format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
|
28
|
-
transports:
|
29
|
-
|
43
|
+
transports: [
|
44
|
+
new transports.Console({ level: 'info' }),
|
45
|
+
new transports.File({
|
46
|
+
filename: errorsTxtPath,
|
47
|
+
level: 'info',
|
48
|
+
handleExceptions: true,
|
49
|
+
}),
|
50
|
+
],
|
30
51
|
});
|
31
52
|
|
32
53
|
// No display in consoles, this will mostly be used within the interactive script to avoid disrupting the flow
|
@@ -35,9 +56,10 @@ const consoleLogger = createLogger({
|
|
35
56
|
const silentLogger = createLogger({
|
36
57
|
format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
|
37
58
|
transports: [
|
38
|
-
|
39
|
-
|
40
|
-
|
59
|
+
new transports.File({
|
60
|
+
filename: errorsTxtPath,
|
61
|
+
level: 'warn',
|
62
|
+
handleExceptions: true }),
|
41
63
|
].filter(Boolean),
|
42
64
|
});
|
43
65
|
|
@@ -47,16 +69,17 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
|
|
47
69
|
switch (status) {
|
48
70
|
case guiInfoStatusTypes.COMPLETED:
|
49
71
|
console.log('Scan completed');
|
72
|
+
silentLogger.info('Scan completed');
|
50
73
|
break;
|
51
74
|
case guiInfoStatusTypes.SCANNED:
|
52
75
|
case guiInfoStatusTypes.SKIPPED:
|
53
76
|
case guiInfoStatusTypes.ERROR:
|
54
77
|
case guiInfoStatusTypes.DUPLICATE:
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
);
|
78
|
+
const msg = `crawling::${data.numScanned || 0}::${status}::${
|
79
|
+
data.urlScanned || 'no url provided'
|
80
|
+
}`;
|
81
|
+
console.log(msg);
|
82
|
+
silentLogger.info(msg);
|
60
83
|
break;
|
61
84
|
default:
|
62
85
|
console.log(`Status provided to gui info log not recognized: ${status}`);
|
@@ -65,4 +88,6 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
|
|
65
88
|
}
|
66
89
|
};
|
67
90
|
|
91
|
+
consoleLogger.info(`Logger writing to: ${errorsTxtPath}`);
|
92
|
+
|
68
93
|
export { logFormat, consoleLogger, silentLogger };
|
package/src/mergeAxeResults.ts
CHANGED
@@ -15,7 +15,7 @@ import { pipeline } from 'stream/promises';
|
|
15
15
|
// @ts-ignore
|
16
16
|
import * as Sentry from '@sentry/node';
|
17
17
|
import constants, { ScannerTypes, sentryConfig, setSentryUser } from './constants/constants.js';
|
18
|
-
import {
|
18
|
+
import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
|
19
19
|
|
20
20
|
import {
|
21
21
|
createScreenshotsFolder,
|
@@ -29,6 +29,7 @@ import {
|
|
29
29
|
getWcagCriteriaMap,
|
30
30
|
categorizeWcagCriteria,
|
31
31
|
getUserDataTxt,
|
32
|
+
register
|
32
33
|
} from './utils.js';
|
33
34
|
import { consoleLogger, silentLogger } from './logs.js';
|
34
35
|
import itemTypeDescription from './constants/itemTypeDescription.js';
|
@@ -961,29 +962,21 @@ const writeScanDetailsCsv = async (
|
|
961
962
|
});
|
962
963
|
};
|
963
964
|
|
964
|
-
let browserChannel =
|
965
|
+
let browserChannel = getBrowserToRun().browserToRun;
|
965
966
|
|
966
|
-
|
967
|
-
browserChannel = 'msedge';
|
968
|
-
}
|
969
|
-
|
970
|
-
if (os.platform() === 'linux') {
|
971
|
-
browserChannel = 'chromium';
|
972
|
-
}
|
973
|
-
|
974
|
-
const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary') => {
|
967
|
+
const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary', browser: string, userDataDirectory: string) => {
|
975
968
|
const htmlFilePath = `${storagePath}/${filename}.html`;
|
976
969
|
const fileDestinationPath = `${storagePath}/${filename}.pdf`;
|
977
|
-
const browser = await chromium.launch({
|
978
|
-
headless: false,
|
979
|
-
channel: browserChannel,
|
980
|
-
args: ['--headless=new', '--no-sandbox'],
|
981
|
-
});
|
982
970
|
|
983
|
-
const
|
984
|
-
|
985
|
-
|
986
|
-
|
971
|
+
const effectiveUserDataDirectory = process.env.CRAWLEE_HEADLESS === '1'
|
972
|
+
? userDataDirectory
|
973
|
+
: '';
|
974
|
+
const context = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, {
|
975
|
+
headless: process.env.CRAWLEE_HEADLESS === '1',
|
976
|
+
...getPlaywrightLaunchOptions(browser),
|
977
|
+
});
|
978
|
+
|
979
|
+
register(context);
|
987
980
|
|
988
981
|
const page = await context.newPage();
|
989
982
|
|
@@ -1008,8 +1001,7 @@ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filena
|
|
1008
1001
|
|
1009
1002
|
await page.close();
|
1010
1003
|
|
1011
|
-
await context.close();
|
1012
|
-
await browser.close();
|
1004
|
+
await context.close().catch(() => {});
|
1013
1005
|
|
1014
1006
|
if (pagesScanned < 2000) {
|
1015
1007
|
fs.unlinkSync(htmlFilePath);
|
@@ -1721,14 +1713,9 @@ const generateArtifacts = async (
|
|
1721
1713
|
zip: string = undefined, // optional
|
1722
1714
|
generateJsonFiles = false,
|
1723
1715
|
) => {
|
1724
|
-
const intermediateDatasetsPath = `${randomToken}/datasets/${randomToken}`;
|
1725
|
-
const oobeeAppVersion = getVersion();
|
1726
1716
|
const storagePath = getStoragePath(randomToken);
|
1727
|
-
|
1728
|
-
|
1729
|
-
scanType === ScannerTypes.SITEMAP || scanType === ScannerTypes.LOCALFILE
|
1730
|
-
? urlScanned
|
1731
|
-
: urlWithoutAuth(urlScanned);
|
1717
|
+
const intermediateDatasetsPath = `${storagePath}/crawlee`;
|
1718
|
+
const oobeeAppVersion = getVersion();
|
1732
1719
|
|
1733
1720
|
const formatAboutStartTime = (dateString: string) => {
|
1734
1721
|
const utcStartTimeDate = new Date(dateString);
|
@@ -1851,10 +1838,18 @@ const generateArtifacts = async (
|
|
1851
1838
|
|
1852
1839
|
printMessage([
|
1853
1840
|
'Scan Summary',
|
1841
|
+
`Oobee App Version: ${allIssues.oobeeAppVersion}`,
|
1854
1842
|
'',
|
1855
1843
|
`Site Name: ${allIssues.siteName}`,
|
1856
1844
|
`URL: ${allIssues.urlScanned}`,
|
1857
1845
|
`Pages Scanned: ${allIssues.totalPagesScanned}`,
|
1846
|
+
`Start Time: ${allIssues.startTime}`,
|
1847
|
+
`End Time: ${allIssues.endTime}`,
|
1848
|
+
`Elapsed Time: ${(new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000}s`,
|
1849
|
+
`Device: ${allIssues.deviceChosen}`,
|
1850
|
+
`Viewport: ${allIssues.viewport}`,
|
1851
|
+
`Scan Type: ${allIssues.scanType}`,
|
1852
|
+
`Label: ${allIssues.customFlowLabel || 'N/A'}`,
|
1858
1853
|
'',
|
1859
1854
|
`Must Fix: ${allIssues.items.mustFix.rules.length} ${Object.keys(allIssues.items.mustFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.mustFix.totalItems} ${allIssues.items.mustFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
|
1860
1855
|
`Good to Fix: ${allIssues.items.goodToFix.rules.length} ${Object.keys(allIssues.items.goodToFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.goodToFix.totalItems} ${allIssues.items.goodToFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
|
@@ -1892,7 +1887,11 @@ const generateArtifacts = async (
|
|
1892
1887
|
consoleLogger.info(`End Time: ${allIssues.endTime}`);
|
1893
1888
|
const elapsedSeconds = (new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000;
|
1894
1889
|
consoleLogger.info(`Elapsed Time: ${elapsedSeconds}s`);
|
1895
|
-
|
1890
|
+
consoleLogger.info(`Device: ${allIssues.deviceChosen}`);
|
1891
|
+
consoleLogger.info(`Viewport: ${allIssues.viewport}`);
|
1892
|
+
consoleLogger.info(`Scan Type: ${allIssues.scanType}`);
|
1893
|
+
consoleLogger.info(`Label: ${allIssues.customFlowLabel || 'N/A'}`);
|
1894
|
+
|
1896
1895
|
const getAxeImpactCount = (allIssues: AllIssues) => {
|
1897
1896
|
const impactCount = {
|
1898
1897
|
critical: 0,
|
@@ -1986,7 +1985,14 @@ const generateArtifacts = async (
|
|
1986
1985
|
]);
|
1987
1986
|
}
|
1988
1987
|
|
1989
|
-
|
1988
|
+
// Should consider refactor constants.userDataDirectory to be a parameter in future
|
1989
|
+
await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
|
1990
|
+
|
1991
|
+
try {
|
1992
|
+
fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
|
1993
|
+
} catch (error) {
|
1994
|
+
consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
|
1995
|
+
}
|
1990
1996
|
|
1991
1997
|
// Take option if set
|
1992
1998
|
if (typeof zip === 'string') {
|
package/src/npmIndex.ts
CHANGED
@@ -10,7 +10,6 @@ import {
|
|
10
10
|
getBrowserToRun,
|
11
11
|
getPlaywrightLaunchOptions,
|
12
12
|
submitForm,
|
13
|
-
urlWithoutAuth,
|
14
13
|
} from './constants/common.js';
|
15
14
|
import { createCrawleeSubFolders, filterAxeResults } from './crawlers/commonCrawlerFunc.js';
|
16
15
|
import { createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
|
@@ -201,11 +200,12 @@ export const init = async ({
|
|
201
200
|
res: { pageUrl: string; pageTitle: string; axeScanResults: AxeResults },
|
202
201
|
metadata: string,
|
203
202
|
elementsToClick: string[],
|
203
|
+
randomToken: string,
|
204
204
|
) => {
|
205
205
|
throwErrorIfTerminated();
|
206
206
|
if (includeScreenshots) {
|
207
207
|
// use chrome by default
|
208
|
-
const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME);
|
208
|
+
const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME, false, randomToken);
|
209
209
|
const browserContext = await constants.launcher.launchPersistentContext(
|
210
210
|
clonedBrowserDataDir,
|
211
211
|
{ viewport: viewportSettings, ...getPlaywrightLaunchOptions(browserToRun) },
|
@@ -237,7 +237,7 @@ export const init = async ({
|
|
237
237
|
);
|
238
238
|
|
239
239
|
await browserContext.close();
|
240
|
-
deleteClonedProfiles(browserToRun);
|
240
|
+
deleteClonedProfiles(browserToRun, randomToken);
|
241
241
|
}
|
242
242
|
const pageIndex = urlsCrawled.scanned.length + 1;
|
243
243
|
const filteredResults = filterAxeResults(res.axeScanResults, res.pageTitle, {
|
@@ -245,7 +245,7 @@ export const init = async ({
|
|
245
245
|
metadata,
|
246
246
|
});
|
247
247
|
urlsCrawled.scanned.push({
|
248
|
-
url:
|
248
|
+
url: res.pageUrl.toString(),
|
249
249
|
actualUrl: 'tbd',
|
250
250
|
pageTitle: `${pageIndex}: ${res.pageTitle}`,
|
251
251
|
});
|
@@ -22,7 +22,7 @@ export const takeScreenshotForHTMLElements = async (
|
|
22
22
|
for (const violation of violations) {
|
23
23
|
if (screenshotCount >= maxScreenshots) {
|
24
24
|
/*
|
25
|
-
|
25
|
+
consoleLogger.warn(
|
26
26
|
`Skipping screenshots for ${violation.id} as maxScreenshots (${maxScreenshots}) exceeded. You can increase it by specifying a higher value when calling takeScreenshotForHTMLElements.`,
|
27
27
|
);
|
28
28
|
*/
|
@@ -34,7 +34,7 @@ export const takeScreenshotForHTMLElements = async (
|
|
34
34
|
|
35
35
|
// Check if rule ID is 'oobee-grading-text-contents' and skip screenshot logic
|
36
36
|
if (rule === 'oobee-grading-text-contents') {
|
37
|
-
//
|
37
|
+
// consoleLogger.info('Skipping screenshot for rule oobee-grading-text-contents');
|
38
38
|
newViolations.push(violation); // Make sure it gets added
|
39
39
|
continue;
|
40
40
|
}
|
@@ -59,13 +59,13 @@ export const takeScreenshotForHTMLElements = async (
|
|
59
59
|
nodeWithScreenshotPath.screenshotPath = screenshotPath;
|
60
60
|
screenshotCount++;
|
61
61
|
} else {
|
62
|
-
//
|
62
|
+
// consoleLogger.info(`Element at ${currLocator} is not visible`);
|
63
63
|
}
|
64
64
|
|
65
65
|
break; // Stop looping after finding the first visible locator
|
66
66
|
}
|
67
67
|
} catch (e) {
|
68
|
-
//
|
68
|
+
// consoleLogger.info(`Unable to take element screenshot at ${selector}`);
|
69
69
|
}
|
70
70
|
}
|
71
71
|
newViolationNodes.push(nodeWithScreenshotPath);
|