@govtechsg/oobee 0.10.58 → 0.10.61
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- package/DETAILS.md +1 -1
- package/package.json +1 -1
- package/src/cli.ts +17 -64
- package/src/combine.ts +18 -4
- package/src/constants/common.ts +193 -293
- package/src/constants/constants.ts +2 -1
- package/src/constants/questions.ts +12 -4
- package/src/crawlers/commonCrawlerFunc.ts +9 -3
- package/src/crawlers/crawlDomain.ts +31 -83
- package/src/crawlers/crawlIntelligentSitemap.ts +16 -11
- package/src/crawlers/crawlLocalFile.ts +6 -17
- package/src/crawlers/crawlSitemap.ts +27 -93
- package/src/crawlers/custom/utils.ts +4 -4
- package/src/index.ts +2 -5
- package/src/logs.ts +1 -2
- package/src/mergeAxeResults.ts +35 -30
- package/src/npmIndex.ts +4 -4
- package/src/utils.ts +56 -14
package/src/index.ts
CHANGED
@@ -97,10 +97,7 @@ const runScan = async (answers: Answers) => {
|
|
97
97
|
answers.customDevice,
|
98
98
|
answers.viewportWidth,
|
99
99
|
);
|
100
|
-
|
101
|
-
deleteClonedProfiles(browserToRun);
|
102
|
-
answers.browserToRun = browserToRun;
|
103
|
-
|
100
|
+
|
104
101
|
if (!answers.nameEmail) {
|
105
102
|
answers.nameEmail = `${userData.name}:${userData.email}`;
|
106
103
|
}
|
@@ -116,7 +113,7 @@ const runScan = async (answers: Answers) => {
|
|
116
113
|
await combineRun(data, screenToScan);
|
117
114
|
|
118
115
|
// Delete cloned directory
|
119
|
-
deleteClonedProfiles(data.browser);
|
116
|
+
deleteClonedProfiles(data.browser, data.randomToken);
|
120
117
|
|
121
118
|
// Delete dataset and request queues
|
122
119
|
cleanUp(data.randomToken);
|
package/src/logs.ts
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
/* eslint-disable no-shadow */
|
3
3
|
import { createLogger, format, transports } from 'winston';
|
4
4
|
import { guiInfoStatusTypes } from './constants/constants.js';
|
5
|
-
import { urlWithoutAuth } from './constants/common.js';
|
6
5
|
|
7
6
|
const { combine, timestamp, printf } = format;
|
8
7
|
|
@@ -54,7 +53,7 @@ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScann
|
|
54
53
|
case guiInfoStatusTypes.DUPLICATE:
|
55
54
|
console.log(
|
56
55
|
`crawling::${data.numScanned || 0}::${status}::${
|
57
|
-
|
56
|
+
data.urlScanned || 'no url provided'
|
58
57
|
}`,
|
59
58
|
);
|
60
59
|
break;
|
package/src/mergeAxeResults.ts
CHANGED
@@ -15,7 +15,7 @@ import { pipeline } from 'stream/promises';
|
|
15
15
|
// @ts-ignore
|
16
16
|
import * as Sentry from '@sentry/node';
|
17
17
|
import constants, { ScannerTypes, sentryConfig, setSentryUser } from './constants/constants.js';
|
18
|
-
import {
|
18
|
+
import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
|
19
19
|
|
20
20
|
import {
|
21
21
|
createScreenshotsFolder,
|
@@ -961,29 +961,19 @@ const writeScanDetailsCsv = async (
|
|
961
961
|
});
|
962
962
|
};
|
963
963
|
|
964
|
-
let browserChannel =
|
964
|
+
let browserChannel = getBrowserToRun().browserToRun;
|
965
965
|
|
966
|
-
|
967
|
-
browserChannel = 'msedge';
|
968
|
-
}
|
969
|
-
|
970
|
-
if (os.platform() === 'linux') {
|
971
|
-
browserChannel = 'chromium';
|
972
|
-
}
|
973
|
-
|
974
|
-
const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary') => {
|
966
|
+
const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filename = 'summary', browser: string, userDataDirectory: string) => {
|
975
967
|
const htmlFilePath = `${storagePath}/${filename}.html`;
|
976
968
|
const fileDestinationPath = `${storagePath}/${filename}.pdf`;
|
977
|
-
const browser = await chromium.launch({
|
978
|
-
headless: false,
|
979
|
-
channel: browserChannel,
|
980
|
-
args: ['--headless=new', '--no-sandbox'],
|
981
|
-
});
|
982
969
|
|
983
|
-
const
|
984
|
-
|
985
|
-
|
986
|
-
|
970
|
+
const effectiveUserDataDirectory = process.env.CRAWLEE_HEADLESS === '1'
|
971
|
+
? userDataDirectory
|
972
|
+
: '';
|
973
|
+
const context = await constants.launcher.launchPersistentContext(effectiveUserDataDirectory, {
|
974
|
+
headless: process.env.CRAWLEE_HEADLESS === '1',
|
975
|
+
...getPlaywrightLaunchOptions(browser),
|
976
|
+
});
|
987
977
|
|
988
978
|
const page = await context.newPage();
|
989
979
|
|
@@ -1008,8 +998,7 @@ const writeSummaryPdf = async (storagePath: string, pagesScanned: number, filena
|
|
1008
998
|
|
1009
999
|
await page.close();
|
1010
1000
|
|
1011
|
-
await context.close();
|
1012
|
-
await browser.close();
|
1001
|
+
await context.close().catch(() => {});
|
1013
1002
|
|
1014
1003
|
if (pagesScanned < 2000) {
|
1015
1004
|
fs.unlinkSync(htmlFilePath);
|
@@ -1721,15 +1710,10 @@ const generateArtifacts = async (
|
|
1721
1710
|
zip: string = undefined, // optional
|
1722
1711
|
generateJsonFiles = false,
|
1723
1712
|
) => {
|
1724
|
-
const intermediateDatasetsPath = `${randomToken}/
|
1713
|
+
const intermediateDatasetsPath = `${getStoragePath(randomToken)}/crawlee`;
|
1725
1714
|
const oobeeAppVersion = getVersion();
|
1726
1715
|
const storagePath = getStoragePath(randomToken);
|
1727
1716
|
|
1728
|
-
urlScanned =
|
1729
|
-
scanType === ScannerTypes.SITEMAP || scanType === ScannerTypes.LOCALFILE
|
1730
|
-
? urlScanned
|
1731
|
-
: urlWithoutAuth(urlScanned);
|
1732
|
-
|
1733
1717
|
const formatAboutStartTime = (dateString: string) => {
|
1734
1718
|
const utcStartTimeDate = new Date(dateString);
|
1735
1719
|
const formattedStartTime = utcStartTimeDate.toLocaleTimeString('en-GB', {
|
@@ -1851,10 +1835,18 @@ const generateArtifacts = async (
|
|
1851
1835
|
|
1852
1836
|
printMessage([
|
1853
1837
|
'Scan Summary',
|
1838
|
+
`Oobee App Version: ${allIssues.oobeeAppVersion}`,
|
1854
1839
|
'',
|
1855
1840
|
`Site Name: ${allIssues.siteName}`,
|
1856
1841
|
`URL: ${allIssues.urlScanned}`,
|
1857
1842
|
`Pages Scanned: ${allIssues.totalPagesScanned}`,
|
1843
|
+
`Start Time: ${allIssues.startTime}`,
|
1844
|
+
`End Time: ${allIssues.endTime}`,
|
1845
|
+
`Elapsed Time: ${(new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000}s`,
|
1846
|
+
`Device: ${allIssues.deviceChosen}`,
|
1847
|
+
`Viewport: ${allIssues.viewport}`,
|
1848
|
+
`Scan Type: ${allIssues.scanType}`,
|
1849
|
+
`Label: ${allIssues.customFlowLabel || 'N/A'}`,
|
1858
1850
|
'',
|
1859
1851
|
`Must Fix: ${allIssues.items.mustFix.rules.length} ${Object.keys(allIssues.items.mustFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.mustFix.totalItems} ${allIssues.items.mustFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
|
1860
1852
|
`Good to Fix: ${allIssues.items.goodToFix.rules.length} ${Object.keys(allIssues.items.goodToFix.rules).length === 1 ? 'issue' : 'issues'} / ${allIssues.items.goodToFix.totalItems} ${allIssues.items.goodToFix.totalItems === 1 ? 'occurrence' : 'occurrences'}`,
|
@@ -1892,7 +1884,11 @@ const generateArtifacts = async (
|
|
1892
1884
|
consoleLogger.info(`End Time: ${allIssues.endTime}`);
|
1893
1885
|
const elapsedSeconds = (new Date(allIssues.endTime).getTime() - new Date(allIssues.startTime).getTime()) / 1000;
|
1894
1886
|
consoleLogger.info(`Elapsed Time: ${elapsedSeconds}s`);
|
1895
|
-
|
1887
|
+
consoleLogger.info(`Device: ${allIssues.deviceChosen}`);
|
1888
|
+
consoleLogger.info(`Viewport: ${allIssues.viewport}`);
|
1889
|
+
consoleLogger.info(`Scan Type: ${allIssues.scanType}`);
|
1890
|
+
consoleLogger.info(`Label: ${allIssues.customFlowLabel || 'N/A'}`);
|
1891
|
+
|
1896
1892
|
const getAxeImpactCount = (allIssues: AllIssues) => {
|
1897
1893
|
const impactCount = {
|
1898
1894
|
critical: 0,
|
@@ -1986,7 +1982,16 @@ const generateArtifacts = async (
|
|
1986
1982
|
]);
|
1987
1983
|
}
|
1988
1984
|
|
1989
|
-
|
1985
|
+
// Should consider refactor constants.userDataDirectory to be a parameter in future
|
1986
|
+
await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
|
1987
|
+
|
1988
|
+
const foldersToRemove = ['crawlee', 'logs'];
|
1989
|
+
for (const folder of foldersToRemove) {
|
1990
|
+
const folderPath = path.join(storagePath, folder);
|
1991
|
+
if (await fs.pathExists(folderPath)) {
|
1992
|
+
await fs.remove(folderPath);
|
1993
|
+
}
|
1994
|
+
}
|
1990
1995
|
|
1991
1996
|
// Take option if set
|
1992
1997
|
if (typeof zip === 'string') {
|
package/src/npmIndex.ts
CHANGED
@@ -10,7 +10,6 @@ import {
|
|
10
10
|
getBrowserToRun,
|
11
11
|
getPlaywrightLaunchOptions,
|
12
12
|
submitForm,
|
13
|
-
urlWithoutAuth,
|
14
13
|
} from './constants/common.js';
|
15
14
|
import { createCrawleeSubFolders, filterAxeResults } from './crawlers/commonCrawlerFunc.js';
|
16
15
|
import { createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
|
@@ -201,11 +200,12 @@ export const init = async ({
|
|
201
200
|
res: { pageUrl: string; pageTitle: string; axeScanResults: AxeResults },
|
202
201
|
metadata: string,
|
203
202
|
elementsToClick: string[],
|
203
|
+
randomToken: string,
|
204
204
|
) => {
|
205
205
|
throwErrorIfTerminated();
|
206
206
|
if (includeScreenshots) {
|
207
207
|
// use chrome by default
|
208
|
-
const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME);
|
208
|
+
const { browserToRun, clonedBrowserDataDir } = getBrowserToRun(BrowserTypes.CHROME, false, randomToken);
|
209
209
|
const browserContext = await constants.launcher.launchPersistentContext(
|
210
210
|
clonedBrowserDataDir,
|
211
211
|
{ viewport: viewportSettings, ...getPlaywrightLaunchOptions(browserToRun) },
|
@@ -237,7 +237,7 @@ export const init = async ({
|
|
237
237
|
);
|
238
238
|
|
239
239
|
await browserContext.close();
|
240
|
-
deleteClonedProfiles(browserToRun);
|
240
|
+
deleteClonedProfiles(browserToRun, randomToken);
|
241
241
|
}
|
242
242
|
const pageIndex = urlsCrawled.scanned.length + 1;
|
243
243
|
const filteredResults = filterAxeResults(res.axeScanResults, res.pageTitle, {
|
@@ -245,7 +245,7 @@ export const init = async ({
|
|
245
245
|
metadata,
|
246
246
|
});
|
247
247
|
urlsCrawled.scanned.push({
|
248
|
-
url:
|
248
|
+
url: res.pageUrl.toString(),
|
249
249
|
actualUrl: 'tbd',
|
250
250
|
pageTitle: `${pageIndex}: ${res.pageTitle}`,
|
251
251
|
});
|
package/src/utils.ts
CHANGED
@@ -11,6 +11,7 @@ import constants, {
|
|
11
11
|
} from './constants/constants.js';
|
12
12
|
import { consoleLogger, silentLogger } from './logs.js';
|
13
13
|
import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
|
14
|
+
import { constant } from 'lodash';
|
14
15
|
|
15
16
|
export const getVersion = () => {
|
16
17
|
const loadJSON = (filePath: string): { version: string } =>
|
@@ -33,21 +34,57 @@ export const isWhitelistedContentType = (contentType: string): boolean => {
|
|
33
34
|
};
|
34
35
|
|
35
36
|
export const getStoragePath = (randomToken: string): string => {
|
36
|
-
|
37
|
-
|
37
|
+
// If exportDirectory is set, use it
|
38
|
+
if (constants.exportDirectory) {
|
39
|
+
return constants.exportDirectory;
|
38
40
|
}
|
39
|
-
|
40
|
-
|
41
|
+
|
42
|
+
// Otherwise, use the current working directory
|
43
|
+
let storagePath = path.join(process.cwd(), 'results', randomToken);
|
44
|
+
|
45
|
+
// Ensure storagePath is writable; if directory doesn't exist, try to create it in Documents or home directory
|
46
|
+
const isWritable = (() => {
|
47
|
+
try {
|
48
|
+
if (!fs.existsSync(storagePath)) {
|
49
|
+
fs.mkdirSync(storagePath, { recursive: true });
|
50
|
+
}
|
51
|
+
fs.accessSync(storagePath, fs.constants.W_OK);
|
52
|
+
return true;
|
53
|
+
} catch {
|
54
|
+
return false;
|
55
|
+
}
|
56
|
+
})();
|
57
|
+
|
58
|
+
if (!isWritable) {
|
59
|
+
if (os.platform() === 'win32') {
|
60
|
+
// Use Documents folder on Windows
|
61
|
+
const documentsPath = path.join(process.env.USERPROFILE || process.env.HOMEPATH || '', 'Documents');
|
62
|
+
storagePath = path.join(documentsPath, 'Oobee', randomToken);
|
63
|
+
} else if (os.platform() === 'darwin') {
|
64
|
+
// Use Documents folder on Mac
|
65
|
+
const documentsPath = path.join(process.env.HOME || '', 'Documents');
|
66
|
+
storagePath = path.join(documentsPath, 'Oobee', randomToken);
|
67
|
+
} else {
|
68
|
+
// Use home directory for Linux/other
|
69
|
+
const homePath = process.env.HOME || '';
|
70
|
+
storagePath = path.join(homePath, 'Oobee', randomToken);
|
71
|
+
}
|
72
|
+
consoleLogger.warn(`Warning: Cannot write to cwd, writing to ${storagePath}`);
|
73
|
+
|
41
74
|
}
|
42
|
-
|
43
|
-
|
75
|
+
|
76
|
+
if (!fs.existsSync(storagePath)) {
|
77
|
+
fs.mkdirSync(storagePath, { recursive: true });
|
44
78
|
}
|
45
|
-
|
79
|
+
|
80
|
+
constants.exportDirectory = storagePath;
|
81
|
+
return storagePath;
|
82
|
+
|
46
83
|
};
|
47
84
|
|
48
85
|
export const createDetailsAndLogs = async (randomToken: string): Promise<void> => {
|
49
86
|
const storagePath = getStoragePath(randomToken);
|
50
|
-
const logPath =
|
87
|
+
const logPath = `${getStoragePath(randomToken)}/logs`;
|
51
88
|
try {
|
52
89
|
await fs.ensureDir(storagePath);
|
53
90
|
|
@@ -193,8 +230,11 @@ export const createScreenshotsFolder = (randomToken: string): void => {
|
|
193
230
|
}
|
194
231
|
};
|
195
232
|
|
196
|
-
export const cleanUp = (
|
197
|
-
fs.removeSync(
|
233
|
+
export const cleanUp = (randomToken: string): void => {
|
234
|
+
fs.removeSync(randomToken);
|
235
|
+
fs.removeSync(path.join(process.env.APPDATA || '/tmp', randomToken));
|
236
|
+
fs.removeSync(path.join(getStoragePath(randomToken),'crawlee'));
|
237
|
+
fs.removeSync(path.join(getStoragePath(randomToken),'logs'));
|
198
238
|
};
|
199
239
|
|
200
240
|
export const getWcagPassPercentage = (
|
@@ -711,16 +751,18 @@ export const zipResults = (zipName: string, resultsPath: string): void => {
|
|
711
751
|
fs.unlinkSync(zipName);
|
712
752
|
}
|
713
753
|
|
754
|
+
// Check if user specified absolute or relative path
|
755
|
+
const zipFilePath = path.isAbsolute(zipName) ? zipName : path.join(process.cwd(), zipName);
|
756
|
+
|
757
|
+
|
714
758
|
if (os.platform() === 'win32') {
|
715
759
|
execSync(
|
716
|
-
`Get-ChildItem -Path "
|
717
|
-
{ shell: 'powershell.exe' },
|
760
|
+
`Get-ChildItem -Path "*.*" -Recurse | Compress-Archive -DestinationPath "${zipFilePath}"`,
|
761
|
+
{ shell: 'powershell.exe', cwd: resultsPath },
|
718
762
|
);
|
719
763
|
} else {
|
720
764
|
// Get zip command in Mac and Linux
|
721
765
|
const command = '/usr/bin/zip';
|
722
|
-
// Check if user specified absolute or relative path
|
723
|
-
const zipFilePath = path.isAbsolute(zipName) ? zipName : path.join(process.cwd(), zipName);
|
724
766
|
|
725
767
|
// To zip up files recursively (-r) in the results folder path and write it to user's specified path
|
726
768
|
const args = ['-r', zipFilePath, '.'];
|