@govtechsg/oobee 0.10.63 → 0.10.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +8 -3
- package/README.md +2 -0
- package/package.json +2 -1
- package/src/cli.ts +1 -2
- package/src/combine.ts +2 -2
- package/src/constants/common.ts +58 -52
- package/src/constants/constants.ts +45 -71
- package/src/constants/questions.ts +15 -2
- package/src/crawlers/crawlDomain.ts +1 -0
- package/src/crawlers/crawlIntelligentSitemap.ts +0 -3
- package/src/crawlers/crawlLocalFile.ts +29 -32
- package/src/crawlers/crawlSitemap.ts +7 -6
- package/src/crawlers/pdfScanFunc.ts +22 -50
- package/src/mergeAxeResults.ts +14 -3
- package/src/npmIndex.ts +2 -3
- package/src/proxyService.ts +405 -0
- package/src/screenshotFunc/pdfScreenshotFunc.ts +2 -5
- package/src/utils.ts +58 -60
package/src/utils.ts
CHANGED
|
@@ -11,8 +11,8 @@ import constants, {
|
|
|
11
11
|
} from './constants/constants.js';
|
|
12
12
|
import { consoleLogger, errorsTxtPath, silentLogger } from './logs.js';
|
|
13
13
|
import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
|
|
14
|
-
import
|
|
15
|
-
import {
|
|
14
|
+
import JSZip from 'jszip';
|
|
15
|
+
import { createReadStream, createWriteStream } from 'fs';
|
|
16
16
|
|
|
17
17
|
export const getVersion = () => {
|
|
18
18
|
const loadJSON = (filePath: string): { version: string } =>
|
|
@@ -34,6 +34,15 @@ export const isWhitelistedContentType = (contentType: string): boolean => {
|
|
|
34
34
|
return whitelist.filter(type => contentType.trim().startsWith(type)).length === 1;
|
|
35
35
|
};
|
|
36
36
|
|
|
37
|
+
/**
 * Returns the `pdfs` sub-directory of the storage path for the given scan
 * token, creating it first when nothing exists at that location yet.
 *
 * @param randomToken - Token passed through to `getStoragePath` to locate the storage directory.
 * @returns The path of the `pdfs` directory inside the storage path.
 */
export const getPdfStoragePath = (randomToken: string): string => {
  const pdfDir = path.join(getStoragePath(randomToken), 'pdfs');

  // Only create the directory when the path is absent
  const alreadyPresent = fs.existsSync(pdfDir);
  if (alreadyPresent === false) {
    fs.mkdirSync(pdfDir, { recursive: true });
  }

  return pdfDir;
};
|
|
45
|
+
|
|
37
46
|
export const getStoragePath = (randomToken: string): string => {
|
|
38
47
|
// If exportDirectory is set, use it
|
|
39
48
|
if (constants.exportDirectory) {
|
|
@@ -83,41 +92,6 @@ export const getStoragePath = (randomToken: string): string => {
|
|
|
83
92
|
|
|
84
93
|
};
|
|
85
94
|
|
|
86
|
-
export const createDetailsAndLogs = async (randomToken: string): Promise<void> => {
|
|
87
|
-
const storagePath = getStoragePath(randomToken);
|
|
88
|
-
const logPath = `${storagePath}}/logs`;
|
|
89
|
-
try {
|
|
90
|
-
await fs.ensureDir(storagePath);
|
|
91
|
-
|
|
92
|
-
// update logs
|
|
93
|
-
await fs.ensureDir(logPath);
|
|
94
|
-
await fs.pathExists('errors.txt').then(async exists => {
|
|
95
|
-
if (exists) {
|
|
96
|
-
try {
|
|
97
|
-
await fs.copy('errors.txt', `${logPath}/${randomToken}.txt`);
|
|
98
|
-
} catch (error) {
|
|
99
|
-
if (error.code === 'EBUSY') {
|
|
100
|
-
consoleLogger.error(
|
|
101
|
-
`Unable to copy the file from 'errors.txt' to '${logPath}/${randomToken}.txt' because it is currently in use.`,
|
|
102
|
-
);
|
|
103
|
-
consoleLogger.error(
|
|
104
|
-
'Please close any applications that might be using this file and try again.',
|
|
105
|
-
);
|
|
106
|
-
} else {
|
|
107
|
-
consoleLogger.error(
|
|
108
|
-
`An unexpected error occurred while copying the file: ${error.message}`,
|
|
109
|
-
);
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
});
|
|
114
|
-
} catch (error) {
|
|
115
|
-
consoleLogger.error(
|
|
116
|
-
`An error occurred while setting up storage or log directories: ${error.message}`,
|
|
117
|
-
);
|
|
118
|
-
}
|
|
119
|
-
};
|
|
120
|
-
|
|
121
95
|
export const getUserDataFilePath = () => {
|
|
122
96
|
const platform = os.platform();
|
|
123
97
|
if (platform === 'win32') {
|
|
@@ -401,6 +375,12 @@ export const cleanUp = async (randomToken?: string, isError: boolean = false): P
|
|
|
401
375
|
consoleLogger.warn(`Unable to force remove crawlee folder: ${error.message}`);
|
|
402
376
|
}
|
|
403
377
|
|
|
378
|
+
try {
|
|
379
|
+
fs.rmSync(path.join(storagePath, 'pdfs'), { recursive: true, force: true });
|
|
380
|
+
} catch (error) {
|
|
381
|
+
consoleLogger.warn(`Unable to force remove pdfs folder: ${error.message}`);
|
|
382
|
+
}
|
|
383
|
+
|
|
404
384
|
let deleteErrorLogFile = true;
|
|
405
385
|
|
|
406
386
|
if (isError) {
|
|
@@ -992,35 +972,53 @@ export const setThresholdLimits = (setWarnLevel: string): void => {
|
|
|
992
972
|
process.env.WARN_LEVEL = setWarnLevel;
|
|
993
973
|
};
|
|
994
974
|
|
|
995
|
-
export const zipResults = (zipName: string, resultsPath: string): void => {
|
|
996
|
-
//
|
|
997
|
-
|
|
998
|
-
fs.unlinkSync(zipName);
|
|
999
|
-
}
|
|
975
|
+
/**
 * Zips the contents of `resultsPath` into a single archive using JSZip.
 *
 * Files are added as read streams so that large files are not loaded into
 * memory in full, and the archive is streamed to disk.
 *
 * @param zipName - Absolute path of the archive, or a name/relative path that
 *   is resolved against `resultsPath`.
 * @param resultsPath - Directory whose contents are archived.
 * @returns A promise that resolves once the archive has been fully written.
 * @throws If `resultsPath` does not exist or is not a directory, if a previous
 *   archive cannot be removed, or if reading the inputs / writing the archive fails.
 */
export const zipResults = async (zipName: string, resultsPath: string): Promise<void> => {
  // Resolve the output path: a relative zipName lands inside resultsPath
  const zipFilePath = path.isAbsolute(zipName) ? zipName : path.join(resultsPath, zipName);

  // Ensure parent dir exists
  fs.mkdirSync(path.dirname(zipFilePath), { recursive: true });

  // Remove any prior archive. `force` only ignores a missing file; other
  // failures (e.g. permissions) surface instead of being silently swallowed.
  // Doing this before the directory walk also keeps a stale archive that lives
  // inside resultsPath from being packed into the new one.
  fs.rmSync(zipFilePath, { force: true });

  // resultsPath must exist (statSync throws otherwise) and be a directory
  if (!fs.statSync(resultsPath).isDirectory()) {
    throw new Error(`resultsPath is not a directory: ${resultsPath}`);
  }

  // Recursively mirrors folderPath into zipFolder
  const addFolderToZip = async (folderPath: string, zipFolder: JSZip): Promise<void> => {
    const items = await fs.readdir(folderPath);
    for (const item of items) {
      const fullPath = path.join(folderPath, item);
      const itemStats = await fs.stat(fullPath);
      if (itemStats.isDirectory()) {
        // folder() is typed as possibly returning null; fail loudly rather than recurse into null
        const folder = zipFolder.folder(item);
        if (!folder) {
          throw new Error(`Unable to create zip folder entry for: ${fullPath}`);
        }
        await addFolderToZip(fullPath, folder);
      } else {
        // Add file as a stream so that it doesn't load the entire file into memory
        zipFolder.file(item, createReadStream(fullPath));
      }
    }
  };

  // Walk the tree exactly once: every file visited opens a read stream, so a
  // second walk into a discarded JSZip instance would leak file descriptors.
  const zip = new JSZip();
  await addFolderToZip(resultsPath, zip);

  const zipStream = zip.generateNodeStream({
    type: 'nodebuffer',
    streamFiles: true,
    compression: 'DEFLATE',
  });

  await new Promise<void>((resolve, reject) => {
    const outStream = createWriteStream(zipFilePath);

    // pipe() does not forward source errors to the destination, so both ends
    // need their own handler or a read failure would leave this promise pending.
    zipStream.on('error', (error: Error) => {
      outStream.destroy();
      reject(error);
    });
    outStream.on('error', reject);
    outStream.on('finish', () => resolve());

    zipStream.pipe(outStream);
  });
};
|
|
1025
1023
|
|
|
1026
1024
|
// areLinksEqual compares 2 string URLs and ignores comparison of 'www.' and url protocol
|