@govtechsg/oobee 0.10.39 → 0.10.42
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- package/.github/workflows/docker-test.yml +1 -1
- package/README.md +2 -0
- package/REPORTS.md +362 -0
- package/package.json +1 -1
- package/src/crawlers/commonCrawlerFunc.ts +29 -1
- package/src/crawlers/crawlDomain.ts +4 -21
- package/src/crawlers/crawlSitemap.ts +1 -1
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +589 -554
- package/src/crawlers/pdfScanFunc.ts +67 -26
- package/src/mergeAxeResults.ts +302 -237
- package/src/screenshotFunc/htmlScreenshotFunc.ts +1 -1
- package/src/screenshotFunc/pdfScreenshotFunc.ts +34 -1
- package/src/utils.ts +289 -13
@@ -256,30 +256,63 @@ export const handlePdfDownload = (
|
|
256
256
|
|
257
257
|
pdfDownloads.push(
|
258
258
|
new Promise<void>(async resolve => {
|
259
|
-
|
260
|
-
let
|
259
|
+
let bufs: Buffer[] = [];
|
260
|
+
let buf: Buffer;
|
261
261
|
|
262
262
|
if (isFilePath(url)) {
|
263
|
-
// Read
|
263
|
+
// Read from local file system
|
264
264
|
const filePath = new URL(url).pathname;
|
265
|
-
pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
|
265
|
+
const pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
|
266
|
+
|
267
|
+
const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
|
268
|
+
flags: 'a',
|
269
|
+
});
|
270
|
+
|
271
|
+
pdfResponse.on('data', (chunk: Buffer) => {
|
272
|
+
downloadFile.write(chunk, 'binary');
|
273
|
+
bufs.push(Buffer.from(chunk));
|
274
|
+
});
|
275
|
+
|
276
|
+
pdfResponse.on('end', () => {
|
277
|
+
downloadFile.end();
|
278
|
+
buf = Buffer.concat(bufs);
|
279
|
+
|
280
|
+
if (isPDF(buf)) {
|
281
|
+
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
282
|
+
numScanned: urlsCrawled.scanned.length,
|
283
|
+
urlScanned: request.url,
|
284
|
+
});
|
285
|
+
urlsCrawled.scanned.push({
|
286
|
+
url: request.url,
|
287
|
+
pageTitle,
|
288
|
+
actualUrl: url,
|
289
|
+
});
|
290
|
+
} else {
|
291
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
292
|
+
numScanned: urlsCrawled.scanned.length,
|
293
|
+
urlScanned: request.url,
|
294
|
+
});
|
295
|
+
urlsCrawled.invalid.push({
|
296
|
+
url: request.url,
|
297
|
+
pageTitle: url,
|
298
|
+
actualUrl: url,
|
299
|
+
});
|
300
|
+
}
|
301
|
+
|
302
|
+
resolve();
|
303
|
+
});
|
266
304
|
} else {
|
267
|
-
//
|
268
|
-
|
269
|
-
|
270
|
-
}
|
271
|
-
const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
|
272
|
-
flags: 'a',
|
273
|
-
});
|
305
|
+
// Download from remote URL
|
306
|
+
const response = await sendRequest({ responseType: 'buffer' });
|
307
|
+
buf = Buffer.isBuffer(response) ? response : response.body;
|
274
308
|
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
});
|
309
|
+
const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
|
310
|
+
flags: 'a',
|
311
|
+
});
|
279
312
|
|
280
|
-
|
313
|
+
downloadFile.write(buf, 'binary');
|
281
314
|
downloadFile.end();
|
282
|
-
|
315
|
+
|
283
316
|
if (isPDF(buf)) {
|
284
317
|
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
285
318
|
numScanned: urlsCrawled.scanned.length,
|
@@ -298,11 +331,12 @@ export const handlePdfDownload = (
|
|
298
331
|
urlsCrawled.invalid.push({
|
299
332
|
url: request.url,
|
300
333
|
pageTitle: url,
|
301
|
-
actualUrl: url,
|
334
|
+
actualUrl: url,
|
302
335
|
});
|
303
336
|
}
|
337
|
+
|
304
338
|
resolve();
|
305
|
-
}
|
339
|
+
}
|
306
340
|
}),
|
307
341
|
);
|
308
342
|
|
@@ -374,14 +408,21 @@ export const mapPdfScanResults = async (
|
|
374
408
|
const { itemDetails, validationResult } = jobs[jobIdx];
|
375
409
|
const { name: fileName } = itemDetails;
|
376
410
|
|
377
|
-
const
|
378
|
-
|
379
|
-
.pop()
|
380
|
-
.split('.')[0];
|
381
|
-
const url = uuidToUrlMapping[uuid];
|
382
|
-
const pageTitle = decodeURI(url).split('/').pop();
|
383
|
-
const filePath = `${randomToken}/${uuid}.pdf`;
|
411
|
+
const rawFileName = fileName.split(os.platform() === 'win32' ? '\\' : '/').pop();
|
412
|
+
const fileNameWithoutExt = rawFileName.replace(/\.pdf$/i, '');
|
384
413
|
|
414
|
+
const url =
|
415
|
+
uuidToUrlMapping[rawFileName] || // exact match like 'Some-filename.pdf'
|
416
|
+
uuidToUrlMapping[fileNameWithoutExt] || // uuid-based key like 'a9f7ebbd-5a90...'
|
417
|
+
`file://${fileName}`; // fallback
|
418
|
+
|
419
|
+
const filePath = `${randomToken}/${rawFileName}`;
|
420
|
+
|
421
|
+
|
422
|
+
const pageTitle = decodeURI(url).split('/').pop();
|
423
|
+
translated.url = url;
|
424
|
+
translated.pageTitle = pageTitle;
|
425
|
+
|
385
426
|
translated.url = url;
|
386
427
|
translated.pageTitle = pageTitle;
|
387
428
|
translated.filePath = filePath;
|