@govtechsg/oobee 0.10.39 → 0.10.43

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
- export function xPathToCss(expr: string) {
2
- const isValidXPath = expr =>
1
+ export default function xPathToCss(expr: string) {
2
+ const isValidXPath = (expr: string) =>
3
3
  typeof expr !== 'undefined' &&
4
4
  expr.replace(/[\s-_=]/g, '') !== '' &&
5
5
  expr.length ===
6
- expr.replace(
7
- /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
8
- '',
9
- ).length;
6
+ expr.replace(
7
+ /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi,
8
+ '',
9
+ ).length;
10
10
 
11
11
  const getValidationRegex = () => {
12
12
  let regex =
@@ -30,7 +30,7 @@ export function xPathToCss(expr: string) {
30
30
  value: '\\s*[\\w/:][-/\\w\\s,:;.]*',
31
31
  };
32
32
 
33
- Object.keys(subRegexes).forEach(key => {
33
+ Object.keys(subRegexes).forEach((key: keyof typeof subRegexes) => {
34
34
  regex = regex.replace(new RegExp(`%\\(${key}\\)s`, 'gi'), subRegexes[key]);
35
35
  });
36
36
 
@@ -42,14 +42,14 @@ export function xPathToCss(expr: string) {
42
42
  return new RegExp(regex, 'gi');
43
43
  };
44
44
 
45
- const preParseXpath = expr =>
45
+ const preParseXpath = (expr: string) =>
46
46
  expr.replace(
47
47
  /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi,
48
48
  '@class="$1"',
49
49
  );
50
50
 
51
- function escapeCssIdSelectors(cssSelector) {
52
- return cssSelector.replace(/#([^ >]+)/g, (match, id) => {
51
+ function escapeCssIdSelectors(cssSelector: string) {
52
+ return cssSelector.replace(/#([^ >]+)/g, (_match, id) => {
53
53
  // Escape special characters in the id part
54
54
  return `#${id.replace(/[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g, '\\$&')}`;
55
55
  });
@@ -256,30 +256,63 @@ export const handlePdfDownload = (
256
256
 
257
257
  pdfDownloads.push(
258
258
  new Promise<void>(async resolve => {
259
- const bufs = [];
260
- let pdfResponse: ReadStream;
259
+ let bufs: Buffer[] = [];
260
+ let buf: Buffer;
261
261
 
262
262
  if (isFilePath(url)) {
263
- // Read the file from the file system
263
+ // Read from local file system
264
264
  const filePath = new URL(url).pathname;
265
- pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
265
+ const pdfResponse = fs.createReadStream(filePath, { encoding: 'binary' });
266
+
267
+ const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
268
+ flags: 'a',
269
+ });
270
+
271
+ pdfResponse.on('data', (chunk: Buffer) => {
272
+ downloadFile.write(chunk, 'binary');
273
+ bufs.push(Buffer.from(chunk));
274
+ });
275
+
276
+ pdfResponse.on('end', () => {
277
+ downloadFile.end();
278
+ buf = Buffer.concat(bufs);
279
+
280
+ if (isPDF(buf)) {
281
+ guiInfoLog(guiInfoStatusTypes.SCANNED, {
282
+ numScanned: urlsCrawled.scanned.length,
283
+ urlScanned: request.url,
284
+ });
285
+ urlsCrawled.scanned.push({
286
+ url: request.url,
287
+ pageTitle,
288
+ actualUrl: url,
289
+ });
290
+ } else {
291
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
292
+ numScanned: urlsCrawled.scanned.length,
293
+ urlScanned: request.url,
294
+ });
295
+ urlsCrawled.invalid.push({
296
+ url: request.url,
297
+ pageTitle: url,
298
+ actualUrl: url,
299
+ });
300
+ }
301
+
302
+ resolve();
303
+ });
266
304
  } else {
267
- // Send HTTP/HTTPS request
268
- pdfResponse = await sendRequest({ responseType: 'buffer', isStream: true });
269
- pdfResponse.setEncoding('binary');
270
- }
271
- const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
272
- flags: 'a',
273
- });
305
+ // Download from remote URL
306
+ const response = await sendRequest({ responseType: 'buffer' });
307
+ buf = Buffer.isBuffer(response) ? response : response.body;
274
308
 
275
- pdfResponse.on('data', (chunk: Buffer) => {
276
- downloadFile.write(chunk, 'binary');
277
- bufs.push(Buffer.from(chunk));
278
- });
309
+ const downloadFile = fs.createWriteStream(`${randomToken}/${pdfFileName}.pdf`, {
310
+ flags: 'a',
311
+ });
279
312
 
280
- pdfResponse.on('end', () => {
313
+ downloadFile.write(buf, 'binary');
281
314
  downloadFile.end();
282
- const buf = Buffer.concat(bufs);
315
+
283
316
  if (isPDF(buf)) {
284
317
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
285
318
  numScanned: urlsCrawled.scanned.length,
@@ -298,11 +331,12 @@ export const handlePdfDownload = (
298
331
  urlsCrawled.invalid.push({
299
332
  url: request.url,
300
333
  pageTitle: url,
301
- actualUrl: url, // i.e. actualUrl
334
+ actualUrl: url,
302
335
  });
303
336
  }
337
+
304
338
  resolve();
305
- });
339
+ }
306
340
  }),
307
341
  );
308
342
 
@@ -374,14 +408,21 @@ export const mapPdfScanResults = async (
374
408
  const { itemDetails, validationResult } = jobs[jobIdx];
375
409
  const { name: fileName } = itemDetails;
376
410
 
377
- const uuid = fileName
378
- .split(os.platform() === 'win32' ? '\\' : '/')
379
- .pop()
380
- .split('.')[0];
381
- const url = uuidToUrlMapping[uuid];
382
- const pageTitle = decodeURI(url).split('/').pop();
383
- const filePath = `${randomToken}/${uuid}.pdf`;
411
+ const rawFileName = fileName.split(os.platform() === 'win32' ? '\\' : '/').pop();
412
+ const fileNameWithoutExt = rawFileName.replace(/\.pdf$/i, '');
384
413
 
414
+ const url =
415
+ uuidToUrlMapping[rawFileName] || // exact match like 'Some-filename.pdf'
416
+ uuidToUrlMapping[fileNameWithoutExt] || // uuid-based key like 'a9f7ebbd-5a90...'
417
+ `file://${fileName}`; // fallback
418
+
419
+ const filePath = `${randomToken}/${rawFileName}`;
420
+
421
+
422
+ const pageTitle = decodeURI(url).split('/').pop();
423
+ translated.url = url;
424
+ translated.pageTitle = pageTitle;
425
+
385
426
  translated.url = url;
386
427
  translated.pageTitle = pageTitle;
387
428
  translated.filePath = filePath;
@@ -48,7 +48,7 @@ const runCustom = async (
48
48
  includeScreenshots: boolean,
49
49
  ) => {
50
50
  // checks and delete datasets path if it already exists
51
- await cleanUp(randomToken);
51
+ cleanUp(randomToken);
52
52
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
53
53
 
54
54
  const urlsCrawled: UrlsCrawled = { ...constants.urlsCrawledObj };
package/src/index.ts CHANGED
@@ -1,6 +1,4 @@
1
1
  #!/usr/bin/env node
2
- /* eslint-disable func-names */
3
- /* eslint-disable no-param-reassign */
4
2
  import printMessage from 'print-message';
5
3
  import inquirer from 'inquirer';
6
4
  import { EnqueueStrategy } from 'crawlee';
@@ -22,6 +20,7 @@ import {
22
20
  import questions from './constants/questions.js';
23
21
  import combineRun from './combine.js';
24
22
  import { BrowserTypes, RuleFlags, ScannerTypes } from './constants/constants.js';
23
+ import { DeviceDescriptor } from './types/types.js';
25
24
 
26
25
  export type Answers = {
27
26
  headless: boolean;
@@ -32,7 +31,7 @@ export type Answers = {
32
31
  scanner: ScannerTypes;
33
32
  url: string;
34
33
  clonedBrowserDataDir: string;
35
- playwrightDeviceDetailsObject: object;
34
+ playwrightDeviceDetailsObject: DeviceDescriptor;
36
35
  nameEmail: string;
37
36
  fileTypes: string;
38
37
  metadata: string;
@@ -61,7 +60,7 @@ export type Data = {
61
60
  deviceChosen: string;
62
61
  customDevice: string;
63
62
  viewportWidth: number;
64
- playwrightDeviceDetailsObject: object;
63
+ playwrightDeviceDetailsObject: DeviceDescriptor;
65
64
  maxRequestsPerCrawl: number;
66
65
  strategy: EnqueueStrategy;
67
66
  isLocalFileScan: boolean;
package/src/logs.ts CHANGED
@@ -40,7 +40,7 @@ const silentLogger = createLogger({
40
40
  });
41
41
 
42
42
  // guiInfoLogger feeds the gui information via console log and is mainly used for scanning process
43
- export const guiInfoLog = (status, data) => {
43
+ export const guiInfoLog = (status: string, data: { numScanned?: number; urlScanned?: string }) => {
44
44
  if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE) {
45
45
  switch (status) {
46
46
  case guiInfoStatusTypes.COMPLETED: