@govtechsg/oobee 0.10.51 → 0.10.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,15 @@
- import { Request, RequestList } from 'crawlee';
- import printMessage from 'print-message';
+ import { Request, RequestList, Dataset } from 'crawlee';
  import fs from 'fs';
  import path from 'path';
  import { createCrawleeSubFolders, runAxeScript, isUrlPdf } from './commonCrawlerFunc.js';
- import constants, { guiInfoStatusTypes, basicAuthRegex } from '../constants/constants.js';
+ import constants, {
+ guiInfoStatusTypes,
+ basicAuthRegex,
+ UrlsCrawled,
+ } from '../constants/constants.js';
+ import { ViewportSettingsClass } from '../combine.js';
  import {
  getPlaywrightLaunchOptions,
- messageOptions,
  isFilePath,
  convertLocalFileToPath,
  convertPathToLocalFile,
@@ -16,27 +19,47 @@ import { runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.j
  import { guiInfoLog } from '../logs.js';
  import crawlSitemap from './crawlSitemap.js';

- const crawlLocalFile = async (
- sitemapUrl: string,
- randomToken: string,
- host: string,
- viewportSettings: any,
- maxRequestsPerCrawl: number,
- browser: string,
- userDataDirectory: string,
- specifiedMaxConcurrency: number,
- fileTypes: string,
- blacklistedPatterns: string[],
- includeScreenshots: boolean,
- extraHTTPHeaders: any,
- fromCrawlIntelligentSitemap: boolean = false, // optional
- userUrlInputFromIntelligent: any = null, // optional
- datasetFromIntelligent: any = null, // optional
- urlsCrawledFromIntelligent: any = null, // optional
- ) => {
+ export const crawlLocalFile = async ({
+ url,
+ randomToken,
+ host,
+ viewportSettings,
+ maxRequestsPerCrawl,
+ browser,
+ userDataDirectory,
+ specifiedMaxConcurrency,
+ fileTypes,
+ blacklistedPatterns,
+ includeScreenshots,
+ extraHTTPHeaders,
+ scanDuration = 0,
+ fromCrawlIntelligentSitemap = false,
+ userUrlInputFromIntelligent = null,
+ datasetFromIntelligent = null,
+ urlsCrawledFromIntelligent = null,
+ }: {
+ url: string;
+ randomToken: string;
+ host: string;
+ viewportSettings: ViewportSettingsClass;
+ maxRequestsPerCrawl: number;
+ browser: string;
+ userDataDirectory: string;
+ specifiedMaxConcurrency: number;
+ fileTypes: string;
+ blacklistedPatterns: string[];
+ includeScreenshots: boolean;
+ extraHTTPHeaders: Record<string, string>;
+ scanDuration?: number;
+ fromCrawlIntelligentSitemap?: boolean;
+ userUrlInputFromIntelligent?: string | null;
+ datasetFromIntelligent?: Dataset | null;
+ urlsCrawledFromIntelligent?: UrlsCrawled | null;
+ }) => {
  let dataset: any;
- let urlsCrawled: any;
+ let urlsCrawled: UrlsCrawled;
  let linksFromSitemap = [];
+ let sitemapUrl = url;

  // Boolean to omit axe scan for basic auth URL
  let isBasicAuth: boolean;
@@ -82,7 +105,7 @@ const crawlLocalFile = async (
  // Non XML file
  } else {
  // Put it to crawlSitemap function to handle xml files
- const updatedUrlsCrawled = await crawlSitemap(
+ const updatedUrlsCrawled = await crawlSitemap({
  sitemapUrl,
  randomToken,
  host,
@@ -95,12 +118,13 @@ const crawlLocalFile = async (
  blacklistedPatterns,
  includeScreenshots,
  extraHTTPHeaders,
- (fromCrawlIntelligentSitemap = false), // optional
- (userUrlInputFromIntelligent = null), // optional
- (datasetFromIntelligent = null), // optional
- (urlsCrawledFromIntelligent = null), // optional
- true,
- );
+ scanDuration,
+ fromCrawlIntelligentSitemap,
+ userUrlInputFromIntelligent,
+ datasetFromIntelligent,
+ urlsCrawledFromIntelligent,
+ crawledFromLocalFile: true,
+ });

  urlsCrawled = { ...urlsCrawled, ...updatedUrlsCrawled };
  return urlsCrawled;
@@ -124,16 +148,12 @@ const crawlLocalFile = async (

  const uuidToPdfMapping: Record<string, string> = {}; // key and value of string type

- printMessage(['Fetching URLs. This might take some time...'], { border: false });
-
  finalLinks = [...finalLinks, ...linksFromSitemap];

  await RequestList.open({
  sources: finalLinks,
  });

- printMessage(['Fetch URLs completed. Beginning scan'], messageOptions);
-
  const request = linksFromSitemap[0];
  const pdfFileName = path.basename(request.url);
  const trimmedUrl: string = request.url;
@@ -142,6 +162,8 @@ const crawlLocalFile = async (
  fs.writeFileSync(destinationFilePath, data);
  uuidToPdfMapping[pdfFileName] = trimmedUrl;

+ let shouldAbort = false;
+
  if (!isUrlPdf(request.url)) {
  await initModifiedUserAgent(browser);
  const browserContext = await constants.launcher.launchPersistentContext('', {
@@ -150,9 +172,24 @@ const crawlLocalFile = async (
  ...playwrightDeviceDetailsObject,
  });

+ const timeoutId = scanDuration > 0
+ ? setTimeout(() => {
+ console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting local file scan.`);
+ shouldAbort = true;
+ }, scanDuration * 1000)
+ : null;
+
  const page = await browserContext.newPage();
  request.url = convertPathToLocalFile(request.url);
  await page.goto(request.url);
+
+ if (shouldAbort) {
+ console.warn('Scan aborted due to timeout before page scan.');
+ await dataset.pushData({ scanned: [], scannedRedirects: [] });
+ await browserContext.close().catch(() => {});
+ return urlsCrawled;
+ }
+
  const results = await runAxeScript({ includeScreenshots, page, randomToken });

  const actualUrl = page.url() || request.loadedUrl || request.url;
@@ -178,7 +215,11 @@ const crawlLocalFile = async (

  await dataset.pushData(results);
  } else {
- urlsCrawled.scanned.push({ url: trimmedUrl, pageTitle: pdfFileName });
+ urlsCrawled.scanned.push({
+ url: trimmedUrl,
+ pageTitle: pdfFileName,
+ actualUrl: trimmedUrl,
+ });

  await runPdfScan(randomToken);
  // transform result format
@@ -192,6 +233,7 @@ const crawlLocalFile = async (
  // push results for each pdf document to key value store
  await Promise.all(pdfResults.map(result => dataset.pushData(result)));
  }
+
  return urlsCrawled;
  };
  export default crawlLocalFile;
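
For orientation, crawlLocalFile now takes a single options object instead of sixteen positional arguments, with scanDuration as the new optional field. A minimal illustrative call is sketched below; the import paths, the wrapper function, and every argument value are assumptions for the example, not taken from the diff.

```ts
// Illustrative only: invoking the reworked object-style crawlLocalFile signature.
// Paths and values below are placeholders, not oobee's actual wiring.
import { crawlLocalFile } from './crawlLocalFile.js';
import { ViewportSettingsClass } from '../combine.js';

async function scanLocalFile(viewportSettings: ViewportSettingsClass) {
  return crawlLocalFile({
    url: 'file:///tmp/report.html',
    randomToken: 'scan-1234',
    host: 'localhost',
    viewportSettings,
    maxRequestsPerCrawl: 100,
    browser: 'chromium',
    userDataDirectory: '',
    specifiedMaxConcurrency: 5,
    fileTypes: 'html-only',
    blacklistedPatterns: [],
    includeScreenshots: false,
    extraHTTPHeaders: {},
    scanDuration: 300, // new in this release: stop the scan after 300 seconds (0 = no limit)
  });
}
```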
@@ -1,5 +1,4 @@
- import crawlee, { LaunchContext, Request, RequestList } from 'crawlee';
- import printMessage from 'print-message';
+ import crawlee, { LaunchContext, Request, RequestList, Dataset } from 'crawlee';
  import fs from 'fs';
  import {
  createCrawleeSubFolders,
@@ -8,11 +7,15 @@ import {
  isUrlPdf,
  } from './commonCrawlerFunc.js';

- import constants, { STATUS_CODE_METADATA, guiInfoStatusTypes, UrlsCrawled } from '../constants/constants.js';
+ import constants, {
+ STATUS_CODE_METADATA,
+ guiInfoStatusTypes,
+ UrlsCrawled,
+ disallowedListOfPatterns,
+ } from '../constants/constants.js';
  import {
  getLinksFromSitemap,
  getPlaywrightLaunchOptions,
- messageOptions,
  isSkippedUrl,
  urlWithoutAuth,
  waitForPageLoaded,
@@ -24,25 +27,46 @@ import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.
  import { guiInfoLog } from '../logs.js';
  import { ViewportSettingsClass } from '../combine.js';

- const crawlSitemap = async (
- sitemapUrl: string,
- randomToken: string,
- _host: string,
- viewportSettings: ViewportSettingsClass,
- maxRequestsPerCrawl: number,
- browser: string,
- userDataDirectory: string,
- specifiedMaxConcurrency: number,
- fileTypes: string,
- blacklistedPatterns: string[],
- includeScreenshots: boolean,
- extraHTTPHeaders: Record<string, string>,
- fromCrawlIntelligentSitemap = false, // optional
- userUrlInputFromIntelligent: string = null, // optional
- datasetFromIntelligent: crawlee.Dataset = null, // optional
- urlsCrawledFromIntelligent: UrlsCrawled = null, // optional
- crawledFromLocalFile = false, // optional
- ) => {
+ const crawlSitemap = async ({
+ sitemapUrl,
+ randomToken,
+ host,
+ viewportSettings,
+ maxRequestsPerCrawl,
+ browser,
+ userDataDirectory,
+ specifiedMaxConcurrency,
+ fileTypes,
+ blacklistedPatterns,
+ includeScreenshots,
+ extraHTTPHeaders,
+ scanDuration = 0,
+ fromCrawlIntelligentSitemap = false,
+ userUrlInputFromIntelligent = null,
+ datasetFromIntelligent = null,
+ urlsCrawledFromIntelligent = null,
+ crawledFromLocalFile = false,
+ }: {
+ sitemapUrl: string;
+ randomToken: string;
+ host: string;
+ viewportSettings: ViewportSettingsClass;
+ maxRequestsPerCrawl: number;
+ browser: string;
+ userDataDirectory: string;
+ specifiedMaxConcurrency: number;
+ fileTypes: string;
+ blacklistedPatterns: string[];
+ includeScreenshots: boolean;
+ extraHTTPHeaders: Record<string, string>;
+ scanDuration?: number;
+ fromCrawlIntelligentSitemap?: boolean;
+ userUrlInputFromIntelligent?: string;
+ datasetFromIntelligent?: Dataset;
+ urlsCrawledFromIntelligent?: UrlsCrawled;
+ crawledFromLocalFile?: boolean;
+ }) => {
+ const crawlStartTime = Date.now();
  let dataset: crawlee.Dataset;
  let urlsCrawled: UrlsCrawled;

@@ -127,14 +151,11 @@ const crawlSitemap = async (
  const { playwrightDeviceDetailsObject } = viewportSettings;
  const { maxConcurrency } = constants;

- printMessage(['Fetching URLs. This might take some time...'], { border: false });
-
  finalLinks = [...finalLinks, ...linksFromSitemap];

  const requestList = await RequestList.open({
  sources: finalLinks,
  });
- printMessage(['Fetch URLs completed. Beginning scan'], messageOptions);

  let userDataDir = '';
  if (userDataDirectory) {
@@ -165,7 +186,6 @@ const crawlSitemap = async (
  },
  requestList,
  postNavigationHooks: [
-
  async ({ page }) => {
  try {
  // Wait for a quiet period in the DOM, but with safeguards
@@ -173,36 +193,35 @@ const crawlSitemap = async (
  return new Promise(resolve => {
  let timeout;
  let mutationCount = 0;
- const MAX_MUTATIONS = 250; // stop if things never quiet down
- const OBSERVER_TIMEOUT = 5000; // hard cap on total wait
-
+ const MAX_MUTATIONS = 250; // stop if things never quiet down
+ const OBSERVER_TIMEOUT = 5000; // hard cap on total wait
+
  const observer = new MutationObserver(() => {
  clearTimeout(timeout);
-
+
  mutationCount++;
  if (mutationCount > MAX_MUTATIONS) {
  observer.disconnect();
  resolve('Too many mutations, exiting.');
  return;
  }
-
+
  // restart quiet‑period timer
  timeout = setTimeout(() => {
  observer.disconnect();
  resolve('DOM stabilized.');
  }, 1000);
  });
-
+
  // overall timeout in case the page never settles
  timeout = setTimeout(() => {
  observer.disconnect();
  resolve('Observer timeout reached.');
  }, OBSERVER_TIMEOUT);
-
+
  const root = document.documentElement || document.body || document;
  if (!root || typeof observer.observe !== 'function') {
  resolve('No root node to observe.');
- return;
  }
  });
  });
@@ -214,27 +233,54 @@ const crawlSitemap = async (
  throw err; // Rethrow unknown errors
  }
  },
-
  ],
+ preNavigationHooks: [
+ async ({ request, page }, gotoOptions) => {
+ const url = request.url.toLowerCase();

- preNavigationHooks: isBasicAuth
- ? [
- async ({ page }) => {
+ const isNotSupportedDocument = disallowedListOfPatterns.some(pattern =>
+ url.startsWith(pattern),
+ );
+
+ if (isNotSupportedDocument) {
+ request.skipNavigation = true;
+ request.userData.isNotSupportedDocument = true;
+
+ // Log for verification (optional, but not required for correctness)
+ // console.log(`[SKIP] Not supported: ${request.url}`);
+
+ return;
+ }
+
+ // Set headers if basic auth
+ if (isBasicAuth) {
  await page.setExtraHTTPHeaders({
  Authorization: authHeader,
  ...extraHTTPHeaders,
  });
- },
- ]
- : [
- async () => {
+ } else {
  preNavigationHooks(extraHTTPHeaders);
- // insert other code here
- },
- ],
+ }
+ },
+ ],
  requestHandlerTimeoutSecs: 90,
  requestHandler: async ({ page, request, response, sendRequest }) => {
- await waitForPageLoaded(page, 10000);
+ // Log documents that are not supported
+ if (request.userData?.isNotSupportedDocument) {
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+ numScanned: urlsCrawled.scanned.length,
+ urlScanned: request.url,
+ });
+ urlsCrawled.userExcluded.push({
+ url: request.url,
+ pageTitle: request.url,
+ actualUrl: request.url, // because about:blank is not useful
+ metadata: STATUS_CODE_METADATA[1],
+ httpStatusCode: 0,
+ });
+
+ return;
+ }

  // Set basic auth header if needed
  if (isBasicAuth) {
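
The hunk above replaces the basic-auth-only pre-navigation hook with a single hook that also short-circuits unsupported document types before Playwright navigates. A rough, self-contained sketch of that Crawlee pattern follows; the prefix list and handler body are placeholders, not oobee's actual disallowedListOfPatterns or scan logic.

```ts
import { PlaywrightCrawler } from 'crawlee';

// Placeholder prefixes standing in for oobee's disallowedListOfPatterns.
const disallowedPrefixes = ['mailto:', 'tel:', 'ws:'];

const crawler = new PlaywrightCrawler({
  preNavigationHooks: [
    async ({ request }) => {
      const url = request.url.toLowerCase();
      if (disallowedPrefixes.some(prefix => url.startsWith(prefix))) {
        request.skipNavigation = true; // Playwright never opens this URL
        request.userData.isNotSupportedDocument = true; // flag for the requestHandler
      }
    },
  ],
  requestHandler: async ({ request }) => {
    if (request.userData?.isNotSupportedDocument) {
      // Record the URL as excluded (as the diff does) and stop here.
      return;
    }
    // ...run the accessibility scan on the loaded page...
  },
});
```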
@@ -247,39 +293,48 @@ const crawlSitemap = async (
  request.url = currentUrl.href;
  }

+ await waitForPageLoaded(page, 10000);
+
  const actualUrl = page.url() || request.loadedUrl || request.url;

- if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
- crawler.autoscaledPool.abort();
+ const hasExceededDuration =
+ scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
+
+ if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
+ if (hasExceededDuration) {
+ console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
+ }
+ crawler.autoscaledPool.abort(); // stops new requests
  return;
  }

- if (request.skipNavigation && actualUrl === "about:blank") {
- if (!isScanPdfs) {
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
- numScanned: urlsCrawled.scanned.length,
- urlScanned: request.url,
- });
- urlsCrawled.userExcluded.push({
- url: request.url,
- pageTitle: request.url,
- actualUrl: request.url, // because about:blank is not useful
- metadata: STATUS_CODE_METADATA[1],
- httpStatusCode: 0,
- });
-
+ if (request.skipNavigation && actualUrl === 'about:blank') {
+ if (isScanPdfs) {
+ // pushes download promise into pdfDownloads
+ const { pdfFileName, url } = handlePdfDownload(
+ randomToken,
+ pdfDownloads,
+ request,
+ sendRequest,
+ urlsCrawled,
+ );
+
+ uuidToPdfMapping[pdfFileName] = url;
  return;
  }
- // pushes download promise into pdfDownloads
- const { pdfFileName, url } = handlePdfDownload(
- randomToken,
- pdfDownloads,
- request,
- sendRequest,
- urlsCrawled,
- );

- uuidToPdfMapping[pdfFileName] = url;
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
+ numScanned: urlsCrawled.scanned.length,
+ urlScanned: request.url,
+ });
+ urlsCrawled.userExcluded.push({
+ url: request.url,
+ pageTitle: request.url,
+ actualUrl: request.url, // because about:blank is not useful
+ metadata: STATUS_CODE_METADATA[1],
+ httpStatusCode: 0,
+ });
+
  return;
  }

@@ -303,15 +358,11 @@ const crawlSitemap = async (
  }

  // This logic is different from crawlDomain, as it also checks if the pae is redirected before checking if it is excluded using exclusions.txt
- if (
- isRedirected &&
- blacklistedPatterns &&
- isSkippedUrl(actualUrl, blacklistedPatterns)
- ) {
+ if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
  urlsCrawled.userExcluded.push({
  url: request.url,
  pageTitle: request.url,
- actualUrl: actualUrl,
+ actualUrl,
  metadata: STATUS_CODE_METADATA[0],
  httpStatusCode: 0,
  });
@@ -324,7 +375,7 @@ const crawlSitemap = async (
  }

  const results = await runAxeScript({ includeScreenshots, page, randomToken });
-
+
  guiInfoLog(guiInfoStatusTypes.SCANNED, {
  numScanned: urlsCrawled.scanned.length,
  urlScanned: request.url,
@@ -333,7 +384,7 @@ const crawlSitemap = async (
  urlsCrawled.scanned.push({
  url: urlWithoutAuth(request.url),
  pageTitle: results.pageTitle,
- actualUrl: actualUrl, // i.e. actualUrl
+ actualUrl, // i.e. actualUrl
  });

  urlsCrawled.scannedRedirects.push({
@@ -354,16 +405,17 @@ const crawlSitemap = async (
  if (isScanHtml) {
  // carry through the HTTP status metadata
  const status = response?.status();
- const metadata = typeof status === 'number'
- ? (STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599])
- : STATUS_CODE_METADATA[2];
+ const metadata =
+ typeof status === 'number'
+ ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
+ : STATUS_CODE_METADATA[2];

- urlsCrawled.invalid.push({
+ urlsCrawled.invalid.push({
  actualUrl,
  url: request.url,
  pageTitle: request.url,
  metadata,
- httpStatusCode: typeof status === 'number' ? status : 0
+ httpStatusCode: typeof status === 'number' ? status : 0,
  });
  }
  }
@@ -384,21 +436,31 @@ const crawlSitemap = async (
  });

  const status = response?.status();
- const metadata = typeof status === 'number'
- ? (STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599])
- : STATUS_CODE_METADATA[2];
+ const metadata =
+ typeof status === 'number'
+ ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
+ : STATUS_CODE_METADATA[2];

  urlsCrawled.error.push({
  url: request.url,
  pageTitle: request.url,
  actualUrl: request.url,
  metadata,
- httpStatusCode: typeof status === 'number' ? status : 0
+ httpStatusCode: typeof status === 'number' ? status : 0,
  });
  crawlee.log.error(`Failed Request - ${request.url}: ${request.errorMessages}`);
  },
  maxRequestsPerCrawl: Infinity,
  maxConcurrency: specifiedMaxConcurrency || maxConcurrency,
+ ...(process.env.OOBEE_FAST_CRAWLER && {
+ autoscaledPoolOptions: {
+ minConcurrency: specifiedMaxConcurrency ? Math.min(specifiedMaxConcurrency, 10) : 10,
+ maxConcurrency: specifiedMaxConcurrency || maxConcurrency,
+ desiredConcurrencyRatio: 0.98, // Increase threshold for scaling up
+ scaleUpStepRatio: 0.99, // Scale up faster
+ scaleDownStepRatio: 0.1, // Scale down slower
+ },
+ }),
  });

  await crawler.run();
@@ -430,6 +492,11 @@ const crawlSitemap = async (
  guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
  }

+ if (scanDuration > 0) {
+ const elapsed = Math.round((Date.now() - crawlStartTime) / 1000);
+ console.log(`Crawl ended after ${elapsed}s (limit: ${scanDuration}s).`);
+ }
+
  return urlsCrawled;
  };
@@ -12,6 +12,7 @@ import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
  import constants, {
  getExecutablePath,
  guiInfoStatusTypes,
+ STATUS_CODE_METADATA,
  UrlsCrawled,
  } from '../constants/constants.js';

@@ -296,6 +297,7 @@ export const handlePdfDownload = (
  url: request.url,
  pageTitle: url,
  actualUrl: url,
+ metadata: STATUS_CODE_METADATA[1],
  });
  }

package/src/index.ts CHANGED
@@ -50,6 +50,7 @@ export type Answers = {
  zip: string;
  ruleset: RuleFlags[];
  generateJsonFiles: boolean;
+ scanDuration?: number;
  };

  export type Data = {
@@ -80,6 +81,7 @@ export type Data = {
  zip?: string;
  ruleset: RuleFlags[];
  generateJsonFiles: boolean;
+ scanDuration: number;
  };

  const userData = getUserDataTxt();
package/src/logs.ts CHANGED
@@ -23,8 +23,10 @@ const logFormat = printf(({ timestamp, level, message }) => {
  // All logs in combined.txt, error in errors.txt

  const consoleLogger = createLogger({
+ silent: !(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE),
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
- transports: [new transports.Console()],
+ transports:
+ process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE ? [new transports.Console()] : [],
  });

  // No display in consoles, this will mostly be used within the interactive script to avoid disrupting the flow
@@ -33,7 +35,7 @@ const consoleLogger = createLogger({
  const silentLogger = createLogger({
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
  transports: [
- process.env.OOBEE_VERBOSE
+ process.env.OOBEE_VERBOSE || process.env.RUNNING_FROM_PH_GUI
  ? new transports.Console({ handleExceptions: true })
  : new transports.File({ filename: 'errors.txt', level: 'warn', handleExceptions: true }),
  ].filter(Boolean),
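
The logs.ts change makes console output opt-in: consoleLogger is silenced and given no transports unless RUNNING_FROM_PH_GUI or OOBEE_VERBOSE is set, and silentLogger now routes to the console under either flag rather than OOBEE_VERBOSE alone. A condensed sketch of the resulting winston setup follows; the logFormat body is assumed, since only its use is visible in the hunk header above.

```ts
import { createLogger, format, transports } from 'winston';

const { combine, timestamp, printf } = format;

// Assumed format body; the diff only shows that a printf-based logFormat exists.
const logFormat = printf(({ timestamp, level, message }) => `${timestamp} ${level}: ${message}`);

const showConsole = Boolean(process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE);

// Quiet by default: no console noise unless the GUI or verbose flag is set.
export const consoleLogger = createLogger({
  silent: !showConsole,
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
  transports: showConsole ? [new transports.Console()] : [],
});

// Warnings and errors still land in errors.txt when running non-verbose.
export const silentLogger = createLogger({
  format: combine(timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), logFormat),
  transports: [
    showConsole
      ? new transports.Console({ handleExceptions: true })
      : new transports.File({ filename: 'errors.txt', level: 'warn', handleExceptions: true }),
  ].filter(Boolean),
});
```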