npm - @govtechsg/oobee - Versions diffs - 0.10.50 → 0.10.57 - Mend

@govtechsg/oobee 0.10.50 → 0.10.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/.github/workflows/bump-package-version.yml +58 -0
package/.github/workflows/image.yml +38 -17
package/DETAILS.md +5 -2
package/INTEGRATION.md +57 -53
package/README.md +4 -1
package/__tests__/test-sitemap-url-patterns.xml +105 -0
package/exclusions.txt +1 -0
package/package.json +7 -6
package/src/cli.ts +35 -2
package/src/combine.ts +10 -7
package/src/constants/cliFunctions.ts +9 -0
package/src/constants/common.ts +95 -105
package/src/constants/constants.ts +47 -2
package/src/crawlers/commonCrawlerFunc.ts +50 -5
package/src/crawlers/crawlDomain.ts +112 -73
package/src/crawlers/crawlIntelligentSitemap.ts +40 -36
package/src/crawlers/crawlLocalFile.ts +77 -35
package/src/crawlers/crawlSitemap.ts +156 -89
package/src/index.ts +2 -0
package/src/logs.ts +4 -2
package/src/mergeAxeResults.ts +20 -9
package/src/npmIndex.ts +1 -1
package/src/screenshotFunc/htmlScreenshotFunc.ts +7 -5
package/src/screenshotFunc/pdfScreenshotFunc.ts +2 -2
package/src/static/ejs/partials/components/wcagCompliance.ejs +1 -1
package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +1 -0
package/src/static/ejs/partials/styles/styles.ejs +11 -0
package/src/static/ejs/report.ejs +14 -1
package/src/utils.ts +3 -3

package/src/constants/constants.ts CHANGED

@@ -7,7 +7,7 @@ import os from 'os';
 import { spawnSync, execSync } from 'child_process';
 import { chromium } from 'playwright';
 import * as Sentry from '@sentry/node';
-import { silentLogger } from '../logs.js';
+import { consoleLogger, silentLogger } from '../logs.js';
 import { PageInfo } from '../mergeAxeResults.js';
 const filename = fileURLToPath(import.meta.url);
@@ -128,7 +128,7 @@ export const getDefaultChromiumDataDir = () => {
         defaultChromiumDataDir = '/tmp';
       }
-      silentLogger.warn(`Using Chromium support directory at ${defaultChromiumDataDir}`);
+      consoleLogger.info(`Using Chromium support directory at ${defaultChromiumDataDir}`);
     }
     if (defaultChromiumDataDir && fs.existsSync(defaultChromiumDataDir)) {
@@ -179,6 +179,7 @@ export const basicAuthRegex = /^.*\/\/.*:.*@.*$/i;
 // for crawlers
 export const axeScript = path.join(dirname, '../../node_modules/axe-core/axe.min.js');
 export class UrlsCrawled {
+  siteName: string;
   toScan: string[] = [];
   scanned: PageInfo[] = [];
   invalid: PageInfo[] = [];
@@ -361,6 +362,7 @@ const wcagLinks = {
   // 'WCAG 1.4.10': 'https://www.w3.org/TR/WCAG22/#reflow', - TODO: review for veraPDF
   'WCAG 1.4.12': 'https://www.w3.org/TR/WCAG22/#text-spacing',
   'WCAG 2.1.1': 'https://www.w3.org/TR/WCAG22/#keyboard',
+  'WCAG 2.1.3': 'https://www.w3.org/WAI/WCAG22/Understanding/keyboard-no-exception.html', // AAA
   'WCAG 2.2.1': 'https://www.w3.org/TR/WCAG22/#timing-adjustable',
   'WCAG 2.2.2': 'https://www.w3.org/TR/WCAG22/#pause-stop-hide',
   'WCAG 2.2.4': 'https://www.w3.org/TR/WCAG22/#interruptions', // AAA
@@ -564,3 +566,46 @@ export const STATUS_CODE_METADATA: Record<number,string> = {
   511: '511 - Network Authentication Required',
 };
+  // Elements that should not be clicked or enqueued
+  // With reference from https://chromeenterprise.google/policies/url-patterns/
+export const disallowedListOfPatterns = [
+  "#",
+  "mailto:",
+  "tel:",
+  "sms:",
+  "skype:",
+  "zoommtg:",
+  "msteams:",
+  "whatsapp:",
+  "slack:",
+  "viber:",
+  "tg:",
+  "line:",
+  "meet:",
+  "facetime:",
+  "imessage:",
+  "discord:",
+  "sgnl:",
+  "webex:",
+  "intent:",
+  "ms-outlook:",
+  "ms-onedrive:",
+  "ms-word:",
+  "ms-excel:",
+  "ms-powerpoint:",
+  "ms-office:",
+  "onenote:",
+  "vs:",
+  "chrome-extension:",
+  "chrome-search:",
+  "chrome:",
+  "chrome-untrusted:",
+  "devtools:",
+  "isolated-app:"
+];
+export const disallowedSelectorPatterns = disallowedListOfPatterns
+  .map(pattern => `a[href^="${pattern}"]`)
+  .join(',')
+  .replace(/\s+/g, '');

package/src/crawlers/commonCrawlerFunc.ts CHANGED

@@ -1,13 +1,14 @@
 import crawlee, { CrawlingContext, PlaywrightGotoOptions, Request } from 'crawlee';
 import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
-import { BrowserContext, Page } from 'playwright';
+import { BrowserContext, ElementHandle, Page } from 'playwright';
 import {
   axeScript,
+  disallowedListOfPatterns,
   guiInfoStatusTypes,
   RuleFlags,
   saflyIconSelector,
 } from '../constants/constants.js';
-import { guiInfoLog, silentLogger } from '../logs.js';
+import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
 import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotFunc.js';
 import { isFilePath } from '../constants/common.js';
 import { extractAndGradeText } from './custom/extractAndGradeText.js';
@@ -305,7 +306,7 @@ export const runAxeScript = async ({
       });
     });
   } catch (e) {
-    silentLogger.warn(`Error while checking for DOM mutations: ${e}`);
+    // do nothing, just continue
   }
   // Omit logging of browser console errors to reduce unnecessary verbosity
@@ -459,9 +460,9 @@ export const runAxeScript = async ({
   try {
     pageTitle = await page.evaluate(() => document.title);
   } catch (e) {
-    silentLogger.warn(`Error while getting page title: ${e}`);
+    consoleLogger.info(`Error while getting page title: ${e}`);
     if (page.isClosed()) {
-      silentLogger.info(`Page was closed for ${requestUrl}, creating new page`);
+      consoleLogger.info(`Page was closed for ${requestUrl}, creating new page`);
       page = await browserContext.newPage();
       await page.goto(requestUrl, { waitUntil: 'domcontentloaded' });
       pageTitle = await page.evaluate(() => document.title);
@@ -508,3 +509,47 @@ export const isUrlPdf = (url: string) => {
   const parsedUrl = new URL(url);
   return /\.pdf($|\?|#)/i.test(parsedUrl.pathname) || /\.pdf($|\?|#)/i.test(parsedUrl.href);
 };
+export async function shouldSkipClickDueToDisallowedHref(
+  page: Page,
+  element: ElementHandle
+): Promise<boolean> {
+  return await page.evaluate(
+    ({ el, disallowedPrefixes }) => {
+      function isDisallowedHref(href: string | null): boolean {
+        if (!href) return false;
+        href = href.toLowerCase();
+        return disallowedPrefixes.some((prefix: string) => href.startsWith(prefix));
+      }
+      const castEl = el as HTMLElement;
+      // Check descendant <a href="">
+      const descendants = castEl.querySelectorAll('a[href]');
+      for (const a of descendants) {
+        const href = a.getAttribute('href');
+        if (isDisallowedHref(href)) {
+          return true;
+        }
+      }
+      // Check self and ancestors for disallowed <a>
+      let current: HTMLElement | null = castEl;
+      while (current) {
+        if (
+          current.tagName === 'A' &&
+          isDisallowedHref(current.getAttribute('href'))
+        ) {
+          return true;
+        }
+        current = current.parentElement;
+      }
+      return false;
+    },
+    {
+      el: element,
+      disallowedPrefixes: disallowedListOfPatterns,
+    }
+  );
+}

package/src/crawlers/crawlDomain.ts CHANGED

@@ -11,6 +11,7 @@ import {
   createCrawleeSubFolders,
   runAxeScript,
   isUrlPdf,
+  shouldSkipClickDueToDisallowedHref,
 } from './commonCrawlerFunc.js';
 import constants, {
   UrlsCrawled,
@@ -19,6 +20,8 @@ import constants, {
   cssQuerySelectors,
   RuleFlags,
   STATUS_CODE_METADATA,
+  disallowedListOfPatterns,
+  disallowedSelectorPatterns,
 } from '../constants/constants.js';
 import {
   getPlaywrightLaunchOptions,
@@ -37,7 +40,7 @@ import {
   mapPdfScanResults,
   doPdfScreenshots,
 } from './pdfScanFunc.js';
-import { silentLogger, guiInfoLog } from '../logs.js';
+import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
 import { ViewportSettingsClass } from '../combine.js';
 const isBlacklisted = (url: string, blacklistedPatterns: string[]) => {
@@ -71,6 +74,7 @@ const crawlDomain = async ({
   includeScreenshots,
   followRobots,
   extraHTTPHeaders,
+  scanDuration = 0,
   safeMode = false,
   fromCrawlIntelligentSitemap = false,
   datasetFromIntelligent = null,
@@ -91,12 +95,14 @@ const crawlDomain = async ({
   includeScreenshots: boolean;
   followRobots: boolean;
   extraHTTPHeaders: Record<string, string>;
+  scanDuration?: number;
   safeMode?: boolean;
   fromCrawlIntelligentSitemap?: boolean;
   datasetFromIntelligent?: crawlee.Dataset;
   urlsCrawledFromIntelligent?: UrlsCrawled;
   ruleset?: RuleFlags[];
 }) => {
+  const crawlStartTime = Date.now();
   let dataset: crawlee.Dataset;
   let urlsCrawled: UrlsCrawled;
   let requestQueue: crawlee.RequestQueue;
@@ -165,7 +171,7 @@ const crawlDomain = async ({
   const httpHeadCache = new Map<string, boolean>();
   const isProcessibleUrl = async (url: string): Promise<boolean> => {
     if (httpHeadCache.has(url)) {
-      silentLogger.info(`Skipping request as URL has been processed before ${url}}`);
+      consoleLogger.info(`Skipping request as URL has been processed before: ${url}}`);
       return false; // return false to avoid processing the same url again
     }
@@ -180,14 +186,14 @@ const crawlDomain = async ({
       // Check if the response suggests it's a downloadable file based on Content-Disposition header
       if (contentDisposition.includes('attachment')) {
-        silentLogger.info(`Skipping URL due to attachment header: ${url}`);
+        consoleLogger.info(`Skipping URL due to attachment header: ${url}`);
         httpHeadCache.set(url, false);
         return false;
       }
       // Check if the MIME type suggests it's a downloadable file
       if (contentType.startsWith('application/') || contentType.includes('octet-stream')) {
-        silentLogger.info(`Skipping potential downloadable file: ${contentType} at URL ${url}`);
+        consoleLogger.info(`Skipping potential downloadable file: ${contentType} at URL ${url}`);
         httpHeadCache.set(url, false);
         return false;
       }
@@ -195,14 +201,14 @@ const crawlDomain = async ({
       // Use the mime-types library to ensure it's processible content (e.g., HTML or plain text)
       const mimeType = mime.lookup(contentType);
       if (mimeType && !mimeType.startsWith('text/html') && !mimeType.startsWith('text/')) {
-        silentLogger.info(`Detected non-processible MIME type: ${mimeType} at URL ${url}`);
+        consoleLogger.info(`Detected non-processible MIME type: ${mimeType} at URL ${url}`);
         httpHeadCache.set(url, false);
         return false;
       }
       // Additional check for zip files by their magic number (PK\x03\x04)
       if (url.endsWith('.zip')) {
-        silentLogger.info(`Checking for zip file magic number at URL ${url}`);
+        consoleLogger.info(`Checking for zip file magic number at URL ${url}`);
         // Download the first few bytes of the file to check for the magic number
         const byteResponse = await axios.get(url, {
@@ -213,11 +219,11 @@ const crawlDomain = async ({
         const magicNumber = byteResponse.data.toString('hex');
         if (magicNumber === '504b0304') {
-          silentLogger.info(`Skipping zip file at URL ${url}`);
+          consoleLogger.info(`Skipping zip file at URL ${url}`);
           httpHeadCache.set(url, false);
           return false;
         }
-        silentLogger.info(
+        consoleLogger.info(
           `Not skipping ${url}, magic number does not match ZIP file: ${magicNumber}`,
         );
       }
@@ -235,12 +241,12 @@ const crawlDomain = async ({
         !fileType.mime.startsWith('text/html') &&
         !fileType.mime.startsWith('text/')
       ) {
-        silentLogger.info(`Detected downloadable file of type ${fileType.mime} at URL ${url}`);
+        consoleLogger.info(`Detected downloadable file of type ${fileType.mime} at URL ${url}`);
         httpHeadCache.set(url, false);
         return false;
       }
     } catch (e) {
-      // silentLogger.error(`Error checking the MIME type of ${url}: ${e.message}`);
+      // consoleLogger.error(`Error checking the MIME type of ${url}: ${e.message}`);
       // If an error occurs (e.g., a network issue), assume the URL is processible
       httpHeadCache.set(url, true);
       return true;
@@ -259,14 +265,14 @@ const crawlDomain = async ({
     try {
       await enqueueLinks({
         // set selector matches anchor elements with href but not contains # or starting with mailto:
-        selector: 'a:not(a[href*="#"],a[href^="mailto:"])',
+        selector: `a:not(${disallowedSelectorPatterns})`,
         strategy,
         requestQueue,
         transformRequestFunction: (req: RequestOptions): RequestOptions | null => {
           try {
             req.url = req.url.replace(/(?<=&|\?)utm_.*?(&|$)/gim, '');
           } catch (e) {
-            silentLogger.error(e);
+            consoleLogger.error(e);
           }
           if (urlsCrawled.scanned.some(item => item.url === req.url)) {
             req.skipNavigation = true;
@@ -288,7 +294,7 @@ const crawlDomain = async ({
         try {
           await customEnqueueLinksByClickingElements(page, browserContext);
         } catch (e) {
-          silentLogger.info(e);
+          // do nothing;
         }
       }
     } catch {
@@ -307,7 +313,10 @@ const crawlDomain = async ({
       const isAlreadyScanned: boolean = urlsCrawled.scanned.some(item => item.url === newPageUrl);
       const isBlacklistedUrl: boolean = isBlacklisted(newPageUrl, blacklistedPatterns);
       const isNotFollowStrategy: boolean = !isFollowStrategy(newPageUrl, initialPageUrl, strategy);
-      return isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy;
+      const isNotSupportedDocument: boolean = disallowedListOfPatterns.some(pattern =>
+        newPageUrl.toLowerCase().startsWith(pattern),
+      );
+      return isNotSupportedDocument || isAlreadyScanned || isBlacklistedUrl || isNotFollowStrategy;
     };
     const setPageListeners = (page: Page): void => {
       // event listener to handle new page popups upon button click
@@ -431,6 +440,16 @@ const crawlDomain = async ({
             });
           } else if (!newUrlFoundInElement) {
             try {
+              const shouldSkip = await shouldSkipClickDueToDisallowedHref(page, element);
+              if (shouldSkip) {
+                const elementHtml = await page.evaluate(el => el.outerHTML, element);
+                consoleLogger.info(
+                  'Skipping a click due to disallowed href nearby. Element HTML:',
+                  elementHtml,
+                );
+                continue;
+              }
               // Find url in html elements by manually clicking them. New page navigation/popups will be handled by event listeners above
               await element.click({ force: true });
               await page.waitForTimeout(1000); // Add a delay of 1 second between each Element click
@@ -455,7 +474,7 @@ const crawlDomain = async ({
   }
   await initModifiedUserAgent(browser, playwrightDeviceDetailsObject);
   const crawler = new crawlee.PlaywrightCrawler({
     launchContext: {
       launcher: constants.launcher,
@@ -486,36 +505,35 @@ const crawlDomain = async ({
           return new Promise(resolve => {
             let timeout;
             let mutationCount = 0;
-            const MAX_MUTATIONS     = 250;   // stop if things never quiet down
-            const OBSERVER_TIMEOUT  = 5000;  // hard cap on total wait
+            const MAX_MUTATIONS = 250; // stop if things never quiet down
+            const OBSERVER_TIMEOUT = 5000; // hard cap on total wait
             const observer = new MutationObserver(() => {
               clearTimeout(timeout);
               mutationCount++;
               if (mutationCount > MAX_MUTATIONS) {
                 observer.disconnect();
                 resolve('Too many mutations, exiting.');
                 return;
               }
               // restart quiet‑period timer
               timeout = setTimeout(() => {
                 observer.disconnect();
                 resolve('DOM stabilized.');
               }, 1000);
             });
             // overall timeout in case the page never settles
             timeout = setTimeout(() => {
               observer.disconnect();
               resolve('Observer timeout reached.');
             }, OBSERVER_TIMEOUT);
             const root = document.documentElement || document.body || document;
             if (!root || typeof observer.observe !== 'function') {
               resolve('No root node to observe.');
-              return;
             }
           });
         });
@@ -539,31 +557,31 @@ const crawlDomain = async ({
     ],
     preNavigationHooks: isBasicAuth
       ? [
-        async ({ page, request }) => {
-          await page.setExtraHTTPHeaders({
-            Authorization: authHeader,
-            ...extraHTTPHeaders,
-          });
-          const processible = await isProcessibleUrl(request.url);
-          if (!processible) {
-            request.skipNavigation = true;
-            return null;
-          }
-        },
-      ]
+          async ({ page, request }) => {
+            await page.setExtraHTTPHeaders({
+              Authorization: authHeader,
+              ...extraHTTPHeaders,
+            });
+            const processible = await isProcessibleUrl(request.url);
+            if (!processible) {
+              request.skipNavigation = true;
+              return null;
+            }
+          },
+        ]
       : [
-        async ({ page, request }) => {
-          await page.setExtraHTTPHeaders({
-            ...extraHTTPHeaders,
-          });
+          async ({ page, request }) => {
+            await page.setExtraHTTPHeaders({
+              ...extraHTTPHeaders,
+            });
-          const processible = await isProcessibleUrl(request.url);
-          if (!processible) {
-            request.skipNavigation = true;
-            return null;
-          }
-        },
-      ],
+            const processible = await isProcessibleUrl(request.url);
+            if (!processible) {
+              request.skipNavigation = true;
+              return null;
+            }
+          },
+        ],
     requestHandlerTimeoutSecs: 90, // Allow each page to be processed by up from default 60 seconds
     requestHandler: async ({ page, request, response, crawler, sendRequest, enqueueLinks }) => {
       const browserContext: BrowserContext = page.context();
@@ -586,7 +604,10 @@ const crawlDomain = async ({
           actualUrl = page.url();
         }
-        if (!isFollowStrategy(url, actualUrl, strategy) && (isBlacklisted(actualUrl, blacklistedPatterns) || (isUrlPdf(actualUrl) && !isScanPdfs))) {
+        if (
+          !isFollowStrategy(url, actualUrl, strategy) &&
+          (isBlacklisted(actualUrl, blacklistedPatterns) || (isUrlPdf(actualUrl) && !isScanPdfs))
+        ) {
           guiInfoLog(guiInfoStatusTypes.SKIPPED, {
             numScanned: urlsCrawled.scanned.length,
             urlScanned: actualUrl,
@@ -594,7 +615,13 @@ const crawlDomain = async ({
           return;
         }
-        if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
+        const hasExceededDuration =
+          scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
+        if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
+          if (hasExceededDuration) {
+            console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting website crawl.`);
+          }
           isAbortingScanNow = true;
           crawler.autoscaledPool.abort();
           return;
@@ -612,7 +639,7 @@ const crawlDomain = async ({
         }
         // handle pdfs
-        if (request.skipNavigation && actualUrl === "about:blank") {
+        if (request.skipNavigation && actualUrl === 'about:blank') {
           if (!isScanPdfs) {
             guiInfoLog(guiInfoStatusTypes.SKIPPED, {
               numScanned: urlsCrawled.scanned.length,
@@ -648,7 +675,7 @@ const crawlDomain = async ({
           urlsCrawled.userExcluded.push({
             url: request.url,
             pageTitle: request.url,
-            actualUrl: actualUrl, // because about:blank is not useful
+            actualUrl, // because about:blank is not useful
             metadata: STATUS_CODE_METADATA[1],
             httpStatusCode: 0,
           });
@@ -656,15 +683,19 @@ const crawlDomain = async ({
           return;
         }
-        if (!isFollowStrategy(url, actualUrl, strategy) && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
+        if (
+          !isFollowStrategy(url, actualUrl, strategy) &&
+          blacklistedPatterns &&
+          isSkippedUrl(actualUrl, blacklistedPatterns)
+        ) {
           urlsCrawled.userExcluded.push({
             url: request.url,
             pageTitle: request.url,
-            actualUrl: actualUrl,
+            actualUrl,
             metadata: STATUS_CODE_METADATA[0],
             httpStatusCode: 0,
           });
           guiInfoLog(guiInfoStatusTypes.SKIPPED, {
             numScanned: urlsCrawled.scanned.length,
             urlScanned: request.url,
@@ -679,11 +710,7 @@ const crawlDomain = async ({
           const isRedirected = !areLinksEqual(actualUrl, request.url);
           // check if redirected link is following strategy (same-domain/same-hostname)
-          const isLoadedUrlFollowStrategy = isFollowStrategy(
-            actualUrl,
-            request.url,
-            strategy,
-          );
+          const isLoadedUrlFollowStrategy = isFollowStrategy(actualUrl, request.url, strategy);
           if (isRedirected && !isLoadedUrlFollowStrategy) {
             urlsCrawled.notScannedRedirects.push({
               fromUrl: request.url,
@@ -693,7 +720,7 @@ const crawlDomain = async ({
           }
           const responseStatus = response?.status();
-            if (responseStatus && responseStatus >= 300) {
+          if (responseStatus && responseStatus >= 300) {
             guiInfoLog(guiInfoStatusTypes.SKIPPED, {
               numScanned: urlsCrawled.scanned.length,
               urlScanned: request.url,
@@ -706,7 +733,7 @@ const crawlDomain = async ({
               httpStatusCode: responseStatus,
             });
             return;
-            }
+          }
           const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset });
@@ -733,7 +760,7 @@ const crawlDomain = async ({
               urlsCrawled.scanned.push({
                 url: urlWithoutAuth(request.url),
                 pageTitle: results.pageTitle,
-                actualUrl: actualUrl, // i.e. actualUrl
+                actualUrl, // i.e. actualUrl
               });
               urlsCrawled.scannedRedirects.push({
@@ -768,11 +795,10 @@ const crawlDomain = async ({
           urlsCrawled.userExcluded.push({
             url: request.url,
             pageTitle: request.url,
-            actualUrl: actualUrl, // because about:blank is not useful
+            actualUrl, // because about:blank is not useful
             metadata: STATUS_CODE_METADATA[1],
             httpStatusCode: 0,
           });
         }
         if (followRobots) await getUrlsFromRobotsTxt(request.url, browser);
@@ -780,7 +806,7 @@ const crawlDomain = async ({
       } catch (e) {
         try {
           if (!e.message.includes('page.evaluate')) {
-            silentLogger.info(e);
+            // do nothing;
             guiInfoLog(guiInfoStatusTypes.ERROR, {
               numScanned: urlsCrawled.scanned.length,
               urlScanned: request.url,
@@ -815,11 +841,11 @@ const crawlDomain = async ({
             urlScanned: request.url,
           });
-          urlsCrawled.error.push({
-            url: request.url,
-            pageTitle: request.url,
-            actualUrl: request.url,
-            metadata: STATUS_CODE_METADATA[2]
+          urlsCrawled.error.push({
+            url: request.url,
+            pageTitle: request.url,
+            actualUrl: request.url,
+            metadata: STATUS_CODE_METADATA[2],
           });
         }
       }
@@ -831,9 +857,10 @@ const crawlDomain = async ({
       });
       const status = response?.status();
-      const metadata = typeof status === 'number'
-      ? (STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599])
-      : STATUS_CODE_METADATA[2];
+      const metadata =
+        typeof status === 'number'
+          ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
+          : STATUS_CODE_METADATA[2];
       urlsCrawled.error.push({
         url: request.url,
@@ -842,10 +869,18 @@ const crawlDomain = async ({
         metadata,
         httpStatusCode: typeof status === 'number' ? status : 0,
       });
     },
     maxRequestsPerCrawl: Infinity,
     maxConcurrency: specifiedMaxConcurrency || maxConcurrency,
+    ...(process.env.OOBEE_FAST_CRAWLER && {
+      autoscaledPoolOptions: {
+        minConcurrency: specifiedMaxConcurrency ? Math.min(specifiedMaxConcurrency, 10) : 10,
+        maxConcurrency: specifiedMaxConcurrency || maxConcurrency,
+        desiredConcurrencyRatio: 0.98, // Increase threshold for scaling up
+        scaleUpStepRatio: 0.99,        // Scale up faster
+        scaleDownStepRatio: 0.1,       // Scale down slower
+      },
+    }),
   });
   await crawler.run();
@@ -875,6 +910,10 @@ const crawlDomain = async ({
     guiInfoLog(guiInfoStatusTypes.COMPLETED, {});
   }
+  if (scanDuration > 0) {
+    const elapsed = Math.round((Date.now() - crawlStartTime) / 1000);
+    console.log(`Crawl ended after ${elapsed}s. Limit: ${scanDuration}s.`);
+  }
   return urlsCrawled;
 };