@govtechsg/oobee 0.10.36 → 0.10.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/DETAILS.md +3 -3
  2. package/INTEGRATION.md +142 -53
  3. package/README.md +15 -0
  4. package/exclusions.txt +4 -1
  5. package/package.json +2 -2
  6. package/src/constants/cliFunctions.ts +0 -7
  7. package/src/constants/common.ts +39 -1
  8. package/src/constants/constants.ts +9 -8
  9. package/src/crawlers/commonCrawlerFunc.ts +66 -219
  10. package/src/crawlers/crawlDomain.ts +6 -2
  11. package/src/crawlers/crawlLocalFile.ts +2 -0
  12. package/src/crawlers/crawlSitemap.ts +5 -3
  13. package/src/crawlers/custom/escapeCssSelector.ts +10 -0
  14. package/src/crawlers/custom/evaluateAltText.ts +13 -0
  15. package/src/crawlers/custom/extractAndGradeText.ts +0 -2
  16. package/src/crawlers/custom/extractText.ts +28 -0
  17. package/src/crawlers/custom/findElementByCssSelector.ts +46 -0
  18. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +1006 -901
  19. package/src/crawlers/custom/framesCheck.ts +51 -0
  20. package/src/crawlers/custom/getAxeConfiguration.ts +126 -0
  21. package/src/crawlers/custom/gradeReadability.ts +30 -0
  22. package/src/crawlers/custom/xPathToCss.ts +178 -0
  23. package/src/mergeAxeResults.ts +467 -129
  24. package/src/npmIndex.ts +130 -62
  25. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +1 -1
  26. package/src/static/ejs/partials/components/scanAbout.ejs +1 -1
  27. package/src/static/ejs/partials/footer.ejs +3 -3
  28. package/src/static/ejs/partials/scripts/reportSearch.ejs +112 -74
  29. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +2 -2
  30. package/src/static/ejs/partials/summaryMain.ejs +3 -3
  31. package/src/static/ejs/report.ejs +3 -3
  32. package/src/xPathToCssCypress.ts +178 -0
  33. package/src/crawlers/customAxeFunctions.ts +0 -82
package/DETAILS.md CHANGED
@@ -14,12 +14,12 @@ Details of each issue and severity rating provided by the current scan engine.
14
14
 
15
15
  ## Conformance Covered
16
16
 
17
- #### Definitions of Conformance Level, Must Fix, Good To Fix, Manual Review Required Required
17
+ #### Definitions of Conformance Level, Must Fix, Good To Fix, Manual Review Required
18
18
 
19
19
  In Oobee, issues are grouped into one of three categories:
20
20
  - **Must Fix** issues include WCAG A & AA success criteria (excluding those requiring review).
21
21
  - **Good To Fix** issues include WCAG Level AAA success criteria and all best practice rules that do not necessarily correspond to a WCAG success criterion but are industry-accepted practices that improve the user experience.
22
- - **Manual Review Required Required** occurrences could potentially be false positive, requiring human validation for accuracy.
22
+ - **Manual Review Required** occurrences could potentially be false positives, requiring human validation for accuracy.
23
23
 
24
24
  Note: Level AAA rules are disabled by default. Please specify `enable-wcag-aaa` in the ruleset flag to enable AAA rules.
25
25
 
@@ -158,7 +158,7 @@ Note: Level AAA are disabled by default. Please specify `enable-wcag-aaa` in ru
158
158
  | color-contrast-enhanced | Ensure the contrast between foreground and background colors meets WCAG 2 AAA enhanced contrast ratio thresholds | Good to Fix | WCAG 1.4.6 |
159
159
  | identical-links-same-purpose | Ensure that links with the same accessible name serve a similar purpose | Good to Fix | WCAG 2.4.9 |
160
160
  | meta-refresh-no-exceptions | Ensure <meta http-equiv="refresh"> is not used for delayed refresh | Good to Fix | WCAG 2.2.4, WCAG 3.2.5 |
161
- | oobee-grading-text-contents | Text content should be clear and plain to ensure that it is easily understood. | Manual Review Required Required | WCAG 3.1.5 |
161
+ | oobee-grading-text-contents | Text content should be clear and plain to ensure that it is easily understood. | Manual Review Required | WCAG 3.1.5 |
162
162
 
163
163
  ## Best Practice
164
164
 
package/INTEGRATION.md CHANGED
@@ -30,7 +30,7 @@ In order to use this functionality, the testing framework must support:
30
30
 
31
31
  ### API Reference
32
32
 
33
- #### `async oobeeA11yInit(entryUrl, testLabel, name, email, includeScreenshots, viewportSettings, thresholds, scanAboutMetadata)`
33
+ #### `async oobeeA11yInit({entryUrl, testLabel, name, email, includeScreenshots, viewportSettings, thresholds, scanAboutMetadata, zip, deviceChosen, strategy, ruleset, specifiedMaxConcurrency, followRobots})`
34
34
 
35
35
  Returns an instance of Oobee
36
36
 
@@ -54,6 +54,16 @@ Returns an instance of Oobee
54
54
  - Include additional information in the Scan About section of the report by passing in a JSON object.
55
55
  - `zip` (optional)
56
56
  - Name of the generated zip of Oobee results at the end of scan. Defaults to "oobee-scan-results".
57
+ - `deviceChosen` (optional)
58
+ - Name of the device to scan on. Example: `iPhone 13 Pro Max`
59
+ - `strategy` (optional)
60
+ - The EnqueueStrategy to use. Options: `all`, `same-hostname`, `same-domain`, `same-origin`
61
+ - `ruleset` (optional)
62
+ - The array of rulesets to use. Options: `default`, `disable-oobee`, `enable-wcag-aaa`
63
+ - `specifiedMaxConcurrency` (optional)
64
+ - The maximum number of concurrent requests to be made. Defaults to 25.
65
+ - `followRobots` (optional)
66
+ - Whether to follow robots.txt. Defaults to false.
57
67
 
58
68
  #### Oobee Instance
59
69
 
@@ -78,14 +88,15 @@ Unique identifier for the scan instance
78
88
 
79
89
  `getScripts()`
80
90
 
81
- Get the axe-core script to be injected into the browser
91
+ Get the axe-core script to be injected into the browser, along with other custom Oobee scripts
82
92
 
83
- - `runA11yScan(elementsToScan)`
93
+ - `runA11yScan(elementsToScan, gradingReadabilityFlag)`
84
94
  Runs axe scan on the current page.
85
95
 
86
96
  Parameter(s):
87
97
 
88
98
  - `elementsToScan`: Specifies which element should and which should not be tested
99
+ - `gradingReadabilityFlag`: This is the readability score as a string. If it is non-empty, a readability violation will be added.
89
100
 
90
101
  Returns:
91
102
 
@@ -176,17 +187,22 @@ Create <code>cypress.config.js</code> with the following contents, and change yo
176
187
  // name of the generated zip of the results at the end of scan
177
188
  const resultsZipName = "oobee-scan-results"
178
189
 
179
- const oobeeA11y = await oobeeA11yInit(
180
- "https://govtechsg.github.io", // initial url to start scan
181
- "Demo Cypress Scan", // label for test
182
- "Your Name",
183
- "email@domain.com",
184
- true, // include screenshots of affected elements in the report
185
- viewportSettings,
186
- thresholds,
187
- scanAboutMetadata,
188
- resultsZipName
189
- );
190
+ const oobeeA11y = await oobeeA11yInit({
191
+ entryUrl: "https://govtechsg.github.io", // initial url to start scan
192
+ testLabel: "Demo Cypress Scan", // label for test
193
+ name: "Your Name",
194
+ email: "email@domain.com",
195
+ includeScreenshots: true, // include screenshots of affected elements in the report
196
+ viewportSettings,
197
+ thresholds: { mustFix: undefined, goodToFix: undefined },
198
+ scanAboutMetadata: undefined,
199
+ zip: resultsZipName,
200
+ deviceChosen: "",
201
+ strategy: undefined,
202
+ ruleset: ["enable-wcag-aaa"], // add "disable-oobee" to disable Oobee custom checks
203
+ specifiedMaxConcurrency: undefined,
204
+ followRobots: undefined,
205
+ });
190
206
 
191
207
  export default defineConfig({
192
208
  taskTimeout: 120000, // need to extend as screenshot function requires some time
@@ -198,6 +214,9 @@ Create <code>cypress.config.js</code> with the following contents, and change yo
198
214
  getPurpleA11yScripts() {
199
215
  return oobeeA11y.getScripts();
200
216
  },
217
+ gradeReadability(sentences) {
218
+ return oobeeA11y.gradeReadability(sentences);
219
+ },
201
220
  async pushPurpleA11yScanResults({res, metadata, elementsToClick}) {
202
221
  return await oobeeA11y.pushScanResults(res, metadata, elementsToClick);
203
222
  },
@@ -229,8 +248,26 @@ Create a sub-folder and file <code>cypress/support/e2e.js</code> with the follow
229
248
  Cypress.Commands.add("runPurpleA11yScan", (items={}) => {
230
249
  cy.window().then(async (win) => {
231
250
  const { elementsToScan, elementsToClick, metadata } = items;
232
- const res = await win.runA11yScan(elementsToScan);
233
- cy.task("pushPurpleA11yScanResults", {res, metadata, elementsToClick}).then((count) => { return count });
251
+
252
+ // extract text from the page for readability grading
253
+ const sentences = win.extractText();
254
+ // run readability grading separately as it cannot be done within the browser context
255
+ cy.task("gradeReadability", sentences).then(
256
+ async (gradingReadabilityFlag) => {
257
+ // passing the grading flag to runA11yScan to inject violation as needed
258
+ const res = await win.runA11yScan(
259
+ elementsToScan,
260
+ gradingReadabilityFlag,
261
+ );
262
+ cy.task("pushPurpleA11yScanResults", {
263
+ res,
264
+ metadata,
265
+ elementsToClick,
266
+ }).then((count) => {
267
+ return count;
268
+ });
269
+ },
270
+ );
234
271
  cy.task("finishPurpleA11yTestCase"); // test the accumulated number of issue occurrences against specified thresholds. If exceed, terminate oobeeA11y instance.
235
272
  });
236
273
  });
@@ -244,7 +281,7 @@ Create <code>cypress/e2e/spec.cy.js</code> with the following contents:
244
281
  describe("template spec", () => {
245
282
  it("should run oobee A11y", () => {
246
283
  cy.visit(
247
- "https://govtechsg.github.io/purple-banner-embeds/oobee-integrated-scan-example.htm"
284
+ "https://govtechsg.github.io/purple-banner-embeds/purple-integrated-scan-example.htm"
248
285
  );
249
286
  cy.injectPurpleA11yScripts();
250
287
  cy.runPurpleA11yScan();
@@ -338,17 +375,22 @@ Create <code>cypress.config.ts</code> with the following contents, and change yo
338
375
  // name of the generated zip of the results at the end of scan
339
376
  const resultsZipName: string = "oobee-scan-results"
340
377
 
341
- const oobeeA11y = await oobeeA11yInit(
378
+ const oobeeA11y = await oobeeA11yInit({
342
379
  entryUrl: "https://govtechsg.github.io", // initial url to start scan
343
380
  testLabel: "Demo Cypress Scan", // label for test
344
381
  name: "Your Name",
345
382
  email: "email@domain.com",
346
383
  includeScreenshots: true, // include screenshots of affected elements in the report
347
384
  viewportSettings,
348
- thresholds,
349
- scanAboutMetadata,
350
- resultsZipName
351
- );
385
+ thresholds: { mustFix: undefined, goodToFix: undefined },
386
+ scanAboutMetadata: undefined,
387
+ zip: resultsZipName,
388
+ deviceChosen: "",
389
+ strategy: undefined,
390
+ ruleset: ["enable-wcag-aaa"], // add "disable-oobee" to disable Oobee custom checks
391
+ specifiedMaxConcurrency: undefined,
392
+ followRobots: undefined,
393
+ });
352
394
 
353
395
  export default defineConfig({
354
396
  taskTimeout: 120000, // need to extend as screenshot function requires some time
@@ -360,6 +402,9 @@ Create <code>cypress.config.ts</code> with the following contents, and change yo
360
402
  getPurpleA11yScripts(): string {
361
403
  return oobeeA11y.getScripts();
362
404
  },
405
+ gradeReadability(sentences: string[]): string {
406
+ return oobeeA11y.gradeReadability(sentences);
407
+ },
363
408
  async pushPurpleA11yScanResults({res, metadata, elementsToClick}: { res: any, metadata: any, elementsToClick: any[] }): Promise<{ mustFix: number, goodToFix: number }> {
364
409
  return await oobeeA11y.pushScanResults(res, metadata, elementsToClick);
365
410
  },
@@ -393,8 +438,26 @@ Create a sub-folder and file <code>src/cypress/support/e2e.ts</code> with the fo
393
438
  Cypress.Commands.add("runPurpleA11yScan", (items={}) => {
394
439
  cy.window().then(async (win) => {
395
440
  const { elementsToScan, elementsToClick, metadata } = items;
396
- const res = await win.runA11yScan(elementsToScan);
397
- cy.task("pushPurpleA11yScanResults", {res, metadata, elementsToClick}).then((count) => { return count });
441
+
442
+ // extract text from the page for readability grading
443
+ const sentences = win.extractText();
444
+ // run readability grading separately as it cannot be done within the browser context
445
+ cy.task("gradeReadability", sentences).then(
446
+ async (gradingReadabilityFlag: string) => {
447
+ // passing the grading flag to runA11yScan to inject violation as needed
448
+ const res = await win.runA11yScan(
449
+ elementsToScan,
450
+ gradingReadabilityFlag,
451
+ );
452
+ cy.task("pushPurpleA11yScanResults", {
453
+ res,
454
+ metadata,
455
+ elementsToClick,
456
+ }).then((count) => {
457
+ return count;
458
+ });
459
+ },
460
+ );
398
461
  cy.task("finishPurpleA11yTestCase"); // test the accumulated number of issue occurrences against specified thresholds. If exceed, terminate oobeeA11y instance.
399
462
  });
400
463
  });
@@ -443,6 +506,7 @@ declare namespace Cypress {
443
506
 
444
507
  interface Window {
445
508
  runA11yScan: (elementsToScan?: string[], gradingReadabilityFlag?: string) => Promise<any>;
509
+ extractText: () => string[];
446
510
  }
447
511
  ```
448
512
 
@@ -475,6 +539,7 @@ On your project's root folder, create a Playwright test file <code>oobeeA11y-pla
475
539
 
476
540
  import { chromium } from "playwright";
477
541
  import oobeeA11yInit from "@govtechsg/oobee";
542
+ import { extractText } from "@govtechsg/oobee/dist/crawlers/custom/extractText.js";
478
543
 
479
544
  // viewport used in tests to optimise screenshots
480
545
  const viewportSettings = { width: 1920, height: 1040 };
@@ -483,16 +548,22 @@ On your project's root folder, create a Playwright test file <code>oobeeA11y-pla
483
548
  // additional information to include in the "Scan About" section of the report
484
549
  const scanAboutMetadata = { browser: 'Chrome (Desktop)' };
485
550
 
486
- const oobeeA11y = await oobeeA11yInit(
487
- "https://govtechsg.github.io", // initial url to start scan
488
- "Demo Playwright Scan", // label for test
489
- "Your Name",
490
- "email@domain.com",
491
- true, // include screenshots of affected elements in the report
551
+ const oobeeA11y = await oobeeA11yInit({
552
+ entryUrl: "https://govtechsg.github.io", // initial url to start scan
553
+ testLabel: "Demo Playwright Scan", // label for test
554
+ name: "Your Name",
555
+ email: "email@domain.com",
556
+ includeScreenshots: true, // include screenshots of affected elements in the report
492
557
  viewportSettings,
493
- thresholds,
494
- scanAboutMetadata,
495
- );
558
+ thresholds: { mustFix: undefined, goodToFix: undefined },
559
+ scanAboutMetadata: undefined,
560
+ zip: resultsZipName,
561
+ deviceChosen: "",
562
+ strategy: undefined,
563
+ ruleset: ["enable-wcag-aaa"],
564
+ specifiedMaxConcurrency: undefined,
565
+ followRobots: undefined,
566
+ });
496
567
 
497
568
  (async () => {
498
569
  const browser = await chromium.launch({
@@ -501,18 +572,22 @@ On your project's root folder, create a Playwright test file <code>oobeeA11y-pla
501
572
  const context = await browser.newContext();
502
573
  const page = await context.newPage();
503
574
 
504
- const runPurpleA11yScan = async (elementsToScan) => {
575
+ const runPurpleA11yScan = async (elementsToScan, gradingReadabilityFlag) => {
505
576
  const scanRes = await page.evaluate(
506
- async elementsToScan => await runA11yScan(elementsToScan),
507
- elementsToScan,
577
+ async ({ elementsToScan, gradingReadabilityFlag }) => await runA11yScan(elementsToScan, gradingReadabilityFlag),
578
+ { elementsToScan, gradingReadabilityFlag },
508
579
  );
509
580
  await oobeeA11y.pushScanResults(scanRes);
510
581
  oobeeA11y.testThresholds(); // test the accumulated number of issue occurrences against specified thresholds. If exceed, terminate oobeeA11y instance.
511
582
  };
512
583
 
513
- await page.goto('https://govtechsg.github.io/purple-banner-embeds/oobee-integrated-scan-example.htm');
584
+ await page.goto('https://govtechsg.github.io/purple-banner-embeds/purple-integrated-scan-example.htm');
514
585
  await page.evaluate(oobeeA11y.getScripts());
515
- await runPurpleA11yScan();
586
+
587
+ const sentences = await page.evaluate(() => extractText());
588
+ const gradingReadabilityFlag = await oobeeA11y.gradeReadability(sentences);
589
+
590
+ await runPurpleA11yScan([], gradingReadabilityFlag);
516
591
 
517
592
  await page.getByRole('button', { name: 'Click Me' }).click();
518
593
  // Run a scan on <input> and <button> elements
@@ -566,8 +641,12 @@ Create a sub-folder and Playwright test file <code>src/oobeeA11y-playwright-demo
566
641
 
567
642
  import { Browser, BrowserContext, Page, chromium } from "playwright";
568
643
  import oobeeA11yInit from "@govtechsg/oobee";
644
+ import { extractText } from "@govtechsg/oobee/dist/crawlers/custom/extractText.js";
569
645
 
570
- declare const runA11yScan: (elementsToScan?: string[]) => Promise<any>;
646
+ declare const runA11yScan: (
647
+ elementsToScan?: string[],
648
+ gradingReadabilityFlag?: string,
649
+ ) => Promise<any>;
571
650
 
572
651
  interface ViewportSettings {
573
652
  width: number;
@@ -590,16 +669,22 @@ Create a sub-folder and Playwright test file <code>src/oobeeA11y-playwright-demo
590
669
  // additional information to include in the "Scan About" section of the report
591
670
  const scanAboutMetadata: ScanAboutMetadata = { browser: 'Chrome (Desktop)' };
592
671
 
593
- const oobeeA11y = await oobeeA11yInit(
594
- "https://govtechsg.github.io", // initial url to start scan
595
- "Demo Playwright Scan", // label for test
596
- "Your Name",
597
- "email@domain.com",
598
- true, // include screenshots of affected elements in the report
672
+ const oobeeA11y = await oobeeA11yInit({
673
+ entryUrl: "https://govtechsg.github.io", // initial url to start scan
674
+ testLabel: "Demo Playwright Scan", // label for test
675
+ name: "Your Name",
676
+ email: "email@domain.com",
677
+ includeScreenshots: true, // include screenshots of affected elements in the report
599
678
  viewportSettings,
600
- thresholds,
601
- scanAboutMetadata,
602
- );
679
+ thresholds: { mustFix: undefined, goodToFix: undefined },
680
+ scanAboutMetadata: undefined,
681
+ zip: resultsZipName,
682
+ deviceChosen: "",
683
+ strategy: undefined,
684
+ ruleset: ["enable-wcag-aaa"],
685
+ specifiedMaxConcurrency: undefined,
686
+ followRobots: undefined,
687
+ });
603
688
 
604
689
  (async () => {
605
690
  const browser: Browser = await chromium.launch({
@@ -608,18 +693,22 @@ Create a sub-folder and Playwright test file <code>src/oobeeA11y-playwright-demo
608
693
  const context: BrowserContext = await browser.newContext();
609
694
  const page: Page = await context.newPage();
610
695
 
611
- const runPurpleA11yScan = async (elementsToScan?: string[]) => {
696
+ const runPurpleA11yScan = async (elementsToScan?: string[], gradingReadabilityFlag?: string) => {
612
697
  const scanRes = await page.evaluate(
613
- async elementsToScan => await runA11yScan(elementsToScan),
614
- elementsToScan,
698
+ async ({ elementsToScan, gradingReadabilityFlag }) => await runA11yScan(elementsToScan, gradingReadabilityFlag),
699
+ { elementsToScan, gradingReadabilityFlag },
615
700
  );
616
701
  await oobeeA11y.pushScanResults(scanRes);
617
702
  oobeeA11y.testThresholds(); // test the accumulated number of issue occurrences against specified thresholds. If exceed, terminate oobeeA11y instance.
618
703
  };
619
704
 
620
- await page.goto('https://govtechsg.github.io/purple-banner-embeds/oobee-integrated-scan-example.htm');
705
+ await page.goto('https://govtechsg.github.io/purple-banner-embeds/purple-integrated-scan-example.htm');
621
706
  await page.evaluate(oobeeA11y.getScripts());
622
- await runPurpleA11yScan();
707
+
708
+ const sentences = await page.evaluate(() => extractText());
709
+ const gradingReadabilityFlag = await oobeeA11y.gradeReadability(sentences);
710
+
711
+ await runPurpleA11yScan([], gradingReadabilityFlag);
623
712
 
624
713
  await page.getByRole('button', { name: 'Click Me' }).click();
625
714
  // Run a scan on <input> and <button> elements
package/README.md CHANGED
@@ -80,6 +80,21 @@ export PATH="<location of verapdf>:$PATH"
80
80
  verapdf --version
81
81
  ```
82
82
 
83
+ #### Environment variables (Optional)
84
+ | Variable Name | Description | Default |
85
+ | ------------- | ----------- | ------- |
86
+ | OOBEE_VERBOSE | When set to `true`, log output goes to console | `false` |
87
+ | RUNNING_FROM_PH_GUI | Legacy, replaced by OOBEE_VERBOSE | `false` |
88
+ | WARN_LEVEL | Only used in tests. | |
89
+
90
+ #### Environment variables used internally (Do not set)
91
+ Do not set these environment variables or behaviour might change unexpectedly.
92
+ | Variable Name | Description |
93
+ | ------------- | ----------- |
94
+ | CRAWLEE_LOG_LEVEL | https://crawlee.dev/docs/guides/configuration#crawlee_log_level |
95
+ | CRAWLEE_STORAGE_DIR | https://crawlee.dev/docs/guides/configuration#crawlee_storage_dir |
96
+ | CRAWLEE_HEADLESS | https://crawlee.dev/docs/guides/configuration#crawlee_headless |
97
+
83
98
  #### Facing issues?
84
99
 
85
100
  Please refer to [Troubleshooting section](#troubleshooting) for more information.
package/exclusions.txt CHANGED
@@ -1,3 +1,6 @@
1
1
  \.*login.singpass.gov.sg\.*
2
2
  \.*auth.singpass.gov.sg\.*
3
- \.*form.gov.sg\.*
3
+ \.*form.gov.sg\.*
4
+ \.*login.microsoftonline.com\.*
5
+ \.*id.atlassian.com\.*
6
+ \.*cloudflareaccess.com\.*
package/package.json CHANGED
@@ -1,14 +1,14 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.36",
4
+ "version": "0.10.39",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "dependencies": {
8
8
  "@json2csv/node": "^7.0.3",
9
9
  "@napi-rs/canvas": "^0.1.53",
10
10
  "axe-core": "^4.10.2",
11
- "axios": "^1.7.4",
11
+ "axios": "^1.8.2",
12
12
  "base64-stream": "^1.0.0",
13
13
  "cheerio": "^1.0.0-rc.12",
14
14
  "crawlee": "^3.11.1",
@@ -333,10 +333,3 @@ To obtain the JSON files, you need to base64-decode the file followed by gunzip.
333
333
  },
334
334
  };
335
335
 
336
- export const configureReportSetting = (isEnabled: boolean): void => {
337
- if (isEnabled) {
338
- process.env.REPORT_BREAKDOWN = '1';
339
- } else {
340
- process.env.REPORT_BREAKDOWN = '0';
341
- }
342
- };
@@ -461,7 +461,7 @@ const checkUrlConnectivityWithBrowser = async (
461
461
 
462
462
  res.content = await page.content();
463
463
 
464
- const contentType = response.headers()['content-type'];
464
+ const contentType = response?.headers?.()['content-type'] || '';
465
465
  if (contentType.includes('xml')) {
466
466
  const responseFromUrl = await requestToUrl(res.url, true, extraHTTPHeaders);
467
467
 
@@ -1776,6 +1776,44 @@ export const submitForm = async (
1776
1776
  }
1777
1777
  }
1778
1778
  };
1779
+
1780
+ export async function initModifiedUserAgent(browser?: string, playwrightDeviceDetailsObject?: object) {
1781
+ const isHeadless = process.env.CRAWLEE_HEADLESS === '1';
1782
+
1783
+ // If headless mode is enabled, ensure the headless flag is set.
1784
+ if (isHeadless && !constants.launchOptionsArgs.includes('--headless=new')) {
1785
+ constants.launchOptionsArgs.push('--headless=new');
1786
+ }
1787
+
1788
+ // Build the launch options using your production settings.
1789
+ // headless is forced to false as in your persistent context, and we merge in getPlaywrightLaunchOptions and device details.
1790
+ const launchOptions = {
1791
+ headless: false,
1792
+ ...getPlaywrightLaunchOptions(browser),
1793
+ ...playwrightDeviceDetailsObject,
1794
+ };
1795
+
1796
+ // Launch a temporary persistent context with an empty userDataDir to mimic your production browser setup.
1797
+ const browserContext = await constants.launcher.launchPersistentContext('', launchOptions);
1798
+ const page = await browserContext.newPage();
1799
+
1800
+ // Retrieve the default user agent.
1801
+ const defaultUA = await page.evaluate(() => navigator.userAgent);
1802
+ await browserContext.close();
1803
+
1804
+ // Modify the UA:
1805
+ // Replace "HeadlessChrome" with "Chrome" if present.
1806
+ let modifiedUA = defaultUA.includes('HeadlessChrome')
1807
+ ? defaultUA.replace('HeadlessChrome', 'Chrome')
1808
+ : defaultUA;
1809
+
1810
+ // Push the modified UA flag into your global launch options.
1811
+ constants.launchOptionsArgs.push(`--user-agent=${modifiedUA}`);
1812
+ // Optionally log the modified UA.
1813
+ // console.log('Modified User Agent:', modifiedUA);
1814
+ }
1815
+
1816
+
1779
1817
  /**
1780
1818
  * @param {string} browser browser name ("chrome" or "edge", null for chromium, the default Playwright browser)
1781
1819
  * @returns playwright launch options object. For more details: https://playwright.dev/docs/api/class-browsertype#browser-type-launch
@@ -7,6 +7,7 @@ import os from 'os';
7
7
  import { spawnSync, execSync } from 'child_process';
8
8
  import { chromium } from 'playwright';
9
9
  import { silentLogger } from '../logs.js';
10
+ import { PageInfo } from '../mergeAxeResults.js';
10
11
 
11
12
  const filename = fileURLToPath(import.meta.url);
12
13
  const dirname = path.dirname(filename);
@@ -177,16 +178,16 @@ export const basicAuthRegex = /^.*\/\/.*:.*@.*$/i;
177
178
  export const axeScript = path.join(dirname, '../../node_modules/axe-core/axe.min.js');
178
179
  export class UrlsCrawled {
179
180
  toScan: string[] = [];
180
- scanned: { url: string; actualUrl: string; pageTitle: string }[] = [];
181
- invalid: { url: string; actualUrl: string; pageTitle: string }[] = [];
181
+ scanned: PageInfo[] = [];
182
+ invalid: PageInfo[] = [];
182
183
  scannedRedirects: { fromUrl: string; toUrl: string }[] = [];
183
184
  notScannedRedirects: { fromUrl: string; toUrl: string }[] = [];
184
- outOfDomain: string[] = [];
185
- blacklisted: { url: string; actualUrl: string; pageTitle: string }[] = [];
186
- error: { url: string }[] = [];
187
- exceededRequests: string[] = [];
188
- forbidden: { url: string; actualUrl: string; pageTitle: string }[] = [];
189
- userExcluded: { url: string; actualUrl: string; pageTitle: string }[] = [];
185
+ outOfDomain: PageInfo[] = [];
186
+ blacklisted: PageInfo[] = [];
187
+ error: PageInfo[] = [];
188
+ exceededRequests: PageInfo[] = [];
189
+ forbidden: PageInfo[] = [];
190
+ userExcluded: PageInfo[] = [];
190
191
  everything: string[] = [];
191
192
 
192
193
  constructor(urlsCrawled?: Partial<UrlsCrawled>) {