@govtechsg/oobee 0.10.36 → 0.10.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/docker-test.yml +1 -1
- package/DETAILS.md +3 -3
- package/INTEGRATION.md +142 -53
- package/README.md +17 -0
- package/REPORTS.md +362 -0
- package/exclusions.txt +4 -1
- package/package.json +2 -2
- package/src/constants/cliFunctions.ts +0 -7
- package/src/constants/common.ts +39 -1
- package/src/constants/constants.ts +9 -8
- package/src/crawlers/commonCrawlerFunc.ts +95 -220
- package/src/crawlers/crawlDomain.ts +10 -23
- package/src/crawlers/crawlLocalFile.ts +2 -0
- package/src/crawlers/crawlSitemap.ts +6 -4
- package/src/crawlers/custom/escapeCssSelector.ts +10 -0
- package/src/crawlers/custom/evaluateAltText.ts +13 -0
- package/src/crawlers/custom/extractAndGradeText.ts +0 -2
- package/src/crawlers/custom/extractText.ts +28 -0
- package/src/crawlers/custom/findElementByCssSelector.ts +46 -0
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +982 -842
- package/src/crawlers/custom/framesCheck.ts +51 -0
- package/src/crawlers/custom/getAxeConfiguration.ts +126 -0
- package/src/crawlers/custom/gradeReadability.ts +30 -0
- package/src/crawlers/custom/xPathToCss.ts +178 -0
- package/src/crawlers/pdfScanFunc.ts +67 -26
- package/src/mergeAxeResults.ts +535 -132
- package/src/npmIndex.ts +130 -62
- package/src/screenshotFunc/htmlScreenshotFunc.ts +1 -1
- package/src/screenshotFunc/pdfScreenshotFunc.ts +34 -1
- package/src/static/ejs/partials/components/ruleOffcanvas.ejs +1 -1
- package/src/static/ejs/partials/components/scanAbout.ejs +1 -1
- package/src/static/ejs/partials/footer.ejs +3 -3
- package/src/static/ejs/partials/scripts/reportSearch.ejs +112 -74
- package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +2 -2
- package/src/static/ejs/partials/summaryMain.ejs +3 -3
- package/src/static/ejs/report.ejs +3 -3
- package/src/utils.ts +289 -13
- package/src/xPathToCssCypress.ts +178 -0
- package/src/crawlers/customAxeFunctions.ts +0 -82
package/REPORTS.md
ADDED
@@ -0,0 +1,362 @@
|
|
1
|
+
# Accessibility Scan Reports Documentation
|
2
|
+
|
3
|
+
Various types of reports are provided to help you to identify, manage, and understand the scale of accessibility for each website.
|
4
|
+
|
5
|
+
In order to generate JSON reports, you need to pass the switch `-g yes` to the Oobee CLI. For each of the JSON reports listed below, a compressed version with the file extension `json.gz.b64` is provided. See below for steps on uncompressing the compressed JSON files.
|
6
|
+
|
7
|
+
## HTML, CSV and Summary Reports
|
8
|
+
|
9
|
+
### report.html
|
10
|
+
An interactive HTML report that allows the user to explore and understand the different accessibility issues. Note that if the scan results are very large (more than 510 MB of JSON data), the individual accessibility issues will not be viewable in the report. Please refer to report.csv for the detailed accessibility issues.
|
11
|
+
|
12
|
+
### summary.pdf
|
13
|
+
A short printable summary of the types and occurrences of accessibility issues found. It contains metadata on how many WCAG (Level A and AA) guidelines were violated.
|
14
|
+
|
15
|
+
### report.csv
|
16
|
+
This is the report which contains each individual accessibility issue found, across mustFix, goodToFix, and needsReview categories. It contains the same information as a regular report.html except the scan metadata (i.e. how the scan was set up to run). For scan metadata, please refer to scanData.csv.
|
17
|
+
|
18
|
+
This file contains detailed accessibility scan results, including issue details, affected pages, and recommendations.
|
19
|
+
|
20
|
+
#### CSV Structure
|
21
|
+
|
22
|
+
| Column Name | Description |
|
23
|
+
|----------------------|-------------|
|
24
|
+
| `customFlowLabel` | Label indicating the custom flow used for the scan. |
|
25
|
+
| `deviceChosen` | Type of device used during the scan (e.g., Desktop, Mobile). |
|
26
|
+
| `scanCompletedAt` | Timestamp indicating when the scan was completed (ISO 8601 format). |
|
27
|
+
| `severity` | Severity level of the issue (`mustFix`, `goodToFix`, `needsReview`, `error`). |
|
28
|
+
| `issueId` | Unique identifier for the issue found. |
|
29
|
+
| `issueDescription` | Description of the issue detected during the scan. |
|
30
|
+
| `wcagConformance` | WCAG guidelines that the issue relates to, comma-separated. |
|
31
|
+
| `url` | The URL of the affected page. |
|
32
|
+
| `pageTitle` | The title of the affected page. |
|
33
|
+
| `context` | HTML snippet or element associated with the issue. |
|
34
|
+
| `howToFix` | Suggested fix or recommendation to resolve the issue. |
|
35
|
+
| `axeImpact` | Impact severity as determined by Axe (e.g., `critical`, `serious`, `moderate`, `minor`). |
|
36
|
+
| `xpath` | XPath selector for locating the issue within the page. |
|
37
|
+
| `learnMore` | URL to additional documentation about the issue (masked for privacy). |
|
38
|
+
|
39
|
+
#### Example CSV
|
40
|
+
|
41
|
+
```csv
|
42
|
+
"customFlowLabel","deviceChosen","scanCompletedAt","severity","issueId","issueDescription","wcagConformance","url","pageTitle","context","howToFix","axeImpact","xpath","learnMore"
|
43
|
+
"Custom Flow","Desktop","2025-03-13T10:09:18.733Z","needsReview","aria-prohibited-attr","Elements must only use permitted ARIA attributes","wcag2a,wcag412","https://example.com/page1","Example Page 1","<a class=""nav-link"" aria-label=""Example Link"">Example<i class=""icon-chevron-down"" aria-hidden=""true""></i></a>","aria-label attribute is not well supported on an <a> with no valid role attribute.","serious","a[aria-label=""Example Link""]","https://dequeuniversity.com/rules/axe/4.10/aria-prohibited-attr"
|
44
|
+
"Custom Flow","Desktop","2025-03-13T10:09:18.733Z","error","error-pages-skipped","Page was skipped during the scan",,"https://example.com/file.pdf","Error",,,,,
|
45
|
+
```
|
46
|
+
|
47
|
+
## scanItemsSummary.json
|
48
|
+
|
49
|
+
This file contains a summary of accessibility issues found in a scan, categorized into different levels of severity.
|
50
|
+
|
51
|
+
### Sample JSON
|
52
|
+
```json
|
53
|
+
{
|
54
|
+
"oobeeAppVersion": "<string>",
|
55
|
+
"mustFix": { "totalItems": <number>, "totalRuleIssues": <number> },
|
56
|
+
"goodToFix": { "totalItems": <number>, "totalRuleIssues": <number> },
|
57
|
+
"needsReview": { "totalItems": <number>, "totalRuleIssues": <number> },
|
58
|
+
"topTenPagesWithMostIssues": [
|
59
|
+
{
|
60
|
+
"url": "<string>",
|
61
|
+
"pageTitle": "<string>",
|
62
|
+
"totalIssues": <number>,
|
63
|
+
"totalOccurrences": <number>
|
64
|
+
}
|
65
|
+
],
|
66
|
+
"wcagLinks": {},
|
67
|
+
"wcagPassPercentage": {
|
68
|
+
"passPercentageAA": "<string>",
|
69
|
+
"totalWcagChecksAA": <number>,
|
70
|
+
"totalWcagViolationsAA": <number>,
|
71
|
+
"passPercentageAAandAAA": "<string>",
|
72
|
+
"totalWcagChecksAAandAAA": <number>,
|
73
|
+
"totalWcagViolationsAAandAAA": <number>
|
74
|
+
},
|
75
|
+
"progressPercentage": {
|
76
|
+
"averageProgressPercentageAA": "<string>",
|
77
|
+
"averageProgressPercentageAAandAAA": "<string>"
|
78
|
+
},
|
79
|
+
"issuesPercentage": {
|
80
|
+
"avgTypesOfIssuesCountAtMustFix": "<number>",
|
81
|
+
"avgTypesOfIssuesCountAtGoodToFix": "<number>",
|
82
|
+
"avgTypesOfIssuesCountAtMustFixAndGoodToFix": "<number>",
|
83
|
+
"avgTypesOfIssuesPercentageOfTotalRulesAtMustFix": "<number>",
|
84
|
+
"avgTypesOfIssuesPercentageOfTotalRulesAtGoodToFix": "<number>",
|
85
|
+
"avgTypesOfIssuesPercentageOfTotalRulesAtMustFixAndGoodToFix": "<number>",
|
86
|
+
"totalRulesMustFix": <number>,
|
87
|
+
"totalRulesGoodToFix": <number>,
|
88
|
+
"totalRulesMustFixAndGoodToFix": <number>,
|
89
|
+
"pagesAffectedPerRule": {
|
90
|
+
"<string>": <number>
|
91
|
+
},
|
92
|
+
"pagesPercentageAffectedPerRule": {
|
93
|
+
"<string>": "<string>"
|
94
|
+
}
|
95
|
+
},
|
96
|
+
"totalPagesScanned": <number>,
|
97
|
+
"totalPagesNotScanned": <number>,
|
98
|
+
"topTenIssues": [
|
99
|
+
{
|
100
|
+
"category": "<string>",
|
101
|
+
"ruleId": "<string>",
|
102
|
+
"description": "<string>",
|
103
|
+
"axeImpact": "<string>",
|
104
|
+
"conformance": ["<string>", "<string>"],
|
105
|
+
"totalItems": <number>
|
106
|
+
}
|
107
|
+
]
|
108
|
+
}
|
109
|
+
```
|
110
|
+
|
111
|
+
| Variable | Description |
|
112
|
+
|----------|-------------|
|
113
|
+
| `oobeeAppVersion` | Version of the Oobee application used for the scan. |
|
114
|
+
| `mustFix` | Summary of must-fix issues including `totalItems` and `totalRuleIssues`. |
|
115
|
+
| `goodToFix` | Summary of good-to-fix issues including `totalItems` and `totalRuleIssues`. |
|
116
|
+
| `needsReview` | Summary of needs-review issues including `totalItems` and `totalRuleIssues`. |
|
117
|
+
| `topTenPagesWithMostIssues` | List of the top ten pages with the most accessibility issues. |
|
118
|
+
| `url` | URL of the affected page. |
|
119
|
+
| `pageTitle` | Title of the affected page. |
|
120
|
+
| `totalIssues` | Total number of accessibility issues on the page. |
|
121
|
+
| `totalOccurrences` | Number of times these issues occurred. |
|
122
|
+
| `wcagLinks` | Mapping of WCAG guidelines to their documentation URLs. |
|
123
|
+
| `wcagPassPercentage` | Summary of WCAG compliance percentages. |
|
124
|
+
| `passPercentageAA` | Percentage of WCAG AA guidelines passed. |
|
125
|
+
| `totalWcagChecksAA` | Total WCAG AA checks performed. |
|
126
|
+
| `totalWcagViolationsAA` | Total WCAG AA violations found. |
|
127
|
+
| `passPercentageAAandAAA` | Percentage of WCAG AA and AAA guidelines passed. |
|
128
|
+
| `totalWcagChecksAAandAAA` | Total WCAG AA and AAA checks performed. |
|
129
|
+
| `totalWcagViolationsAAandAAA` | Total WCAG AA and AAA violations found. |
|
130
|
+
| `progressPercentage` | Summary of average progress percentages. |
|
131
|
+
| `averageProgressPercentageAA` | Average progress percentage for WCAG AA guidelines. |
|
132
|
+
| `averageProgressPercentageAAandAAA` | Average progress percentage for WCAG AA and AAA guidelines. |
|
133
|
+
| `issuesPercentage` | Detailed breakdown of issue percentages and counts. |
|
134
|
+
| `avgTypesOfIssuesCountAtMustFix` | Average count of issue types at "Must Fix" level. |
|
135
|
+
| `avgTypesOfIssuesCountAtGoodToFix` | Average count of issue types at "Good to Fix" level. |
|
136
|
+
| `avgTypesOfIssuesCountAtMustFixAndGoodToFix` | Average count of issue types at both "Must Fix" and "Good to Fix" levels per page. |
|
137
|
+
| `avgTypesOfIssuesPercentageOfTotalRulesAtMustFix` | Average percentage of total rules affected at "Must Fix" level per page. |
|
138
|
+
| `avgTypesOfIssuesPercentageOfTotalRulesAtGoodToFix` | Average percentage of total rules affected at "Good to Fix" level per page. |
|
139
|
+
| `avgTypesOfIssuesPercentageOfTotalRulesAtMustFixAndGoodToFix` | Average percentage of total rules affected at both "Must Fix" and "Good to Fix" levels per page. |
|
140
|
+
| `totalRulesMustFix` | Total number of rules categorized as "Must Fix". |
|
141
|
+
| `totalRulesGoodToFix` | Total number of rules categorized as "Good to Fix". |
|
142
|
+
| `totalRulesMustFixAndGoodToFix` | Total number of rules categorized as either "Must Fix" or "Good to Fix". |
|
143
|
+
| `pagesAffectedPerRule` | Number of pages affected by each rule (keyed by rule ID). |
|
144
|
+
| `pagesPercentageAffectedPerRule` | Percentage of pages affected by each rule (keyed by rule ID). |
|
145
|
+
| `totalPagesScanned` | Total number of pages scanned. |
|
146
|
+
| `totalPagesNotScanned` | Total number of pages not scanned. |
|
147
|
+
| `topTenIssues` | List of the ten most common accessibility issues. |
|
148
|
+
| `category` | Category of the issue (`mustFix`, `goodToFix`, `needsReview`). |
|
149
|
+
| `ruleId` | Identifier of the accessibility rule violated. |
|
150
|
+
| `description` | Description of the accessibility issue. |
|
151
|
+
| `axeImpact` | Severity impact as determined by Axe. |
|
152
|
+
| `conformance` | List of WCAG guidelines the rule conforms to. |
|
153
|
+
| `totalItems` | Number of times this issue was detected. |
|
154
|
+
|
155
|
+
|
156
|
+
## scanIssuesSummary.json
|
157
|
+
|
158
|
+
This file contains a summary of accessibility issues found in a scan, categorized into different levels of severity.
|
159
|
+
|
160
|
+
### Sample JSON
|
161
|
+
```json
|
162
|
+
{
|
163
|
+
"oobeeAppVersion": "<string>",
|
164
|
+
"mustFix": [],
|
165
|
+
"goodToFix": [
|
166
|
+
{
|
167
|
+
"rule": "<string>",
|
168
|
+
"description": "<string>",
|
169
|
+
"axeImpact": "<string>",
|
170
|
+
"helpUrl": "<string>",
|
171
|
+
"conformance": ["<string>", "<string>"],
|
172
|
+
"totalItems": <number>,
|
173
|
+
"pagesAffectedCount": <number>
|
174
|
+
}
|
175
|
+
],
|
176
|
+
"needsReview": [
|
177
|
+
],
|
178
|
+
"passed": [
|
179
|
+
]
|
180
|
+
}
|
181
|
+
```
|
182
|
+
|
183
|
+
| Variable | Description |
|
184
|
+
|----------|-------------|
|
185
|
+
| `oobeeAppVersion` | Version of the Oobee application used for the scan. |
|
186
|
+
| `mustFix` | Array of must-fix issues. |
|
187
|
+
| `goodToFix` | Array of good-to-fix issues. |
|
188
|
+
| `needsReview` | Array of issues requiring human review. |
|
189
|
+
| `passed` | Array of rules that were checked and passed. |
|
190
|
+
| `rule` | Unique identifier of the accessibility rule being checked. |
|
191
|
+
| `description` | Description of the accessibility issue. |
|
192
|
+
| `axeImpact` | Severity impact as determined by Axe. |
|
193
|
+
| `helpUrl` | URL with more information on the accessibility rule. |
|
194
|
+
| `conformance` | List of WCAG guidelines the rule conforms to. |
|
195
|
+
| `totalItems` | Number of times this issue was detected. |
|
196
|
+
| `pagesAffectedCount` | Number of pages where this issue was found. |
|
197
|
+
|
198
|
+
## scanPagesSummary.json
|
199
|
+
|
200
|
+
This file contains a summary of pages affected by accessibility issues.
|
201
|
+
|
202
|
+
### Sample JSON
|
203
|
+
```json
|
204
|
+
{
|
205
|
+
"oobeeAppVersion": "<string>",
|
206
|
+
"pagesAffected": [
|
207
|
+
{
|
208
|
+
"pageTitle": "<string>",
|
209
|
+
"url": "<string>",
|
210
|
+
"totalOccurrencesFailedIncludingNeedsReview": <number>,
|
211
|
+
"totalOccurrencesFailedExcludingNeedsReview": <number>,
|
212
|
+
"totalOccurrencesMustFix": <number>,
|
213
|
+
"totalOccurrencesGoodToFix": <number>,
|
214
|
+
"totalOccurrencesNeedsReview": <number>,
|
215
|
+
"totalOccurrencesPassed": <number>,
|
216
|
+
"typesOfIssuesExclusiveToNeedsReviewCount": <number>,
|
217
|
+
"typesOfIssuesCount": <number>,
|
218
|
+
"typesOfIssuesExcludingNeedsReviewCount": <number>,
|
219
|
+
"categoriesPresent": ["<string>", "<string>"],
|
220
|
+
"conformance": ["<string>", "<string>", "<string>"]
|
221
|
+
}
|
222
|
+
],
|
223
|
+
"pagesNotAffected": [
|
224
|
+
{
|
225
|
+
"pageTitle": "<string>",
|
226
|
+
"url": "<string>",
|
227
|
+
"totalOccurrencesFailedIncludingNeedsReview": <number>,
|
228
|
+
"totalOccurrencesFailedExcludingNeedsReview": <number>,
|
229
|
+
"totalOccurrencesMustFix": <number>,
|
230
|
+
"totalOccurrencesGoodToFix": <number>,
|
231
|
+
"totalOccurrencesNeedsReview": <number>,
|
232
|
+
"totalOccurrencesPassed": <number>,
|
233
|
+
"occurrencesExclusiveToNeedsReview": <boolean>,
|
234
|
+
"typesOfIssuesCount": <number>,
|
235
|
+
"typesOfIssuesExcludingNeedsReviewCount": <number>,
|
236
|
+
"categoriesPresent": ["<string>", "<string>"],
|
237
|
+
"conformance": ["<string>", "<string>", "<string>"]
|
238
|
+
}
|
239
|
+
],
|
240
|
+
"scannedPagesCount": <number>,
|
241
|
+
"pagesNotScanned": [
|
242
|
+
{
|
243
|
+
"url": "<string>",
|
244
|
+
"pageTitle": "<string>",
|
245
|
+
"actualUrl": "about:blank"
|
246
|
+
}
|
247
|
+
],
|
248
|
+
"pagesNotScannedCount": <number>
|
249
|
+
}
|
250
|
+
```
|
251
|
+
|
252
|
+
| Variable | Description |
|
253
|
+
|----------|-------------|
|
254
|
+
| `oobeeAppVersion` | Version of the Oobee application used for the scan. |
|
255
|
+
| `pagesAffected` | Array of objects representing pages with accessibility issues. |
|
256
|
+
| `pageTitle` | Title of the affected page. |
|
257
|
+
| `url` | URL of the affected page. |
|
258
|
+
| `totalOccurrencesFailedIncludingNeedsReview` | Total number of failed checks, including needs-review issues. |
|
259
|
+
| `totalOccurrencesFailedExcludingNeedsReview` | Total number of failed checks, excluding needs-review issues. |
|
260
|
+
| `totalOccurrencesMustFix` | Number of must-fix occurrences on the page. |
|
261
|
+
| `totalOccurrencesGoodToFix` | Number of good-to-fix occurrences on the page. |
|
262
|
+
| `totalOccurrencesNeedsReview` | Number of occurrences requiring review. |
|
263
|
+
| `totalOccurrencesPassed` | Number of checks that passed on the page. |
|
264
|
+
| `typesOfIssuesExclusiveToNeedsReviewCount` | Number of unique needs-review issues found on the page. |
|
265
|
+
| `typesOfIssuesCount` | Number of unique issue types found on the page. |
|
266
|
+
| `typesOfIssuesExcludingNeedsReviewCount` | Number of unique issue types found on the page, excluding needs-review issues. |
|
267
|
+
| `categoriesPresent` | List of issue categories found on the page. |
|
268
|
+
| `conformance` | List of WCAG guidelines applicable to the issues found on the page. |
|
269
|
+
| `pagesNotAffected` | Array of pages that did not have any accessibility issues. |
|
270
|
+
| `scannedPagesCount` | Total number of pages scanned. |
|
271
|
+
| `pagesNotScanned` | Array of pages that were not scanned. |
|
272
|
+
| `pagesNotScannedCount` | Number of pages that were not scanned. |
|
273
|
+
|
274
|
+
|
275
|
+
## scanPagesDetail.json
|
276
|
+
|
277
|
+
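This file contains a per-page breakdown of the scan results. It has the same page-level fields as scanPagesSummary.json, plus a `typesOfIssues` array listing, for each rule found on the page, its occurrences by category.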
|
278
|
+
|
279
|
+
### Sample JSON
|
280
|
+
|
281
|
+
```json
|
282
|
+
{
|
283
|
+
"oobeeAppVersion": "<string>",
|
284
|
+
"pagesAffected": [
|
285
|
+
{
|
286
|
+
"pageTitle": "<string>",
|
287
|
+
"url": "<string>",
|
288
|
+
"totalOccurrencesFailedIncludingNeedsReview": <number>,
|
289
|
+
"totalOccurrencesFailedExcludingNeedsReview": <number>,
|
290
|
+
"totalOccurrencesMustFix": <number>,
|
291
|
+
"totalOccurrencesGoodToFix": <number>,
|
292
|
+
"totalOccurrencesNeedsReview": <number>,
|
293
|
+
"totalOccurrencesPassed": <number>,
|
294
|
+
"occurrencesExclusiveToNeedsReview": <boolean>,
|
295
|
+
"typesOfIssuesCount": <number>,
|
296
|
+
"typesOfIssuesExcludingNeedsReviewCount": <number>,
|
297
|
+
"categoriesPresent": ["<string>", "<string>"],
|
298
|
+
"conformance": ["<string>", "<string>", "<string>"],
|
299
|
+
"typesOfIssues": [
|
300
|
+
{
|
301
|
+
"ruleId": "<string>",
|
302
|
+
"wcagConformance": ["<string>", "<string>"],
|
303
|
+
"occurrencesMustFix": <number>,
|
304
|
+
"occurrencesGoodToFix": <number>,
|
305
|
+
"occurrencesNeedsReview": <number>,
|
306
|
+
"occurrencesPassed": <number>
|
307
|
+
}
|
308
|
+
]
|
309
|
+
}
|
310
|
+
],
|
311
|
+
"pagesNotAffected": [
|
312
|
+
{
|
313
|
+
"pageTitle": "<string>",
|
314
|
+
"url": "<string>",
|
315
|
+
"totalOccurrencesFailedIncludingNeedsReview": <number>,
|
316
|
+
"totalOccurrencesFailedExcludingNeedsReview": <number>,
|
317
|
+
"totalOccurrencesMustFix": <number>,
|
318
|
+
"totalOccurrencesGoodToFix": <number>,
|
319
|
+
"totalOccurrencesNeedsReview": <number>,
|
320
|
+
"totalOccurrencesPassed": <number>,
|
321
|
+
"occurrencesExclusiveToNeedsReview": <boolean>,
|
322
|
+
"typesOfIssuesCount": <number>,
|
323
|
+
"typesOfIssuesExcludingNeedsReviewCount": <number>,
|
324
|
+
"categoriesPresent": ["<string>", "<string>"],
|
325
|
+
"conformance": ["<string>", "<string>", "<string>"],
|
326
|
+
"typesOfIssues": [
|
327
|
+
{
|
328
|
+
"ruleId": "<string>",
|
329
|
+
"wcagConformance": ["<string>", "<string>"],
|
330
|
+
"occurrencesMustFix": <number>,
|
331
|
+
"occurrencesGoodToFix": <number>,
|
332
|
+
"occurrencesNeedsReview": <number>,
|
333
|
+
"occurrencesPassed": <number>
|
334
|
+
}
|
335
|
+
]
|
336
|
+
}
|
337
|
+
],
|
338
|
+
"scannedPagesCount": <number>,
|
339
|
+
"pagesNotScanned": [
|
340
|
+
{
|
341
|
+
"url": "<string>",
|
342
|
+
"pageTitle": "<string>",
|
343
|
+
"actualUrl": "about:blank"
|
344
|
+
}
|
345
|
+
],
|
346
|
+
"pagesNotScannedCount": <number>
|
347
|
+
}
|
348
|
+
```
|
349
|
+
|
350
|
+
## Manage Compressed JSON in Base64 Encoding
|
351
|
+
|
352
|
+
To decompress a `.json.gz.b64` file, first base64-decode its contents into a byte array (`compressedBytes` below), then inflate it with the `pako` library installed:
|
353
|
+
```js
|
354
|
+
// Decompress the binary data using pako.inflate
|
355
|
+
const decompressedBytes = pako.inflate(compressedBytes);
|
356
|
+
|
357
|
+
// Decode the decompressed bytes into a UTF-8 string
|
358
|
+
const jsonString = new TextDecoder().decode(decompressedBytes);
|
359
|
+
|
360
|
+
// Parse and return the JSON object
|
361
|
+
return JSON.parse(jsonString);
|
362
|
+
```
|
package/exclusions.txt
CHANGED
package/package.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
{
|
2
2
|
"name": "@govtechsg/oobee",
|
3
3
|
"main": "dist/npmIndex.js",
|
4
|
-
"version": "0.10.36",
|
4
|
+
"version": "0.10.42",
|
5
5
|
"type": "module",
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
7
7
|
"dependencies": {
|
8
8
|
"@json2csv/node": "^7.0.3",
|
9
9
|
"@napi-rs/canvas": "^0.1.53",
|
10
10
|
"axe-core": "^4.10.2",
|
11
|
-
"axios": "^1.
|
11
|
+
"axios": "^1.8.2",
|
12
12
|
"base64-stream": "^1.0.0",
|
13
13
|
"cheerio": "^1.0.0-rc.12",
|
14
14
|
"crawlee": "^3.11.1",
|
@@ -333,10 +333,3 @@ To obtain the JSON files, you need to base64-decode the file followed by gunzip.
|
|
333
333
|
},
|
334
334
|
};
|
335
335
|
|
336
|
-
export const configureReportSetting = (isEnabled: boolean): void => {
|
337
|
-
if (isEnabled) {
|
338
|
-
process.env.REPORT_BREAKDOWN = '1';
|
339
|
-
} else {
|
340
|
-
process.env.REPORT_BREAKDOWN = '0';
|
341
|
-
}
|
342
|
-
};
|
package/src/constants/common.ts
CHANGED
@@ -461,7 +461,7 @@ const checkUrlConnectivityWithBrowser = async (
|
|
461
461
|
|
462
462
|
res.content = await page.content();
|
463
463
|
|
464
|
-
const contentType = response
|
464
|
+
const contentType = response?.headers?.()['content-type'] || '';
|
465
465
|
if (contentType.includes('xml')) {
|
466
466
|
const responseFromUrl = await requestToUrl(res.url, true, extraHTTPHeaders);
|
467
467
|
|
@@ -1776,6 +1776,44 @@ export const submitForm = async (
|
|
1776
1776
|
}
|
1777
1777
|
}
|
1778
1778
|
};
|
1779
|
+
|
1780
|
+
/**
 * Detects the browser's default user agent and registers a modified copy of it
 * as a `--user-agent=` launch argument.
 *
 * A temporary persistent context is launched only to read `navigator.userAgent`;
 * it is always closed again, even when opening the page or evaluating fails.
 *
 * Side effects on `constants.launchOptionsArgs`:
 * - pushes `--headless=new` when `CRAWLEE_HEADLESS` is `'1'` and the flag is absent;
 * - pushes `--user-agent=<UA>` where the first "HeadlessChrome" in the default UA
 *   is replaced with "Chrome".
 *
 * @param browser forwarded unchanged to `getPlaywrightLaunchOptions`
 * @param playwrightDeviceDetailsObject extra options spread last into the launch options
 * @returns a promise that resolves once the launch arguments have been updated
 */
export async function initModifiedUserAgent(
  browser?: string,
  playwrightDeviceDetailsObject?: object,
): Promise<void> {
  const isHeadless = process.env.CRAWLEE_HEADLESS === '1';

  // If headless mode is enabled, ensure the headless flag is set exactly once.
  if (isHeadless && !constants.launchOptionsArgs.includes('--headless=new')) {
    constants.launchOptionsArgs.push('--headless=new');
  }

  // `headless: false` is only a default here: both spreads come after it and may
  // override it. NOTE(review): whether getPlaywrightLaunchOptions sets `headless`
  // is not visible from this function - confirm.
  const launchOptions = {
    headless: false,
    ...getPlaywrightLaunchOptions(browser),
    ...playwrightDeviceDetailsObject,
  };

  // Launch a temporary persistent context with an empty userDataDir.
  const browserContext = await constants.launcher.launchPersistentContext('', launchOptions);

  let defaultUA: string;
  try {
    const page = await browserContext.newPage();
    // Retrieve the default user agent as reported by the page itself.
    defaultUA = await page.evaluate(() => navigator.userAgent);
  } finally {
    // Always release the temporary browser, otherwise a failure above would
    // leave the browser context running.
    await browserContext.close();
  }

  // A string pattern replaces only the first match and is a no-op when the
  // marker is absent, so no separate `includes` check is needed.
  const modifiedUA = defaultUA.replace('HeadlessChrome', 'Chrome');

  // Push the modified UA flag into the shared launch arguments.
  constants.launchOptionsArgs.push(`--user-agent=${modifiedUA}`);
}
|
1815
|
+
|
1816
|
+
|
1779
1817
|
/**
|
1780
1818
|
* @param {string} browser browser name ("chrome" or "edge", null for chromium, the default Playwright browser)
|
1781
1819
|
* @returns playwright launch options object. For more details: https://playwright.dev/docs/api/class-browsertype#browser-type-launch
|
@@ -7,6 +7,7 @@ import os from 'os';
|
|
7
7
|
import { spawnSync, execSync } from 'child_process';
|
8
8
|
import { chromium } from 'playwright';
|
9
9
|
import { silentLogger } from '../logs.js';
|
10
|
+
import { PageInfo } from '../mergeAxeResults.js';
|
10
11
|
|
11
12
|
const filename = fileURLToPath(import.meta.url);
|
12
13
|
const dirname = path.dirname(filename);
|
@@ -177,16 +178,16 @@ export const basicAuthRegex = /^.*\/\/.*:.*@.*$/i;
|
|
177
178
|
export const axeScript = path.join(dirname, '../../node_modules/axe-core/axe.min.js');
|
178
179
|
export class UrlsCrawled {
|
179
180
|
toScan: string[] = [];
|
180
|
-
scanned:
|
181
|
-
invalid:
|
181
|
+
scanned: PageInfo[] = [];
|
182
|
+
invalid: PageInfo[] = [];
|
182
183
|
scannedRedirects: { fromUrl: string; toUrl: string }[] = [];
|
183
184
|
notScannedRedirects: { fromUrl: string; toUrl: string }[] = [];
|
184
|
-
outOfDomain:
|
185
|
-
blacklisted:
|
186
|
-
error:
|
187
|
-
exceededRequests:
|
188
|
-
forbidden:
|
189
|
-
userExcluded:
|
185
|
+
outOfDomain: PageInfo[] = [];
|
186
|
+
blacklisted: PageInfo[] = [];
|
187
|
+
error: PageInfo[] = [];
|
188
|
+
exceededRequests: PageInfo[] = [];
|
189
|
+
forbidden: PageInfo[] = [];
|
190
|
+
userExcluded: PageInfo[] = [];
|
190
191
|
everything: string[] = [];
|
191
192
|
|
192
193
|
constructor(urlsCrawled?: Partial<UrlsCrawled>) {
|