@govtechsg/oobee 0.10.85 → 0.10.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/cli.js +18 -5
- package/dist/combine.js +3 -1
- package/dist/constants/cliFunctions.js +2 -2
- package/dist/constants/common.js +70 -17
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlDomain.js +38 -13
- package/dist/crawlers/crawlIntelligentSitemap.js +62 -30
- package/dist/crawlers/crawlSitemap.js +141 -84
- package/dist/crawlers/custom/utils.js +218 -71
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +18 -11
- package/dist/generateHtmlReport.js +18 -11
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults/itemReferences.js +60 -25
- package/dist/mergeAxeResults/sentryTelemetry.js +4 -1
- package/dist/mergeAxeResults.js +23 -13
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
- package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
- package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
- package/dist/static/ejs/summary.ejs +19 -8
- package/dist/utils.js +4 -3
- package/fix-summary-html-oom-pr.md +62 -0
- package/oobee-client-scanner.js +34992 -0
- package/package.json +5 -5
- package/src/cli.ts +19 -5
- package/src/combine.ts +5 -1
- package/src/constants/cliFunctions.ts +2 -2
- package/src/constants/common.ts +87 -22
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlDomain.ts +39 -13
- package/src/crawlers/crawlIntelligentSitemap.ts +63 -30
- package/src/crawlers/crawlSitemap.ts +165 -100
- package/src/crawlers/custom/utils.ts +241 -80
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +29 -11
- package/src/generateHtmlReport.ts +21 -11
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults/itemReferences.ts +70 -26
- package/src/mergeAxeResults/sentryTelemetry.ts +4 -1
- package/src/mergeAxeResults.ts +26 -14
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/decodeUnzipParse.ejs +6 -3
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +38 -2
- package/src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +1 -1
- package/src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +4 -4
- package/src/static/ejs/summary.ejs +19 -8
- package/src/utils.ts +4 -3
- package/testStaticJSScanner.html +534 -0
|
@@ -1,10 +1,44 @@
|
|
|
1
1
|
<script>
|
|
2
2
|
/**
 * Rebuilds the item list for a page from pre-computed htmlGroups when the light report omits page.items.
 *
 * @param {Object} page - Page entry. `url` is matched against each group's `pageUrls`;
 *   `pageTitle` (or `metadata` when the title is falsy) is copied onto every rebuilt item.
 * @param {Object} ruleInCategory - Rule entry whose optional `htmlGroups` map holds the groups.
 * @returns {Array<Object>} One item per group whose `pageUrls` array contains `page.url`, in
 *   `Object.values(htmlGroups)` order. Empty when nothing matches or when either argument is missing.
 */
function buildItemsFromHtmlGroupsForPage(page, ruleInCategory) {
  // Missing inputs produce an empty list instead of a TypeError in the report UI.
  if (!page || !ruleInCategory) {
    return [];
  }

  const htmlGroups = ruleInCategory.htmlGroups || {};
  const pageTitle = page.pageTitle || page.metadata;

  return Object.values(htmlGroups)
    // Skip null entries and groups without a pageUrls array; keep only groups listing this page.
    .filter(groupData =>
      Boolean(groupData) &&
      Array.isArray(groupData.pageUrls) &&
      groupData.pageUrls.includes(page.url))
    .map(groupData => ({
      html: groupData.html,
      xpath: groupData.xpath,
      message: groupData.message,
      screenshotPath: groupData.screenshotPath,
      displayNeedsReview: groupData.displayNeedsReview,
      pageUrl: page.url,
      pageTitle
    }));
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* The embedded report payload now omits page.items and rebuilds occurrences from
|
|
30
|
+
* htmlGroups + page metadata. Keep the older page.items resolution logic below
|
|
31
|
+
* commented for an easy rollback if we need to restore mixed payload support.
|
|
4
32
|
*/
|
|
5
33
|
function resolveItemReferencesForPage(page, ruleInCategory) {
|
|
34
|
+
return buildItemsFromHtmlGroupsForPage(page, ruleInCategory);
|
|
35
|
+
|
|
36
|
+
/*
|
|
6
37
|
const items = page.items || [];
|
|
7
|
-
|
|
38
|
+
|
|
39
|
+
if (items.length === 0) {
|
|
40
|
+
return buildItemsFromHtmlGroupsForPage(page, ruleInCategory);
|
|
41
|
+
}
|
|
8
42
|
|
|
9
43
|
const isReference = typeof items[0] === 'string';
|
|
10
44
|
|
|
@@ -27,6 +61,7 @@
|
|
|
27
61
|
pageTitle: page.pageTitle || page.metadata
|
|
28
62
|
};
|
|
29
63
|
}
|
|
64
|
+
|
|
30
65
|
// Fallback: parse composite key
|
|
31
66
|
const nullByteIndex = compositeKey.indexOf('\x00');
|
|
32
67
|
const html = nullByteIndex !== -1 ? compositeKey.slice(0, nullByteIndex) : compositeKey;
|
|
@@ -40,6 +75,7 @@
|
|
|
40
75
|
pageTitle: page.pageTitle || page.metadata
|
|
41
76
|
};
|
|
42
77
|
});
|
|
78
|
+
*/
|
|
43
79
|
}
|
|
44
80
|
|
|
45
81
|
function buildItemCardsWithPagination(accordionId, category, ruleInCategory, page, index) {
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
// Use pre-computed htmlGroups for count if available, otherwise use pages
|
|
87
87
|
const count = isHtmlGrouping && selectedCategory.htmlGroups
|
|
88
88
|
? Object.keys(selectedCategory.htmlGroups).length
|
|
89
|
-
: selectedCategory.pagesAffected.length;
|
|
89
|
+
: (selectedCategory.pagesAffectedCount || selectedCategory.pagesAffected.length);
|
|
90
90
|
if (isHtmlGrouping) {
|
|
91
91
|
dropdownTitle.innerText = `HTML elements affected by this issue (${count})`;
|
|
92
92
|
} else {
|
|
@@ -270,8 +270,8 @@ include('./pageAccordionBuilder') %> <%- include('./constants') %>
|
|
|
270
270
|
if (!Array.isArray(rule.pagesAffected)) return;
|
|
271
271
|
|
|
272
272
|
rule.pagesAffected.sort((a, b) => {
|
|
273
|
-
const lenA = Array.isArray(a.items) ? a.items.length : 0;
|
|
274
|
-
const lenB = Array.isArray(b.items) ? b.items.length : 0;
|
|
273
|
+
const lenA = Array.isArray(a.items) ? a.items.length : a.itemsCount || 0;
|
|
274
|
+
const lenB = Array.isArray(b.items) ? b.items.length : b.itemsCount || 0;
|
|
275
275
|
return lenB - lenA; // DESC
|
|
276
276
|
});
|
|
277
277
|
});
|
|
@@ -295,10 +295,10 @@ include('./pageAccordionBuilder') %> <%- include('./constants') %>
|
|
|
295
295
|
dropdownToggle.innerText = `${ruleInCategory.totalItems} Total occ.`;
|
|
296
296
|
dropdownToggle.setAttribute('aria-label', occurrencesText);
|
|
297
297
|
document.getElementById('expandedRuleDropdownTitle').innerText =
|
|
298
|
-
`Pages affected by this issue (${ruleInCategory.pagesAffected.length})`;
|
|
298
|
+
`Pages affected by this issue (${(ruleInCategory.pagesAffectedCount || ruleInCategory.pagesAffected.length)})`;
|
|
299
299
|
buildExpandedRuleCategoryContent(category, ruleInCategory);
|
|
300
300
|
document.getElementById('expandedRulePageContent').innerText =
|
|
301
|
-
`Total ${ruleInCategory.pagesAffected.length} affected pages`;
|
|
301
|
+
`Total ${(ruleInCategory.pagesAffectedCount || ruleInCategory.pagesAffected.length)} affected pages`;
|
|
302
302
|
}
|
|
303
303
|
}
|
|
304
304
|
});
|
|
@@ -21,14 +21,25 @@
|
|
|
21
21
|
%>
|
|
22
22
|
<script>
|
|
23
23
|
const scanItems = <%- JSON.stringify(
|
|
24
|
-
{
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
24
|
+
['mustFix','goodToFix','needsReview','passed'].reduce((acc, cat) => {
|
|
25
|
+
if (items[cat]) {
|
|
26
|
+
acc[cat] = {
|
|
27
|
+
description: items[cat].description,
|
|
28
|
+
totalItems: items[cat].totalItems,
|
|
29
|
+
totalRuleIssues: items[cat].totalRuleIssues,
|
|
30
|
+
rules: (items[cat].rules || []).map(rule => ({
|
|
31
|
+
rule: rule.rule,
|
|
32
|
+
description: rule.description,
|
|
33
|
+
helpUrl: rule.helpUrl,
|
|
34
|
+
conformance: rule.conformance,
|
|
35
|
+
totalItems: rule.totalItems,
|
|
36
|
+
pagesAffected: { length: (rule.pagesAffected || []).length },
|
|
37
|
+
})),
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
return acc;
|
|
41
|
+
}, {})
|
|
42
|
+
).replace(/<\//g, '<\\/') %>
|
|
32
43
|
</script>
|
|
33
44
|
<%- include('partials/scripts/summaryTable') %>
|
|
34
45
|
<script>
|
package/dist/utils.js
CHANGED
|
@@ -4,6 +4,7 @@ import fs from 'fs-extra';
|
|
|
4
4
|
import axe from 'axe-core';
|
|
5
5
|
import { v4 as uuidv4 } from 'uuid';
|
|
6
6
|
import { getDomain } from 'tldts';
|
|
7
|
+
import { normalizeUrl } from '@apify/utilities';
|
|
7
8
|
import constants, { destinationPath, getIntermediateScreenshotsPath, } from './constants/constants.js';
|
|
8
9
|
import { consoleLogger, errorsTxtPath } from './logs.js';
|
|
9
10
|
import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
|
|
@@ -852,13 +853,13 @@ export const randomThreeDigitNumberString = () => {
|
|
|
852
853
|
const threeDigitNumber = Math.floor(scaledDecimal) + 100;
|
|
853
854
|
return String(threeDigitNumber);
|
|
854
855
|
};
|
|
856
|
+
/**
 * Returns a normalized form of a URL string.
 *
 * @param {string} u - URL to normalize.
 * @returns {string} '' when `u` is falsy; otherwise the result of `normalizeUrl(u)`,
 *   or `u` itself when that result is falsy.
 */
export const normUrl = (u) => {
  if (!u) {
    return '';
  }
  // Keep the caller's original string when normalization yields nothing usable.
  return normalizeUrl(u) || u;
};
|
|
855
857
|
export const isFollowStrategy = (link1, link2, rule) => {
|
|
858
|
+
if (rule === 'all')
|
|
859
|
+
return true;
|
|
856
860
|
try {
|
|
857
861
|
const parsedLink1 = new URL(link1);
|
|
858
862
|
const parsedLink2 = new URL(link2);
|
|
859
|
-
if (rule === 'all') {
|
|
860
|
-
return true;
|
|
861
|
-
}
|
|
862
863
|
if (rule === 'same-origin') {
|
|
863
864
|
return parsedLink1.origin === parsedLink2.origin;
|
|
864
865
|
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# fix: prevent OOM and browser crash in report generation for large scans
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
- Fix `summary.ejs` inlining the entire scan items payload (2 GB+ for 1000-page scans) via `JSON.stringify`, causing V8 OOM and killing the process
|
|
6
|
+
- Fix `report.html` embedded scanItems exceeding browser memory limits (746 MB uncompressed JSON for 1000-page scans)
|
|
7
|
+
- Fix write stream backpressure handling when embedding chunked base64 data
|
|
8
|
+
- Note: the `writeSummaryHTML` crash also blocked `report.html` generation, since it runs first
|
|
9
|
+
|
|
10
|
+
## Problem 1: OOM in summary.html generation (server-side)
|
|
11
|
+
|
|
12
|
+
For large scans (e.g. 1000 pages, 2.5M+ passed occurrences), `summary.ejs` serialized the full `items` object — including every rule's `pagesAffected` array with all individual issue items — into an inline `<script>` tag. This produced a string exceeding V8's limits, crashing the process silently.
|
|
13
|
+
|
|
14
|
+
The result: neither `summary.html` nor `report.html` was generated, even though all JSON artifacts (`scanData.json`, `scanItems.json`, etc.) were written successfully.
|
|
15
|
+
|
|
16
|
+
## Problem 2: Browser cannot parse embedded scanItems (client-side)
|
|
17
|
+
|
|
18
|
+
Even with report generation fixed, the browser failed to load the All Issues view:
|
|
19
|
+
```
|
|
20
|
+
Failed to decode/unzip/parse: Unexpected end of JSON input
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Root cause: `convertItemsToReferences` stripped per-page `items` arrays but still embedded the full `pagesAffected` array (url, pageTitle, actualUrl, metadata, etc. for every page × every rule). For 1000-page scans this produced **746 MB of uncompressed JSON** after base64-decode and gunzip — exceeding browser string/memory limits during `JSON.parse()`.
|
|
24
|
+
|
|
25
|
+
## Problem 3: Write stream backpressure (server-side)
|
|
26
|
+
|
|
27
|
+
The `writeHTML` function writes scan items as 2 MB base64 chunks via a `for await` loop over a read stream. The return value of `outputStream.write()` was not checked for backpressure — once the write buffer filled up, further chunks kept being queued without waiting for `drain`, and for large payloads the embedded base64 could end up truncated.
|
|
28
|
+
|
|
29
|
+
## Fix
|
|
30
|
+
|
|
31
|
+
### summary.ejs (OOM fix)
|
|
32
|
+
Strip the inline JSON to only what `summaryTable.ejs` actually needs:
|
|
33
|
+
- Category totals (`description`, `totalItems`, `totalRuleIssues`) and rule-level metadata: `rule`, `description`, `helpUrl`, `conformance`, `totalItems`
|
|
34
|
+
- `pagesAffected: { length: N }` (just the count object, not the full array)
|
|
35
|
+
|
|
36
|
+
This reduces the serialized payload from potentially gigabytes to a few kilobytes regardless of scan size.
|
|
37
|
+
|
|
38
|
+
### itemReferences.ts (browser payload fix)
|
|
39
|
+
`convertItemsToReferences` now strips each `pagesAffected` entry down to only `url`, `pageTitle`, and `itemsCount` — removing all per-item details (html snippets, screenshots, xpath, metadata, etc.) that constituted the bulk of the data. The All Issues list renders rule totals, and the "Group By Page" view in the rule modal still shows page URLs with occurrence counts.
|
|
40
|
+
|
|
41
|
+
This reduces the embedded payload from 746 MB (uncompressed) to ~11 MB for a 1000-page scan — well within browser memory limits.
|
|
42
|
+
|
|
43
|
+
### mergeAxeResults.ts (backpressure fix)
|
|
44
|
+
Await the `drain` event on the output stream when `write()` returns `false` before writing the next chunk. This ensures all base64 data is fully written to the report regardless of payload size.
|
|
45
|
+
|
|
46
|
+
## Files changed
|
|
47
|
+
|
|
48
|
+
| File | Change |
|
|
49
|
+
|------|--------|
|
|
50
|
+
| `src/static/ejs/summary.ejs` | Strip inline JSON to rule metadata and counts only |
|
|
51
|
+
| `src/mergeAxeResults/itemReferences.ts` | Strip `pagesAffected` to lightweight entries (url, pageTitle, itemsCount only) |
|
|
52
|
+
| `src/mergeAxeResults.ts` | Await drain on backpressure during chunked write |
|
|
53
|
+
| `src/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs` | Use `pagesAffectedCount` (falling back to `pagesAffected.length`); sort pages by `itemsCount` when `items` is absent |
|
|
54
|
+
| `src/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs` | Use `pagesAffectedCount`, falling back to `pagesAffected.length` |
|
|
55
|
+
|
|
56
|
+
## Test plan
|
|
57
|
+
|
|
58
|
+
- [ ] Run a large scan (500+ pages) and verify both `summary.html` and `report.html` are generated
|
|
59
|
+
- [ ] Open `summary.html` in a browser and verify the summary table renders correctly (issue counts, page counts, help links)
|
|
60
|
+
- [ ] Open `report.html` and verify the All Issues list loads and displays rule counts correctly
|
|
61
|
+
- [ ] Verify the rule modal shows correct "Pages affected" count
|
|
62
|
+
- [ ] Verify small scans still produce correct reports (no regression)
|