@govtechsg/oobee 0.10.85 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +137 -31
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +18 -11
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +5 -4
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +2 -2
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +143 -38
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +29 -11
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +5 -3
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
|
@@ -146,17 +146,18 @@ jobs:
|
|
|
146
146
|
chmod -R u+w "$GITHUB_WORKSPACE/oobee"
|
|
147
147
|
|
|
148
148
|
# Sign all Mach-O (exec bits OR dylib OR node native addons)
|
|
149
|
+
# Search $GITHUB_WORKSPACE (not just oobee/) to cover scripts copied to the parent dir
|
|
149
150
|
while IFS= read -r f; do
|
|
150
151
|
echo "Signing $f"
|
|
151
152
|
codesign --force --options runtime --timestamp --sign "${CERTIFICATE_NAME}" "$f"
|
|
152
153
|
done < <(
|
|
153
|
-
find "$GITHUB_WORKSPACE
|
|
154
|
+
find "$GITHUB_WORKSPACE" -type f \
|
|
154
155
|
\( -perm -111 -o -name "*.dylib" -o -name "*.node" \) \
|
|
155
156
|
! -path "*/.git/*"
|
|
156
157
|
)
|
|
157
158
|
|
|
158
159
|
echo "Verifying signatures of Mach-O files..."
|
|
159
|
-
find "$GITHUB_WORKSPACE
|
|
160
|
+
find "$GITHUB_WORKSPACE" -type f \( -perm -111 -o -name "*.dylib" -o -name "*.node" \) \
|
|
160
161
|
-exec codesign --verify --strict --verbose=2 {} \; || true
|
|
161
162
|
|
|
162
163
|
- name: Cleanup keychain
|
|
@@ -3,6 +3,8 @@ on:
|
|
|
3
3
|
workflow_dispatch:
|
|
4
4
|
release:
|
|
5
5
|
types: [published]
|
|
6
|
+
permissions:
|
|
7
|
+
contents: write
|
|
6
8
|
jobs:
|
|
7
9
|
build:
|
|
8
10
|
runs-on: ubuntu-latest
|
|
@@ -20,6 +22,14 @@ jobs:
|
|
|
20
22
|
- run: npm run build
|
|
21
23
|
continue-on-error: false
|
|
22
24
|
|
|
25
|
+
- name: Create and push git tag
|
|
26
|
+
run: |
|
|
27
|
+
VERSION=$(node -p "require('./package.json').version")
|
|
28
|
+
git config user.name "github-actions[bot]"
|
|
29
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
30
|
+
git tag -af "v${VERSION}" -m "Version ${VERSION}"
|
|
31
|
+
git push origin "v${VERSION}" --force
|
|
32
|
+
|
|
23
33
|
- run: npm publish
|
|
24
34
|
env:
|
|
25
35
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/DETAILS.md
CHANGED
|
@@ -195,3 +195,32 @@ Note: Level AAA are disabled by default. Please specify `enable-wcag-aaa` in ru
|
|
|
195
195
|
| skip-link | Ensure all skip links have a focusable target | Good to Fix |
|
|
196
196
|
| tabindex | Ensures tabindex attribute values are not greater than 0 | Good to Fix |
|
|
197
197
|
| table-duplicate-name | Ensure the `<caption>` element does not contain the same text as the summary attribute | Good to Fix |
|
|
198
|
+
|
|
199
|
+
## Additional Information
|
|
200
|
+
### How the Readability Grading Works
|
|
201
|
+
|
|
202
|
+
#### 1. Text Extraction
|
|
203
|
+
|
|
204
|
+
During a page scan, Oobee extracts text from all `<p>` elements on the page (via `extractAndGradeText.ts` or `extractText.ts`). The raw text is split into individual **sentences** using the pattern `/[^.!?]*[.!?]+/g` — only text segments ending with `.`, `!`, or `?` are kept.
|
|
205
|
+
|
|
206
|
+
#### 2. Flesch Reading Ease Scoring
|
|
207
|
+
|
|
208
|
+
The extracted sentences are joined into a single string and its words are counted. If the page has **fewer than 20 words**, grading is skipped (score = 0, treated as a pass). Otherwise, the [Flesch Reading Ease](https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests) formula is applied via the `text-readability` library in `gradeReadability.ts`. Scores are interpreted as follows:
|
|
209
|
+
|
|
210
|
+
| Score Range | Interpretation |
|
|
211
|
+
|---|---|
|
|
212
|
+
| 90–100 | Very easy to read (5th grade) |
|
|
213
|
+
| 60–70 | Easily understood by 13–15 year olds |
|
|
214
|
+
| **≤ 50** | **Difficult — college level or above (the range Oobee flags)** |
|
|
215
|
+
| 0–30 | Very difficult — best understood by university graduates |
|
|
216
|
+
|
|
217
|
+
#### 3. Flagging Criteria
|
|
218
|
+
|
|
219
|
+
The `oobee-grading-text-contents` rule is **only enabled when WCAG AAA mode is on** (`enableWcagAaa = true`), and its violations are reported under **Manual Review Required** findings. It maps to **WCAG 3.1.5 (Reading Level)**.
|
|
220
|
+
|
|
221
|
+
A page is **flagged** (incomplete) when the Flesch Reading Ease score is **50 or below**, indicating the text is potentially difficult to understand. The issue message reports the exact score and explains that the target passing score is above 50.
|
|
222
|
+
|
|
223
|
+
A page **passes** when:
|
|
224
|
+
- The score is **above 50**, or
|
|
225
|
+
- There are fewer than 20 words of paragraph text, or
|
|
226
|
+
- No valid sentences (ending with punctuation) are found
|
package/dist/combine.js
CHANGED
|
@@ -77,7 +77,7 @@ const combineRun = async (details, deviceToScan) => {
|
|
|
77
77
|
let durationExceeded = false;
|
|
78
78
|
switch (type) {
|
|
79
79
|
case ScannerTypes.CUSTOM:
|
|
80
|
-
const res = await runCustom(url, randomToken, viewportSettings, blacklistedPatterns, includeScreenshots, customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '');
|
|
80
|
+
const res = await runCustom(url, randomToken, browser, userDataDirectory, viewportSettings, blacklistedPatterns, includeScreenshots, customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '');
|
|
81
81
|
urlsCrawledObj = res.urlsCrawled;
|
|
82
82
|
uiCustomFlowLabel = res.customFlowLabel;
|
|
83
83
|
break;
|
package/dist/constants/common.js
CHANGED
|
@@ -175,6 +175,14 @@ export const validateXML = (content) => {
|
|
|
175
175
|
});
|
|
176
176
|
return { isValid, parsedContent };
|
|
177
177
|
};
|
|
178
|
+
export const validateTXT = (content) => {
|
|
179
|
+
// Strip HTML tags first — browsers wrap .txt files in HTML when fetched via Playwright
|
|
180
|
+
const plainText = content.replace(/<[^>]+>/g, '\n');
|
|
181
|
+
const lines = plainText.split(/\r?\n/).map(l => l.trim()).filter(l => l.length > 0);
|
|
182
|
+
// Allow http, https and relative paths (starting with /) for txt sitemaps, as some sitemaps use relative paths and some txt sitemaps are fetched as HTML by Playwright
|
|
183
|
+
const urlPattern = /^(https?:\/\/|\/)[^\s]+$/i;
|
|
184
|
+
return { isValid: lines.some(line => urlPattern.test(line)) };
|
|
185
|
+
};
|
|
178
186
|
export const isSkippedUrl = (pageUrl, whitelistedDomains) => {
|
|
179
187
|
const matched = whitelistedDomains.filter(p => {
|
|
180
188
|
const pattern = p.replace(/[\n\r]+/g, '');
|
|
@@ -464,13 +472,13 @@ export const isSitemapContent = (content) => {
|
|
|
464
472
|
}
|
|
465
473
|
const regexForHtml = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
|
|
466
474
|
const regexForXmlSitemap = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
|
|
467
|
-
const regexForUrl = new RegExp('^.*(http|https):/{2}.*$', 'gmi');
|
|
468
475
|
if (content.match(regexForHtml) && content.match(regexForXmlSitemap)) {
|
|
469
476
|
// is an XML sitemap wrapped in a HTML document
|
|
470
477
|
return true;
|
|
471
478
|
}
|
|
472
|
-
|
|
473
|
-
|
|
479
|
+
const { isValid: isTxtSitemap } = validateTXT(content);
|
|
480
|
+
if (isTxtSitemap) {
|
|
481
|
+
// treat this as a txt sitemap (plain text or browser-wrapped with HTML)
|
|
474
482
|
return true;
|
|
475
483
|
}
|
|
476
484
|
// is HTML webpage
|
|
@@ -1603,6 +1611,7 @@ const cacheProxyInfo = getProxyInfo();
|
|
|
1603
1611
|
export const getPlaywrightLaunchOptions = (browser) => {
|
|
1604
1612
|
const channel = browser || undefined;
|
|
1605
1613
|
const resolution = proxyInfoToResolution(cacheProxyInfo);
|
|
1614
|
+
const shouldIgnoreMuteAudio = process.env.OOBEE_PLAYWRIGHT_IGNORE_DEFAULT_ARGS === '--mute-audio';
|
|
1606
1615
|
// Start with your base args and sanitise
|
|
1607
1616
|
const finalArgs = [...constants.launchOptionsArgs].filter(arg => !arg.startsWith('--headless') &&
|
|
1608
1617
|
!arg.startsWith('--user-agent=') &&
|
|
@@ -1630,7 +1639,9 @@ export const getPlaywrightLaunchOptions = (browser) => {
|
|
|
1630
1639
|
break;
|
|
1631
1640
|
}
|
|
1632
1641
|
const options = {
|
|
1633
|
-
ignoreDefaultArgs:
|
|
1642
|
+
ignoreDefaultArgs: shouldIgnoreMuteAudio
|
|
1643
|
+
? ['--use-mock-keychain', '--mute-audio']
|
|
1644
|
+
: ['--use-mock-keychain'],
|
|
1634
1645
|
args: finalArgs,
|
|
1635
1646
|
headless: process.env.CRAWLEE_HEADLESS === '1',
|
|
1636
1647
|
...(channel && { channel }),
|