@govtechsg/oobee 0.10.85 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/combine.js +1 -1
  5. package/dist/constants/common.js +15 -4
  6. package/dist/constants/constants.js +604 -1
  7. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  8. package/dist/crawlers/crawlSitemap.js +98 -80
  9. package/dist/crawlers/custom/utils.js +137 -31
  10. package/dist/crawlers/guards/urlGuard.js +8 -15
  11. package/dist/crawlers/runCustom.js +18 -11
  12. package/dist/generateOobeeClientScanner.js +570 -0
  13. package/dist/mergeAxeResults.js +5 -4
  14. package/dist/npmIndex.js +10 -2
  15. package/dist/proxyService.js +18 -3
  16. package/dist/services/s3Uploader.js +21 -10
  17. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  18. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  19. package/dist/static/ejs/summary.ejs +10 -5
  20. package/oobee-client-scanner.js +34992 -0
  21. package/package.json +2 -2
  22. package/src/combine.ts +3 -1
  23. package/src/constants/common.ts +22 -10
  24. package/src/constants/constants.ts +602 -1
  25. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  26. package/src/crawlers/crawlSitemap.ts +116 -98
  27. package/src/crawlers/custom/utils.ts +143 -38
  28. package/src/crawlers/guards/urlGuard.ts +24 -31
  29. package/src/crawlers/runCustom.ts +29 -11
  30. package/src/generateOobeeClientScanner.ts +591 -0
  31. package/src/mergeAxeResults.ts +5 -3
  32. package/src/npmIndex.ts +12 -2
  33. package/src/proxyService.ts +25 -4
  34. package/src/services/s3Uploader.ts +23 -11
  35. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  36. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  37. package/src/static/ejs/summary.ejs +10 -5
  38. package/testStaticJSScanner.html +534 -0
@@ -146,17 +146,18 @@ jobs:
146
146
  chmod -R u+w "$GITHUB_WORKSPACE/oobee"
147
147
 
148
148
  # Sign all Mach-O (exec bits OR dylib OR node native addons)
149
+ # Search $GITHUB_WORKSPACE (not just oobee/) to cover scripts copied to the parent dir
149
150
  while IFS= read -r f; do
150
151
  echo "Signing $f"
151
152
  codesign --force --options runtime --timestamp --sign "${CERTIFICATE_NAME}" "$f"
152
153
  done < <(
153
- find "$GITHUB_WORKSPACE/oobee" -type f \
154
+ find "$GITHUB_WORKSPACE" -type f \
154
155
  \( -perm -111 -o -name "*.dylib" -o -name "*.node" \) \
155
156
  ! -path "*/.git/*"
156
157
  )
157
158
 
158
159
  echo "Verifying signatures of Mach-O files..."
159
- find "$GITHUB_WORKSPACE/oobee" -type f \( -perm -111 -o -name "*.dylib" -o -name "*.node" \) \
160
+ find "$GITHUB_WORKSPACE" -type f \( -perm -111 -o -name "*.dylib" -o -name "*.node" \) \
160
161
  -exec codesign --verify --strict --verbose=2 {} \; || true
161
162
 
162
163
  - name: Cleanup keychain
@@ -3,6 +3,8 @@ on:
3
3
  workflow_dispatch:
4
4
  release:
5
5
  types: [published]
6
+ permissions:
7
+ contents: write
6
8
  jobs:
7
9
  build:
8
10
  runs-on: ubuntu-latest
@@ -20,6 +22,14 @@ jobs:
20
22
  - run: npm run build
21
23
  continue-on-error: false
22
24
 
25
+ - name: Create and push git tag
26
+ run: |
27
+ VERSION=$(node -p "require('./package.json').version")
28
+ git config user.name "github-actions[bot]"
29
+ git config user.email "github-actions[bot]@users.noreply.github.com"
30
+ git tag -af "v${VERSION}" -m "Version ${VERSION}"
31
+ git push origin "v${VERSION}" --force
32
+
23
33
  - run: npm publish
24
34
  env:
25
35
  NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
package/DETAILS.md CHANGED
@@ -195,3 +195,32 @@ Note: Level AAA are disabled by default. Please specify `enable-wcag-aaa` in ru
195
195
  | skip-link | Ensure all skip links have a focusable target | Good to Fix |
196
196
  | tabindex | Ensures tabindex attribute values are not greater than 0 | Good to Fix |
197
197
  | table-duplicate-name | Ensure the `<caption>` element does not contain the same text as the summary attribute | Good to Fix |
198
+
199
+ ## Additional Information
200
+ ### How the Readability Grading Works
201
+
202
+ #### 1. Text Extraction
203
+
204
+ During a page scan, Oobee extracts text from all `<p>` elements on the page (via `extractAndGradeText.ts` or `extractText.ts`). The raw text is split into individual **sentences** using the pattern `/[^.!?]*[.!?]+/g` — only text segments ending with `.`, `!`, or `?` are kept.
205
+
206
+ #### 2. Flesch Reading Ease Scoring
207
+
208
+ The extracted sentences are joined into a single string and word-counted. If the page has **fewer than 20 words**, grading is skipped (score = 0, treated as a pass). Otherwise, the [Flesch Reading Ease](https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests) formula is applied via the `text-readability` library in `gradeReadability.ts`:
209
+
210
+ | Score Range | Interpretation |
211
+ |---|---|
212
+ | 90–100 | Very easy to read (5th grade) |
213
+ | 60–70 | Easily understood by 13–15 year olds |
214
+ | **≤ 50** | **Difficult — college level or above** |
215
+ | 0–30 | Very difficult — best understood by university graduates |
216
+
217
+ #### 3. Flagging Criteria
218
+
219
+ The `oobee-grading-text-contents` rule is **only enabled when WCAG AAA mode is on** (`enableWcagAaa = true`), and its violations are reported under **Manual Review Required** findings. It maps to **WCAG 3.1.5 (Reading Level)**.
220
+
221
+ A page is **flagged** (incomplete) when the Flesch Reading Ease score is **50 or below**, indicating the text is potentially difficult to understand. The issue message reports the exact score and explains that the target passing score is above 50.
222
+
223
+ A page **passes** when:
224
+ - The score is **above 50**, or
225
+ - There are fewer than 20 words of paragraph text, or
226
+ - No valid sentences (ending with punctuation) are found
package/dist/combine.js CHANGED
@@ -77,7 +77,7 @@ const combineRun = async (details, deviceToScan) => {
77
77
  let durationExceeded = false;
78
78
  switch (type) {
79
79
  case ScannerTypes.CUSTOM:
80
- const res = await runCustom(url, randomToken, viewportSettings, blacklistedPatterns, includeScreenshots, customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '');
80
+ const res = await runCustom(url, randomToken, browser, userDataDirectory, viewportSettings, blacklistedPatterns, includeScreenshots, customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '');
81
81
  urlsCrawledObj = res.urlsCrawled;
82
82
  uiCustomFlowLabel = res.customFlowLabel;
83
83
  break;
@@ -175,6 +175,14 @@ export const validateXML = (content) => {
175
175
  });
176
176
  return { isValid, parsedContent };
177
177
  };
178
/**
 * Decide whether `content` is a plain-text sitemap (one URL per line).
 *
 * The content may be raw text, or the same text wrapped in an HTML shell
 * (browsers wrap .txt responses in HTML when fetched via Playwright), so
 * markup is stripped before the lines are inspected.
 *
 * A text sitemap must contain nothing but URLs, so EVERY non-empty line has to
 * look like a URL. Accepting content where only SOME line is a URL would
 * classify ordinary HTML pages as sitemaps as soon as one text node happens
 * to be a URL or a path (for example the visible text of a link).
 *
 * @param content Text to inspect; non-string input is reported as invalid.
 * @returns `{ isValid }` - true only when there is at least one line and all
 *          non-empty lines are http(s) URLs or relative paths starting with `/`.
 */
export const validateTXT = (content) => {
  // Guard against null/undefined/non-text input instead of throwing on .replace().
  if (typeof content !== 'string') {
    return { isValid: false };
  }
  // Replace tags with newlines so text nodes separated only by markup
  // end up on their own lines.
  const plainText = content.replace(/<[^>]+>/g, '\n');
  const lines = plainText
    .split(/\r?\n/)
    .map(line => line.trim())
    .filter(line => line.length > 0);
  // Allow http, https and relative paths (starting with /), since some txt
  // sitemaps list relative paths.
  const urlPattern = /^(https?:\/\/|\/)[^\s]+$/i;
  return { isValid: lines.length > 0 && lines.every(line => urlPattern.test(line)) };
};
178
186
  export const isSkippedUrl = (pageUrl, whitelistedDomains) => {
179
187
  const matched = whitelistedDomains.filter(p => {
180
188
  const pattern = p.replace(/[\n\r]+/g, '');
@@ -464,13 +472,13 @@ export const isSitemapContent = (content) => {
464
472
  }
465
473
  const regexForHtml = new RegExp('<(?:!doctype html|html|head|body)+?>', 'gmi');
466
474
  const regexForXmlSitemap = new RegExp('<(?:urlset|feed|rss)+?.*>', 'gmi');
467
- const regexForUrl = new RegExp('^.*(http|https):/{2}.*$', 'gmi');
468
475
  if (content.match(regexForHtml) && content.match(regexForXmlSitemap)) {
469
476
  // is an XML sitemap wrapped in a HTML document
470
477
  return true;
471
478
  }
472
- if (!content.match(regexForHtml) && content.match(regexForUrl)) {
473
- // treat this as a txt sitemap where all URLs will be extracted for crawling
479
+ const { isValid: isTxtSitemap } = validateTXT(content);
480
+ if (isTxtSitemap) {
481
+ // treat this as a txt sitemap (plain text or browser-wrapped with HTML)
474
482
  return true;
475
483
  }
476
484
  // is HTML webpage
@@ -1603,6 +1611,7 @@ const cacheProxyInfo = getProxyInfo();
1603
1611
  export const getPlaywrightLaunchOptions = (browser) => {
1604
1612
  const channel = browser || undefined;
1605
1613
  const resolution = proxyInfoToResolution(cacheProxyInfo);
1614
+ const shouldIgnoreMuteAudio = process.env.OOBEE_PLAYWRIGHT_IGNORE_DEFAULT_ARGS === '--mute-audio';
1606
1615
  // Start with your base args and sanitise
1607
1616
  const finalArgs = [...constants.launchOptionsArgs].filter(arg => !arg.startsWith('--headless') &&
1608
1617
  !arg.startsWith('--user-agent=') &&
@@ -1630,7 +1639,9 @@ export const getPlaywrightLaunchOptions = (browser) => {
1630
1639
  break;
1631
1640
  }
1632
1641
  const options = {
1633
- ignoreDefaultArgs: ['--use-mock-keychain'],
1642
+ ignoreDefaultArgs: shouldIgnoreMuteAudio
1643
+ ? ['--use-mock-keychain', '--mute-audio']
1644
+ : ['--use-mock-keychain'],
1634
1645
  args: finalArgs,
1635
1646
  headless: process.env.CRAWLEE_HEADLESS === '1',
1636
1647
  ...(channel && { channel }),