@govtechsg/oobee 0.10.91 → 0.10.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +303 -0
- package/README.md +22 -0
- package/dist/cli.js +3 -0
- package/dist/combine.js +15 -3
- package/dist/constants/cliFunctions.js +7 -0
- package/dist/constants/common.js +149 -80
- package/dist/constants/constants.js +1 -0
- package/dist/crawlers/commonCrawlerFunc.js +136 -15
- package/dist/crawlers/crawlDomain.js +55 -58
- package/dist/crawlers/crawlIntelligentSitemap.js +21 -11
- package/dist/crawlers/crawlRateController.js +47 -0
- package/dist/crawlers/crawlSitemap.js +51 -62
- package/dist/crawlers/runCustom.js +8 -2
- package/dist/generateOobeeClientScanner.js +32 -1
- package/dist/mergeAxeResults/itemsStore.js +32 -3
- package/dist/mergeAxeResults/sentryTelemetry.js +3 -0
- package/dist/mergeAxeResults.js +120 -92
- package/dist/npmIndex.js +1 -0
- package/dist/utils.js +23 -28
- package/oobee-client-scanner.js +35 -4
- package/package.json +3 -3
- package/src/cli.ts +4 -0
- package/src/combine.ts +16 -1
- package/src/constants/cliFunctions.ts +7 -0
- package/src/constants/common.ts +162 -90
- package/src/constants/constants.ts +1 -0
- package/src/crawlers/commonCrawlerFunc.ts +148 -14
- package/src/crawlers/crawlDomain.ts +64 -66
- package/src/crawlers/crawlIntelligentSitemap.ts +23 -11
- package/src/crawlers/crawlRateController.ts +63 -0
- package/src/crawlers/crawlSitemap.ts +57 -70
- package/src/crawlers/runCustom.ts +10 -1
- package/src/generateOobeeClientScanner.ts +32 -1
- package/src/index.ts +1 -0
- package/src/mergeAxeResults/itemsStore.ts +37 -3
- package/src/mergeAxeResults/sentryTelemetry.ts +3 -0
- package/src/mergeAxeResults.ts +139 -99
- package/src/npmIndex.ts +1 -0
- package/src/utils.ts +25 -33
- /package/{bf04540e-0894-4d00-98ec-c1be74c6f199.txt → 7339fae5-e8ed-4b50-af13-317847620dbf.txt} +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import fs from 'fs-extra';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import readline from 'readline';
|
|
4
|
+
import { consoleLogger } from '../logs.js';
|
|
4
5
|
export class ItemsStore {
|
|
5
6
|
constructor(storagePath) {
|
|
6
7
|
this.ensuredDirs = new Set();
|
|
8
|
+
this.fileWriteQueues = new Map();
|
|
7
9
|
this.basePath = path.join(storagePath, 'tmp-items');
|
|
8
10
|
}
|
|
9
11
|
sanitizeRuleId(ruleId) {
|
|
@@ -22,8 +24,25 @@ export class ItemsStore {
|
|
|
22
24
|
async appendPageItems(category, ruleId, entry) {
|
|
23
25
|
await this.ensureDir(category);
|
|
24
26
|
const filePath = this.getRuleFilePath(category, ruleId);
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
let line = JSON.stringify(entry);
|
|
28
|
+
// JSON.stringify should never produce literal newlines inside strings, but HTML content
|
|
29
|
+
// from page evaluation may contain edge-case characters (e.g. unescaped control chars in
|
|
30
|
+
// non-spec-compliant innerHTML). Strip any embedded \r or \n that would break JSONL format readline parsing.
|
|
31
|
+
line = line.replace(/[\n\r]/g, (match) => {
|
|
32
|
+
if (match === '\n')
|
|
33
|
+
return '\\n';
|
|
34
|
+
if (match === '\r')
|
|
35
|
+
return '\\r';
|
|
36
|
+
return match;
|
|
37
|
+
});
|
|
38
|
+
line += '\n';
|
|
39
|
+
// Serialize writes per rule file to avoid concurrent append interleaving/truncation.
|
|
40
|
+
const previous = this.fileWriteQueues.get(filePath) ?? Promise.resolve();
|
|
41
|
+
const next = previous.then(() => fs.appendFile(filePath, line, 'utf8'));
|
|
42
|
+
this.fileWriteQueues.set(filePath, next.catch(() => {
|
|
43
|
+
// Keep queue alive for subsequent writes.
|
|
44
|
+
}));
|
|
45
|
+
await next;
|
|
27
46
|
}
|
|
28
47
|
async *readRuleItems(category, ruleId) {
|
|
29
48
|
const filePath = this.getRuleFilePath(category, ruleId);
|
|
@@ -31,10 +50,19 @@ export class ItemsStore {
|
|
|
31
50
|
return;
|
|
32
51
|
const fileStream = fs.createReadStream(filePath, { encoding: 'utf8' });
|
|
33
52
|
const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
|
|
53
|
+
let lineNumber = 0;
|
|
34
54
|
for await (const line of rl) {
|
|
35
|
-
|
|
55
|
+
lineNumber += 1;
|
|
56
|
+
if (!line.trim())
|
|
57
|
+
continue;
|
|
58
|
+
try {
|
|
36
59
|
yield JSON.parse(line);
|
|
37
60
|
}
|
|
61
|
+
catch (error) {
|
|
62
|
+
// Tolerate malformed/truncated JSONL lines (e.g. interrupted append) so report generation can continue.
|
|
63
|
+
const preview = line.slice(0, 200);
|
|
64
|
+
consoleLogger.warn(`Skipping malformed itemsStore JSONL line ${lineNumber} in ${filePath}: ${error.message}. Content preview: ${preview}`);
|
|
65
|
+
}
|
|
38
66
|
}
|
|
39
67
|
}
|
|
40
68
|
async readRuleItemsMap(category, ruleId) {
|
|
@@ -46,6 +74,7 @@ export class ItemsStore {
|
|
|
46
74
|
return map;
|
|
47
75
|
}
|
|
48
76
|
async cleanup() {
|
|
77
|
+
await Promise.all(this.fileWriteQueues.values());
|
|
49
78
|
await fs.rm(this.basePath, { recursive: true, force: true });
|
|
50
79
|
}
|
|
51
80
|
}
|
|
@@ -114,6 +114,9 @@ const sendWcagBreakdownToSentry = async (appVersion, wcagBreakdown, ruleIdJson,
|
|
|
114
114
|
...(process.env.OOBEE_SCAN_PRODUCT && {
|
|
115
115
|
scanProduct: process.env.OOBEE_SCAN_PRODUCT,
|
|
116
116
|
}),
|
|
117
|
+
...(process.env.OOBEE_TAGGED_WEBSITE && {
|
|
118
|
+
websiteTag: process.env.OOBEE_TAGGED_WEBSITE,
|
|
119
|
+
}),
|
|
117
120
|
},
|
|
118
121
|
user: {
|
|
119
122
|
...(scanInfo.email && scanInfo.name
|
package/dist/mergeAxeResults.js
CHANGED
|
@@ -5,6 +5,7 @@ import printMessage from 'print-message';
|
|
|
5
5
|
import path from 'path';
|
|
6
6
|
import ejs from 'ejs';
|
|
7
7
|
import { fileURLToPath } from 'url';
|
|
8
|
+
import { Dataset, RequestQueue, Configuration } from 'crawlee';
|
|
8
9
|
import constants, { BrowserTypes, ScannerTypes, WCAGclauses, a11yRuleShortDescriptionMap, disabilityBadgesMap, a11yRuleLongDescriptionMap, a11yRuleStepByStepGuide, } from './constants/constants.js';
|
|
9
10
|
import { getBrowserToRun, getPlaywrightLaunchOptions } from './constants/common.js';
|
|
10
11
|
import { createScreenshotsFolder, getStoragePath, getVersion, getWcagPassPercentage, getProgressPercentage, retryFunction, zipResults, getIssuesPercentage, register, } from './utils.js';
|
|
@@ -255,54 +256,84 @@ const cleanUpJsonFiles = async (filesToDelete) => {
|
|
|
255
256
|
});
|
|
256
257
|
};
|
|
257
258
|
const writeSummaryPdf = async (storagePath, pagesScanned, filename = 'summary', browser, _userDataDirectory) => {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
259
|
+
const renderPdfWithBrowser = async (browserToUse) => {
|
|
260
|
+
let browserInstance;
|
|
261
|
+
let context;
|
|
262
|
+
let page;
|
|
263
|
+
try {
|
|
264
|
+
const htmlFilePath = path.join(storagePath, `${filename}.html`);
|
|
265
|
+
const fileDestinationPath = path.join(storagePath, `${filename}.pdf`);
|
|
266
|
+
const htmlFileUrl = `file://${htmlFilePath}`;
|
|
267
|
+
const launchOptions = getPlaywrightLaunchOptions(browserToUse);
|
|
268
|
+
browserInstance = await constants.launcher.launch({
|
|
269
|
+
...launchOptions,
|
|
270
|
+
headless: true,
|
|
271
|
+
});
|
|
272
|
+
register(browserInstance);
|
|
273
|
+
context = await browserInstance.newContext();
|
|
274
|
+
page = await context.newPage();
|
|
275
|
+
await page.goto(htmlFileUrl, {
|
|
276
|
+
waitUntil: 'domcontentloaded',
|
|
277
|
+
timeout: 120000,
|
|
278
|
+
});
|
|
279
|
+
await page.emulateMedia({ media: 'print' });
|
|
280
|
+
await page.pdf({
|
|
281
|
+
margin: { bottom: '32px' },
|
|
282
|
+
path: fileDestinationPath,
|
|
283
|
+
format: 'A4',
|
|
284
|
+
displayHeaderFooter: true,
|
|
285
|
+
footerTemplate: `
|
|
284
286
|
<div style="margin-top:50px;color:#26241b;font-family:Open Sans;text-align: center;width: 100%;font-weight:400">
|
|
285
287
|
<span style="color:#26241b;font-size: 14px;font-weight:400">Page <span class="pageNumber"></span> of <span class="totalPages"></span></span>
|
|
286
288
|
</div>
|
|
287
289
|
`,
|
|
288
|
-
|
|
289
|
-
if (pagesScanned < 2000) {
|
|
290
|
+
});
|
|
290
291
|
fs.unlinkSync(htmlFilePath);
|
|
291
292
|
}
|
|
293
|
+
finally {
|
|
294
|
+
try {
|
|
295
|
+
await page?.close();
|
|
296
|
+
}
|
|
297
|
+
catch (err) {
|
|
298
|
+
consoleLogger.info(`Error at page close writeSummaryPDF ${err}`);
|
|
299
|
+
}
|
|
300
|
+
try {
|
|
301
|
+
await context?.close();
|
|
302
|
+
}
|
|
303
|
+
catch (err) {
|
|
304
|
+
consoleLogger.info(`Error at context close writeSummaryPDF ${err}`);
|
|
305
|
+
}
|
|
306
|
+
try {
|
|
307
|
+
await browserInstance?.close();
|
|
308
|
+
}
|
|
309
|
+
catch (err) {
|
|
310
|
+
consoleLogger.info(`Error at browserInstance close writeSummaryPDF ${err}`);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
};
|
|
314
|
+
const browserAttempts = [browser];
|
|
315
|
+
// Runtime fallback: if Chrome launch fails on Windows, try Edge once for PDF generation.
|
|
316
|
+
if (process.platform === 'win32' && browser === BrowserTypes.CHROME) {
|
|
317
|
+
browserAttempts.push(BrowserTypes.EDGE);
|
|
292
318
|
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
consoleLogger.info(`Error at
|
|
305
|
-
|
|
319
|
+
for (let i = 0; i < browserAttempts.length; i++) {
|
|
320
|
+
const currentBrowser = browserAttempts[i];
|
|
321
|
+
try {
|
|
322
|
+
await renderPdfWithBrowser(currentBrowser);
|
|
323
|
+
if (i > 0) {
|
|
324
|
+
consoleLogger.warn(`writeSummaryPDF succeeded with fallback browser '${currentBrowser}' after '${browser}' failed.`);
|
|
325
|
+
}
|
|
326
|
+
return;
|
|
327
|
+
}
|
|
328
|
+
catch (err) {
|
|
329
|
+
const isLastAttempt = i === browserAttempts.length - 1;
|
|
330
|
+
consoleLogger.info(`Error at writeSummaryPDF using browser '${currentBrowser}': ${err instanceof Error ? err.stack : err}`);
|
|
331
|
+
if (isLastAttempt) {
|
|
332
|
+
return;
|
|
333
|
+
}
|
|
334
|
+
const nextBrowser = browserAttempts[i + 1];
|
|
335
|
+
consoleLogger.warn(`writeSummaryPDF failed using browser '${currentBrowser}', retrying with '${nextBrowser}'.`);
|
|
336
|
+
}
|
|
306
337
|
}
|
|
307
338
|
};
|
|
308
339
|
// Tracking WCAG occurrences
|
|
@@ -489,6 +520,7 @@ const extractRuleAiData = (ruleId, totalItems, items, callback) => {
|
|
|
489
520
|
export const createRuleIdJson = async (allIssues, itemsStore) => {
|
|
490
521
|
const compiledRuleJson = {};
|
|
491
522
|
for (const category of ['mustFix', 'goodToFix', 'needsReview']) {
|
|
523
|
+
compiledRuleJson[category] = {};
|
|
492
524
|
for (const rule of allIssues.items[category].rules) {
|
|
493
525
|
let allItems = [];
|
|
494
526
|
if (itemsStore) {
|
|
@@ -499,7 +531,7 @@ export const createRuleIdJson = async (allIssues, itemsStore) => {
|
|
|
499
531
|
else {
|
|
500
532
|
allItems = rule.pagesAffected.flatMap(page => page.items || []);
|
|
501
533
|
}
|
|
502
|
-
compiledRuleJson[rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems);
|
|
534
|
+
compiledRuleJson[category][rule.rule] = extractRuleAiData(rule.rule, rule.totalItems, allItems);
|
|
503
535
|
}
|
|
504
536
|
}
|
|
505
537
|
return compiledRuleJson;
|
|
@@ -508,9 +540,10 @@ export const createRuleIdJson = async (allIssues, itemsStore) => {
|
|
|
508
540
|
export const createBasicFormHTMLSnippet = filteredResults => {
|
|
509
541
|
const compiledRuleJson = {};
|
|
510
542
|
['mustFix', 'goodToFix', 'needsReview'].forEach(category => {
|
|
543
|
+
compiledRuleJson[category] = {};
|
|
511
544
|
if (filteredResults[category] && filteredResults[category].rules) {
|
|
512
545
|
Object.entries(filteredResults[category].rules).forEach(([ruleId, ruleVal]) => {
|
|
513
|
-
compiledRuleJson[ruleId] = extractRuleAiData(ruleId, ruleVal.totalItems, ruleVal.items);
|
|
546
|
+
compiledRuleJson[category][ruleId] = extractRuleAiData(ruleId, ruleVal.totalItems, ruleVal.items);
|
|
514
547
|
});
|
|
515
548
|
}
|
|
516
549
|
});
|
|
@@ -547,7 +580,7 @@ const formatAboutStartTime = (dateString) => {
|
|
|
547
580
|
return htmlFormattedStartTime;
|
|
548
581
|
};
|
|
549
582
|
const generateArtifacts = async (randomToken, urlScanned, scanType, viewport, pagesScanned, pagesNotScanned, customFlowLabel, cypressScanAboutMetadata, scanDetails, zip = undefined, // optional
|
|
550
|
-
generateJsonFiles = false) => {
|
|
583
|
+
generateJsonFiles = false, preferredBrowser) => {
|
|
551
584
|
consoleLogger.info('Generating report artifacts');
|
|
552
585
|
const storagePath = getStoragePath(randomToken);
|
|
553
586
|
const intermediateDatasetsPath = `${storagePath}/crawlee`;
|
|
@@ -564,6 +597,8 @@ generateJsonFiles = false) => {
|
|
|
564
597
|
endTime: scanDetails.endTime ? scanDetails.endTime : new Date(),
|
|
565
598
|
urlScanned,
|
|
566
599
|
scanType,
|
|
600
|
+
totalLinksFetchedFromSitemaps: constants.sitemapFetchedLinks?.totalLinksFetchedFromSitemaps ?? 0,
|
|
601
|
+
fetchedSitemaps: constants.sitemapFetchedLinks?.fetchedSitemaps ?? [],
|
|
567
602
|
deviceChosen: scanDetails.deviceChosen || 'Desktop',
|
|
568
603
|
formatAboutStartTime,
|
|
569
604
|
isCustomFlow,
|
|
@@ -752,39 +787,37 @@ generateJsonFiles = false) => {
|
|
|
752
787
|
scanPagesSummaryBase64FilePath,
|
|
753
788
|
]);
|
|
754
789
|
}
|
|
755
|
-
const browserChannel = getBrowserToRun(randomToken, BrowserTypes.CHROME, false).browserToRun;
|
|
790
|
+
const browserChannel = getBrowserToRun(randomToken, preferredBrowser || BrowserTypes.CHROME, false).browserToRun;
|
|
756
791
|
// Should consider refactor constants.userDataDirectory to be a parameter in future
|
|
757
792
|
await retryFunction(() => writeSummaryPdf(storagePath, pagesScanned.length, 'summary', browserChannel, constants.userDataDirectory), 1);
|
|
758
|
-
//
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
}
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
793
|
+
// Flush pending background storage operations (metadata writes, lock-file ops)
|
|
794
|
+
const storageClient = Configuration.getStorageClient();
|
|
795
|
+
if (storageClient.teardown) {
|
|
796
|
+
await storageClient.teardown();
|
|
797
|
+
}
|
|
798
|
+
// Gracefully drop Dataset and RequestQueue — releases locks and removes files
|
|
799
|
+
const crawleeDir = path.join(storagePath, 'crawlee');
|
|
800
|
+
try {
|
|
801
|
+
const dataset = await Dataset.open(crawleeDir);
|
|
802
|
+
await dataset.drop();
|
|
803
|
+
}
|
|
804
|
+
catch (error) {
|
|
805
|
+
consoleLogger.info(`Dataset drop: ${error.message}`);
|
|
806
|
+
}
|
|
807
|
+
try {
|
|
808
|
+
const requestQueue = await RequestQueue.open(crawleeDir);
|
|
809
|
+
await requestQueue.drop();
|
|
810
|
+
}
|
|
811
|
+
catch (error) {
|
|
812
|
+
consoleLogger.info(`RequestQueue drop: ${error.message}`);
|
|
813
|
+
}
|
|
814
|
+
// Fallback rm for any leftover files not managed by Crawlee's storage API
|
|
773
815
|
const crawleePath = path.join(storagePath, 'crawlee');
|
|
774
816
|
try {
|
|
775
817
|
await fs.promises.rm(crawleePath, { recursive: true, force: true });
|
|
776
818
|
}
|
|
777
|
-
catch
|
|
778
|
-
//
|
|
779
|
-
if (process.platform === 'win32') {
|
|
780
|
-
await new Promise(resolve => setTimeout(resolve, 3000));
|
|
781
|
-
try {
|
|
782
|
-
await fs.promises.rm(crawleePath, { recursive: true, force: true });
|
|
783
|
-
}
|
|
784
|
-
catch {
|
|
785
|
-
// Best-effort cleanup — leave the folder; report generation continues
|
|
786
|
-
}
|
|
787
|
-
}
|
|
819
|
+
catch {
|
|
820
|
+
// Best-effort; storage was already dropped via API
|
|
788
821
|
}
|
|
789
822
|
try {
|
|
790
823
|
await fs.promises.rm(path.join(storagePath, 'pdfs'), { recursive: true, force: true });
|
|
@@ -792,6 +825,22 @@ generateJsonFiles = false) => {
|
|
|
792
825
|
catch (error) {
|
|
793
826
|
consoleLogger.warn(`Unable to force remove pdfs folder: ${error.message}`);
|
|
794
827
|
}
|
|
828
|
+
// Generate scrubbed HTML Code Snippets
|
|
829
|
+
const ruleIdJson = await createRuleIdJson(allIssues, itemsStore);
|
|
830
|
+
// Clean up intermediate items files before zipping
|
|
831
|
+
await itemsStore.cleanup();
|
|
832
|
+
try {
|
|
833
|
+
await sendWcagBreakdownToSentry(oobeeAppVersion, wcagOccurrencesMap, ruleIdJson, {
|
|
834
|
+
entryUrl: urlScanned,
|
|
835
|
+
scanType,
|
|
836
|
+
browser: scanDetails.deviceChosen,
|
|
837
|
+
email: scanDetails.nameEmail?.email,
|
|
838
|
+
name: scanDetails.nameEmail?.name,
|
|
839
|
+
}, allIssues, pagesScanned.length);
|
|
840
|
+
}
|
|
841
|
+
catch (error) {
|
|
842
|
+
console.error('Error sending WCAG data to Sentry:', error);
|
|
843
|
+
}
|
|
795
844
|
// Take option if set
|
|
796
845
|
if (typeof zip === 'string') {
|
|
797
846
|
constants.cliZipFileName = zip;
|
|
@@ -827,29 +876,8 @@ generateJsonFiles = false) => {
|
|
|
827
876
|
catch (error) {
|
|
828
877
|
printMessage([`Error in zipping results: ${error}`]);
|
|
829
878
|
}
|
|
830
|
-
// Generate scrubbed HTML Code Snippets
|
|
831
|
-
const ruleIdJson = await createRuleIdJson(allIssues, itemsStore);
|
|
832
|
-
// Clean up intermediate items files
|
|
833
|
-
await itemsStore.cleanup();
|
|
834
|
-
// At the end of the function where results are generated, add:
|
|
835
|
-
try {
|
|
836
|
-
// Always send WCAG breakdown to Sentry, even if no violations were found
|
|
837
|
-
// This ensures that all criteria are reported, including those with 0 occurrences
|
|
838
|
-
await sendWcagBreakdownToSentry(oobeeAppVersion, wcagOccurrencesMap, ruleIdJson, {
|
|
839
|
-
entryUrl: urlScanned,
|
|
840
|
-
scanType,
|
|
841
|
-
browser: scanDetails.deviceChosen,
|
|
842
|
-
email: scanDetails.nameEmail?.email,
|
|
843
|
-
name: scanDetails.nameEmail?.name,
|
|
844
|
-
}, allIssues, pagesScanned.length);
|
|
845
|
-
}
|
|
846
|
-
catch (error) {
|
|
847
|
-
console.error('Error sending WCAG data to Sentry:', error);
|
|
848
|
-
}
|
|
849
879
|
if (process.env.RUNNING_FROM_PH_GUI || process.env.OOBEE_VERBOSE)
|
|
850
880
|
console.log('Report generated successfully');
|
|
851
|
-
process.removeListener('uncaughtException', crawleeEpermHandler);
|
|
852
|
-
process.removeListener('unhandledRejection', crawleeEpermHandler);
|
|
853
881
|
return ruleIdJson;
|
|
854
882
|
};
|
|
855
883
|
export { writeHTML, compressJsonFileStreaming, convertItemsToReferences, flattenAndSortResults, populateScanPagesDetail, sendWcagBreakdownToSentry, getWcagPassPercentage, getProgressPercentage, getIssuesPercentage, itemTypeDescription, oobeeAiHtmlETL, oobeeAiRules, formatAboutStartTime, };
|
package/dist/npmIndex.js
CHANGED
|
@@ -290,6 +290,7 @@ thresholds = { mustFix: undefined, goodToFix: undefined }, scanAboutMetadata = u
|
|
|
290
290
|
// max numbers of mustFix/goodToFix occurrences before test returns a fail
|
|
291
291
|
const { mustFix: mustFixThreshold, goodToFix: goodToFixThreshold } = thresholds;
|
|
292
292
|
process.env.CRAWLEE_STORAGE_DIR = randomToken;
|
|
293
|
+
constants.sitemapFetchedLinks = null;
|
|
293
294
|
const scanDetails = {
|
|
294
295
|
startTime: new Date(),
|
|
295
296
|
endTime: new Date(),
|
package/dist/utils.js
CHANGED
|
@@ -5,6 +5,7 @@ import axe from 'axe-core';
|
|
|
5
5
|
import { v4 as uuidv4 } from 'uuid';
|
|
6
6
|
import { getDomain } from 'tldts';
|
|
7
7
|
import { normalizeUrl } from '@apify/utilities';
|
|
8
|
+
import { Dataset, RequestQueue, Configuration } from 'crawlee';
|
|
8
9
|
import constants, { destinationPath, getIntermediateScreenshotsPath, } from './constants/constants.js';
|
|
9
10
|
import { consoleLogger, errorsTxtPath } from './logs.js';
|
|
10
11
|
import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
|
|
@@ -346,6 +347,20 @@ export const cleanUp = async (randomToken, isError = false) => {
|
|
|
346
347
|
}
|
|
347
348
|
if (randomToken !== undefined) {
|
|
348
349
|
const storagePath = getStoragePath(randomToken);
|
|
350
|
+
try {
|
|
351
|
+
const storageClient = Configuration.getStorageClient();
|
|
352
|
+
if (storageClient.teardown) {
|
|
353
|
+
await storageClient.teardown();
|
|
354
|
+
}
|
|
355
|
+
const crawleeDir = path.join(storagePath, 'crawlee');
|
|
356
|
+
const dataset = await Dataset.open(crawleeDir);
|
|
357
|
+
await dataset.drop();
|
|
358
|
+
const requestQueue = await RequestQueue.open(crawleeDir);
|
|
359
|
+
await requestQueue.drop();
|
|
360
|
+
}
|
|
361
|
+
catch (error) {
|
|
362
|
+
consoleLogger.info(`Crawlee storage drop in cleanUp: ${error.message}`);
|
|
363
|
+
}
|
|
349
364
|
try {
|
|
350
365
|
fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
|
|
351
366
|
}
|
|
@@ -358,32 +373,8 @@ export const cleanUp = async (randomToken, isError = false) => {
|
|
|
358
373
|
catch (error) {
|
|
359
374
|
consoleLogger.warn(`Unable to force remove pdfs folder: ${error.message}`);
|
|
360
375
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
let logsPath = storagePath;
|
|
364
|
-
if (process.env.OOBEE_LOGS_PATH) {
|
|
365
|
-
logsPath = process.env.OOBEE_LOGS_PATH;
|
|
366
|
-
}
|
|
367
|
-
if (fs.existsSync(errorsTxtPath)) {
|
|
368
|
-
try {
|
|
369
|
-
const logFilePath = path.join(logsPath, `logs-${randomToken}.txt`);
|
|
370
|
-
fs.copyFileSync(errorsTxtPath, logFilePath);
|
|
371
|
-
console.log(`An error occured. Log file is located at: ${logFilePath}`);
|
|
372
|
-
}
|
|
373
|
-
catch (copyError) {
|
|
374
|
-
consoleLogger.error(`Error copying errors file during cleanup: ${copyError.message}`);
|
|
375
|
-
console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
|
|
376
|
-
deleteErrorLogFile = false; // Do not delete the log file if copy failed
|
|
377
|
-
}
|
|
378
|
-
if (deleteErrorLogFile && fs.existsSync(errorsTxtPath)) {
|
|
379
|
-
try {
|
|
380
|
-
fs.unlinkSync(errorsTxtPath);
|
|
381
|
-
}
|
|
382
|
-
catch (error) {
|
|
383
|
-
consoleLogger.warn(`Unable to delete log file ${errorsTxtPath}: ${error.message}`);
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
|
-
}
|
|
376
|
+
if (isError && fs.existsSync(errorsTxtPath)) {
|
|
377
|
+
console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
|
|
387
378
|
}
|
|
388
379
|
if (fs.existsSync(storagePath) && fs.readdirSync(storagePath).length === 0) {
|
|
389
380
|
try {
|
|
@@ -854,6 +845,8 @@ export const randomThreeDigitNumberString = () => {
|
|
|
854
845
|
return String(threeDigitNumber);
|
|
855
846
|
};
|
|
856
847
|
export const normUrl = (u) => (u ? normalizeUrl(u) || u : '');
|
|
848
|
+
export const stripWwwPrefix = (hostname) => hostname.replace(/^www\./, '');
|
|
849
|
+
export const isSameHostname = (hostname1, hostname2) => stripWwwPrefix(hostname1) === stripWwwPrefix(hostname2);
|
|
857
850
|
export const isFollowStrategy = (link1, link2, rule) => {
|
|
858
851
|
if (rule === 'all')
|
|
859
852
|
return true;
|
|
@@ -861,7 +854,9 @@ export const isFollowStrategy = (link1, link2, rule) => {
|
|
|
861
854
|
const parsedLink1 = new URL(link1);
|
|
862
855
|
const parsedLink2 = new URL(link2);
|
|
863
856
|
if (rule === 'same-origin') {
|
|
864
|
-
return parsedLink1.
|
|
857
|
+
return parsedLink1.protocol === parsedLink2.protocol &&
|
|
858
|
+
isSameHostname(parsedLink1.hostname, parsedLink2.hostname) &&
|
|
859
|
+
parsedLink1.port === parsedLink2.port;
|
|
865
860
|
}
|
|
866
861
|
if (rule === 'same-domain') {
|
|
867
862
|
const link1Domain = getDomain(parsedLink1.hostname, { allowPrivateDomains: true }) || parsedLink1.hostname;
|
|
@@ -869,7 +864,7 @@ export const isFollowStrategy = (link1, link2, rule) => {
|
|
|
869
864
|
return link1Domain.toLowerCase() === link2Domain.toLowerCase();
|
|
870
865
|
}
|
|
871
866
|
// default: same-hostname
|
|
872
|
-
return parsedLink1.hostname
|
|
867
|
+
return isSameHostname(parsedLink1.hostname, parsedLink2.hostname);
|
|
873
868
|
}
|
|
874
869
|
catch {
|
|
875
870
|
return false;
|
package/oobee-client-scanner.js
CHANGED
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
* DO NOT EDIT MANUALLY. Re-generate with: node dist/generateOobeeClientScanner.js
|
|
4
4
|
*
|
|
5
5
|
* Embedded at generation time:
|
|
6
|
-
* App version : 0.10.
|
|
6
|
+
* App version : 0.10.93
|
|
7
7
|
* Sentry DSN : (from OOBEE_SENTRY_DSN env var or constants.ts default)
|
|
8
|
-
* Sentry SDK : @sentry/browser
|
|
8
|
+
* Sentry SDK : @sentry/browser 10.58.0 (loaded from CDN at runtime)
|
|
9
9
|
*
|
|
10
10
|
* Usage:
|
|
11
11
|
* <script src="oobee-client-scanner.js"></script>
|
|
@@ -34883,8 +34883,8 @@
|
|
|
34883
34883
|
// ── Sentry browser telemetry (Sentry JS SDK, loaded from CDN) ────────────
|
|
34884
34884
|
|
|
34885
34885
|
var _oobeeSentryDsn = "https://3b8c7ee46b06f33815a1301b6713ebc3@o4509047624761344.ingest.us.sentry.io/4509327783559168";
|
|
34886
|
-
var _oobeeAppVersion = "0.10.
|
|
34887
|
-
var _oobeeSentryVersion = "
|
|
34886
|
+
var _oobeeAppVersion = "0.10.93";
|
|
34887
|
+
var _oobeeSentryVersion = "10.58.0";
|
|
34888
34888
|
var _oobeeSentryInitialized = false;
|
|
34889
34889
|
var _oobeeSentryLoadPromise = null;
|
|
34890
34890
|
|
|
@@ -35091,6 +35091,37 @@
|
|
|
35091
35091
|
// Run axe-core + oobee custom checks
|
|
35092
35092
|
var scanResult = await window.runA11yScan(elementsToScan, '');
|
|
35093
35093
|
|
|
35094
|
+
// Re-verify aria-hidden-focus violations against the live DOM to handle
|
|
35095
|
+
// race conditions with JS that sets tabindex="-1" after aria-hidden
|
|
35096
|
+
var axeViolations = scanResult.axeScanResults.violations || [];
|
|
35097
|
+
var ariaHiddenViolation = axeViolations.find(function(v) { return v.id === 'aria-hidden-focus'; });
|
|
35098
|
+
if (ariaHiddenViolation) {
|
|
35099
|
+
await new Promise(function(resolve) { setTimeout(resolve, 0); });
|
|
35100
|
+
ariaHiddenViolation.nodes = ariaHiddenViolation.nodes.filter(function(node) {
|
|
35101
|
+
var selector = node.target && node.target[0];
|
|
35102
|
+
if (typeof selector !== 'string') return true;
|
|
35103
|
+
try {
|
|
35104
|
+
var el = document.querySelector(selector);
|
|
35105
|
+
if (!el) return true;
|
|
35106
|
+
var focusables = el.querySelectorAll(
|
|
35107
|
+
'a[href], area[href], button:not([disabled]), input:not([disabled]):not([type="hidden"]), select:not([disabled]), textarea:not([disabled]), [tabindex]'
|
|
35108
|
+
);
|
|
35109
|
+
if (focusables.length === 0) return false;
|
|
35110
|
+
return Array.from(focusables).some(function(child) {
|
|
35111
|
+
var tabindex = child.getAttribute('tabindex');
|
|
35112
|
+
if (tabindex === null) return true;
|
|
35113
|
+
var parsed = parseInt(tabindex, 10);
|
|
35114
|
+
return isNaN(parsed) || parsed >= 0;
|
|
35115
|
+
});
|
|
35116
|
+
} catch (e) { return true; }
|
|
35117
|
+
});
|
|
35118
|
+
if (ariaHiddenViolation.nodes.length === 0) {
|
|
35119
|
+
scanResult.axeScanResults.violations = axeViolations.filter(function(v) {
|
|
35120
|
+
return v.id !== 'aria-hidden-focus';
|
|
35121
|
+
});
|
|
35122
|
+
}
|
|
35123
|
+
}
|
|
35124
|
+
|
|
35094
35125
|
// Convert raw axe results into oobee category structure
|
|
35095
35126
|
var filtered = _oobeeFilterAxeResults(scanResult.axeScanResults, scanResult.pageTitle);
|
|
35096
35127
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@govtechsg/oobee",
|
|
3
3
|
"main": "dist/npmIndex.js",
|
|
4
|
-
"version": "0.10.
|
|
4
|
+
"version": "0.10.93",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Government Technology Agency <info@tech.gov.sg>",
|
|
7
7
|
"bin": {
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"@aws-sdk/client-s3": "^3.1049.0",
|
|
12
12
|
"@json2csv/node": "^7.0.3",
|
|
13
13
|
"@napi-rs/canvas": "^0.1.53",
|
|
14
|
-
"@sentry/node": "^
|
|
14
|
+
"@sentry/node": "^10.58.0",
|
|
15
15
|
"@types/aws-sdk": "^0.0.42",
|
|
16
16
|
"axe-core": "^4.11.4",
|
|
17
17
|
"axios": "^1.8.2",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"inquirer": "^9.2.12",
|
|
27
27
|
"jsdom": "^29.0.0",
|
|
28
28
|
"jszip": "^3.10.1",
|
|
29
|
-
"lodash": "^4.
|
|
29
|
+
"lodash": "^4.18.1",
|
|
30
30
|
"mime": "^4.0.7",
|
|
31
31
|
"mime-types": "^2.1.35",
|
|
32
32
|
"minimatch": "^10.2.4",
|
package/src/cli.ts
CHANGED
|
@@ -228,6 +228,10 @@ if (!options.strategy) {
|
|
|
228
228
|
options.strategy = options.scanner === ScannerTypes.SITEMAP ? 'ignore' : 'same-domain';
|
|
229
229
|
}
|
|
230
230
|
|
|
231
|
+
if (options.websiteTag) {
|
|
232
|
+
process.env.OOBEE_TAGGED_WEBSITE = options.websiteTag;
|
|
233
|
+
}
|
|
234
|
+
|
|
231
235
|
const scanInit = async (argvs: Answers): Promise<string> => {
|
|
232
236
|
const updatedArgvs = { ...argvs };
|
|
233
237
|
|
package/src/combine.ts
CHANGED
|
@@ -6,7 +6,7 @@ import crawlLocalFile from './crawlers/crawlLocalFile.js';
|
|
|
6
6
|
import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
|
|
7
7
|
import generateArtifacts from './mergeAxeResults.js';
|
|
8
8
|
import { getHost, createAndUpdateResultsFolders, cleanUpAndExit, getStoragePath } from './utils.js';
|
|
9
|
-
import { ScannerTypes, UrlsCrawled } from './constants/constants.js';
|
|
9
|
+
import constants, { ScannerTypes, UrlsCrawled } from './constants/constants.js';
|
|
10
10
|
import { getBlackListedPatterns, submitForm } from './constants/common.js';
|
|
11
11
|
import { consoleLogger, silentLogger } from './logs.js';
|
|
12
12
|
import runCustom from './crawlers/runCustom.js';
|
|
@@ -72,6 +72,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
72
72
|
|
|
73
73
|
process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
|
|
74
74
|
process.env.CRAWLEE_STORAGE_DIR = randomToken;
|
|
75
|
+
constants.sitemapFetchedLinks = null;
|
|
75
76
|
|
|
76
77
|
if (process.env.CRAWLEE_SYSTEM_INFO_V2 === undefined) {
|
|
77
78
|
// Set the environment variable to enable system info v2
|
|
@@ -79,6 +80,18 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
79
80
|
process.env.CRAWLEE_SYSTEM_INFO_V2 = '1';
|
|
80
81
|
}
|
|
81
82
|
|
|
83
|
+
// Suppress non-fatal Crawlee ps-tree errors on Windows with non-English locales.
|
|
84
|
+
// The system info module tries to parse process listing headers and crashes when
|
|
85
|
+
// headers are in a different language (e.g. "Wo" instead of "PID").
|
|
86
|
+
const psTreeHandler = (err: Error) => {
|
|
87
|
+
if (err.message?.includes('Unknown process listing header')) {
|
|
88
|
+
consoleLogger.info(`Suppressed Crawlee ps-tree locale error: ${err.message}`);
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
throw err;
|
|
92
|
+
};
|
|
93
|
+
process.on('uncaughtException', psTreeHandler);
|
|
94
|
+
|
|
82
95
|
const host = type === ScannerTypes.SITEMAP || type === ScannerTypes.LOCALFILE ? '' : getHost(url);
|
|
83
96
|
|
|
84
97
|
let blacklistedPatterns: string[] | null = null;
|
|
@@ -141,6 +154,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
141
154
|
blacklistedPatterns,
|
|
142
155
|
includeScreenshots,
|
|
143
156
|
customFlowLabel && customFlowLabel !== 'None' ? customFlowLabel : '',
|
|
157
|
+
extraHTTPHeaders,
|
|
144
158
|
);
|
|
145
159
|
|
|
146
160
|
urlsCrawledObj = res.urlsCrawled;
|
|
@@ -274,6 +288,7 @@ const combineRun = async (details: Data, deviceToScan: string) => {
|
|
|
274
288
|
scanDetails,
|
|
275
289
|
zip,
|
|
276
290
|
generateJsonFiles,
|
|
291
|
+
browser,
|
|
277
292
|
);
|
|
278
293
|
const [name, email] = nameEmail.split(':');
|
|
279
294
|
|
|
@@ -341,5 +341,12 @@ To obtain the JSON files, you need to base64-decode the file followed by gunzip.
|
|
|
341
341
|
demandOption: false,
|
|
342
342
|
coerce: val => Number(val),
|
|
343
343
|
},
|
|
344
|
+
z: {
|
|
345
|
+
alias: 'websiteTag',
|
|
346
|
+
describe: 'Tag to identify the website in telemetry. Overrides OOBEE_TAGGED_WEBSITE env var.',
|
|
347
|
+
type: 'string',
|
|
348
|
+
requiresArg: true,
|
|
349
|
+
demandOption: false,
|
|
350
|
+
},
|
|
344
351
|
};
|
|
345
352
|
|