@govtechsg/oobee 0.10.39 → 0.10.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/docker-test.yml +1 -1
- package/README.md +2 -0
- package/REPORTS.md +431 -0
- package/package.json +3 -2
- package/src/cli.ts +2 -11
- package/src/constants/common.ts +68 -52
- package/src/constants/constants.ts +81 -1
- package/src/constants/oobeeAi.ts +6 -6
- package/src/constants/questions.ts +3 -2
- package/src/crawlers/commonCrawlerFunc.ts +45 -16
- package/src/crawlers/crawlDomain.ts +83 -102
- package/src/crawlers/crawlIntelligentSitemap.ts +21 -19
- package/src/crawlers/crawlSitemap.ts +121 -110
- package/src/crawlers/custom/findElementByCssSelector.ts +1 -1
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +593 -558
- package/src/crawlers/custom/xPathToCss.ts +10 -10
- package/src/crawlers/pdfScanFunc.ts +67 -26
- package/src/crawlers/runCustom.ts +1 -1
- package/src/index.ts +3 -4
- package/src/logs.ts +1 -1
- package/src/mergeAxeResults.ts +305 -242
- package/src/npmIndex.ts +12 -8
- package/src/screenshotFunc/htmlScreenshotFunc.ts +8 -20
- package/src/screenshotFunc/pdfScreenshotFunc.ts +34 -1
- package/src/types/text-readability.d.ts +3 -0
- package/src/types/types.ts +1 -1
- package/src/utils.ts +340 -50
- package/src/xPathToCss.ts +0 -186
- package/src/xPathToCssCypress.ts +0 -178
package/src/constants/common.ts
CHANGED
@@ -15,8 +15,8 @@ import safe from 'safe-regex';
|
|
15
15
|
import * as https from 'https';
|
16
16
|
import os from 'os';
|
17
17
|
import { minimatch } from 'minimatch';
|
18
|
-
import { globSync } from 'glob';
|
19
|
-
import { LaunchOptions, devices, webkit } from 'playwright';
|
18
|
+
import { globSync, GlobOptionsWithFileTypesFalse } from 'glob';
|
19
|
+
import { LaunchOptions, Locator, Page, devices, webkit } from 'playwright';
|
20
20
|
import printMessage from 'print-message';
|
21
21
|
import constants, {
|
22
22
|
getDefaultChromeDataDir,
|
@@ -31,6 +31,7 @@ import { silentLogger } from '../logs.js';
|
|
31
31
|
import { isUrlPdf } from '../crawlers/commonCrawlerFunc.js';
|
32
32
|
import { randomThreeDigitNumberString } from '../utils.js';
|
33
33
|
import { Answers, Data } from '../index.js';
|
34
|
+
import { DeviceDescriptor } from '../types/types.js';
|
34
35
|
|
35
36
|
// validateDirPath validates a provided directory path
|
36
37
|
// returns null if no error
|
@@ -252,7 +253,7 @@ export const getUrlMessage = (scanner: ScannerTypes): string => {
|
|
252
253
|
}
|
253
254
|
};
|
254
255
|
|
255
|
-
export const isInputValid = inputString => {
|
256
|
+
export const isInputValid = (inputString: string): boolean => {
|
256
257
|
if (!validator.isEmpty(inputString)) {
|
257
258
|
const removeBlackListCharacters = validator.escape(inputString);
|
258
259
|
|
@@ -373,12 +374,12 @@ const requestToUrl = async (
|
|
373
374
|
};
|
374
375
|
|
375
376
|
const checkUrlConnectivityWithBrowser = async (
|
376
|
-
url,
|
377
|
-
browserToRun,
|
378
|
-
clonedDataDir,
|
379
|
-
playwrightDeviceDetailsObject,
|
380
|
-
isCustomFlow,
|
381
|
-
extraHTTPHeaders,
|
377
|
+
url: string,
|
378
|
+
browserToRun: string,
|
379
|
+
clonedDataDir: string,
|
380
|
+
playwrightDeviceDetailsObject: DeviceDescriptor,
|
381
|
+
isCustomFlow: boolean,
|
382
|
+
extraHTTPHeaders: Record<string, string>,
|
382
383
|
) => {
|
383
384
|
const res = new RES();
|
384
385
|
|
@@ -468,7 +469,6 @@ const checkUrlConnectivityWithBrowser = async (
|
|
468
469
|
res.content = responseFromUrl.content;
|
469
470
|
}
|
470
471
|
} catch (error) {
|
471
|
-
|
472
472
|
// But this does work with the headless=new flag
|
473
473
|
if (error.message.includes('net::ERR_INVALID_AUTH_CREDENTIALS')) {
|
474
474
|
res.status = constants.urlCheckStatuses.unauthorised.code;
|
@@ -510,13 +510,13 @@ export const isSitemapContent = (content: string) => {
|
|
510
510
|
};
|
511
511
|
|
512
512
|
export const checkUrl = async (
|
513
|
-
scanner,
|
514
|
-
url,
|
515
|
-
browser,
|
516
|
-
clonedDataDir,
|
517
|
-
playwrightDeviceDetailsObject,
|
518
|
-
isCustomFlow,
|
519
|
-
extraHTTPHeaders,
|
513
|
+
scanner: ScannerTypes,
|
514
|
+
url: string,
|
515
|
+
browser: string,
|
516
|
+
clonedDataDir: string,
|
517
|
+
playwrightDeviceDetailsObject: DeviceDescriptor,
|
518
|
+
isCustomFlow: boolean,
|
519
|
+
extraHTTPHeaders: Record<string, string>,
|
520
520
|
) => {
|
521
521
|
const res = await checkUrlConnectivityWithBrowser(
|
522
522
|
url,
|
@@ -548,7 +548,7 @@ export const parseHeaders = (header?: string): Record<string, string> => {
|
|
548
548
|
// parse HTTP headers from string
|
549
549
|
if (!header) return {};
|
550
550
|
const headerValues = header.split(', ');
|
551
|
-
const allHeaders = {};
|
551
|
+
const allHeaders: Record<string, string> = {};
|
552
552
|
headerValues.map((headerValue: string) => {
|
553
553
|
const headerValuePair = headerValue.split(/ (.*)/s);
|
554
554
|
if (headerValuePair.length < 2) {
|
@@ -776,11 +776,11 @@ export const getLinksFromSitemap = async (
|
|
776
776
|
password: string,
|
777
777
|
) => {
|
778
778
|
const scannedSitemaps = new Set<string>();
|
779
|
-
const urls = {}; // dictionary of requests to urls to be scanned
|
779
|
+
const urls: Record<string, Request> = {}; // dictionary of requests to urls to be scanned
|
780
780
|
|
781
781
|
const isLimitReached = () => Object.keys(urls).length >= maxLinksCount;
|
782
782
|
|
783
|
-
const addToUrlList = url => {
|
783
|
+
const addToUrlList = (url: string) => {
|
784
784
|
if (!url) return;
|
785
785
|
if (isDisallowedInRobotsTxt(url)) return;
|
786
786
|
|
@@ -803,14 +803,14 @@ export const getLinksFromSitemap = async (
|
|
803
803
|
urls[url] = request;
|
804
804
|
};
|
805
805
|
|
806
|
-
const addBasicAuthCredentials = (url, username, password) => {
|
806
|
+
const addBasicAuthCredentials = (url: string, username: string, password: string) => {
|
807
807
|
const urlObject = new URL(url);
|
808
808
|
urlObject.username = username;
|
809
809
|
urlObject.password = password;
|
810
810
|
return urlObject.toString();
|
811
811
|
};
|
812
812
|
|
813
|
-
const calculateCloseness = sitemapUrl => {
|
813
|
+
const calculateCloseness = (sitemapUrl: string) => {
|
814
814
|
// Remove 'http://', 'https://', and 'www.' prefixes from the URLs
|
815
815
|
const normalizedSitemapUrl = sitemapUrl.replace(/^(https?:\/\/)?(www\.)?/, '');
|
816
816
|
const normalizedUserUrlInput = userUrlInput
|
@@ -825,10 +825,16 @@ export const getLinksFromSitemap = async (
|
|
825
825
|
}
|
826
826
|
return 0;
|
827
827
|
};
|
828
|
-
const processXmlSitemap = async (
|
829
|
-
|
828
|
+
const processXmlSitemap = async (
|
829
|
+
$: cheerio.CheerioAPI,
|
830
|
+
sitemapType: number,
|
831
|
+
linkSelector: string,
|
832
|
+
dateSelector: string,
|
833
|
+
sectionSelector: string,
|
834
|
+
) => {
|
835
|
+
const urlList: { url: string; lastModifiedDate: Date }[] = [];
|
830
836
|
// Iterate through each URL element in the sitemap, collect url and modified date
|
831
|
-
$(sectionSelector).each((
|
837
|
+
$(sectionSelector).each((_index, urlElement) => {
|
832
838
|
let url;
|
833
839
|
if (sitemapType === constants.xmlSitemapTypes.atom) {
|
834
840
|
url = $(urlElement).find(linkSelector).prop('href');
|
@@ -850,8 +856,7 @@ export const getLinksFromSitemap = async (
|
|
850
856
|
}
|
851
857
|
|
852
858
|
// If closeness is the same, sort by last modified date in descending order
|
853
|
-
|
854
|
-
return dateDifference !== 0 ? dateDifference : 0; // Maintain original order for equal dates
|
859
|
+
return (b.lastModifiedDate?.getTime() || 0) - (a.lastModifiedDate?.getTime() || 0);
|
855
860
|
});
|
856
861
|
}
|
857
862
|
|
@@ -861,7 +866,7 @@ export const getLinksFromSitemap = async (
|
|
861
866
|
}
|
862
867
|
};
|
863
868
|
|
864
|
-
const processNonStandardSitemap = data => {
|
869
|
+
const processNonStandardSitemap = (data: string) => {
|
865
870
|
const urlsFromData = crawlee
|
866
871
|
.extractUrls({ string: data, urlRegExp: new RegExp('^(http|https):/{2}.+$', 'gmi') })
|
867
872
|
.slice(0, maxLinksCount);
|
@@ -934,7 +939,7 @@ export const getLinksFromSitemap = async (
|
|
934
939
|
const sitemapIndex = page.locator('sitemapindex');
|
935
940
|
const rss = page.locator('rss');
|
936
941
|
const feed = page.locator('feed');
|
937
|
-
const isRoot = async locator => (await locator.count()) > 0;
|
942
|
+
const isRoot = async (locator: Locator) => (await locator.count()) > 0;
|
938
943
|
|
939
944
|
if (await isRoot(urlSet)) {
|
940
945
|
data = await urlSet.evaluate(elem => elem.outerHTML);
|
@@ -1054,14 +1059,14 @@ export const getLinksFromSitemap = async (
|
|
1054
1059
|
return requestList;
|
1055
1060
|
};
|
1056
1061
|
|
1057
|
-
export const validEmail = email => {
|
1062
|
+
/**
 * Loose syntactic check for an e-mail address.
 *
 * Accepts any string shaped like `<something>@<something>.<something>`, where each
 * part is at least one character long (line terminators excluded). This is a sanity
 * check only; it does not verify that the address conforms to an RFC or exists.
 *
 * @param email - candidate e-mail address
 * @returns `true` when `email` matches the loose pattern, `false` otherwise
 */
export const validEmail = (email: string): boolean => {
  const emailRegex = /^.+@.+\..+$/u;

  return emailRegex.test(email);
};
|
1062
1067
|
|
1063
1068
|
// For new user flow.
|
1064
|
-
export const validName = name => {
|
1069
|
+
export const validName = (name: string) => {
|
1065
1070
|
// Allow only printable characters from any language
|
1066
1071
|
const regex = /^[\p{L}\p{N}\s'".,()\[\]{}!?:؛،؟…]+$/u;
|
1067
1072
|
|
@@ -1213,11 +1218,11 @@ export const getEdgeData = () => {
|
|
1213
1218
|
* @param {*} destDir destination directory
|
1214
1219
|
* @returns boolean indicating whether the operation was successful
|
1215
1220
|
*/
|
1216
|
-
const cloneChromeProfileCookieFiles = (options, destDir) => {
|
1221
|
+
const cloneChromeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, destDir: string) => {
|
1217
1222
|
let profileCookiesDir;
|
1218
1223
|
// Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
|
1219
1224
|
// and ../Chrome/<profile name>/Cookies for mac
|
1220
|
-
let profileNamesRegex;
|
1225
|
+
let profileNamesRegex: RegExp;
|
1221
1226
|
if (os.platform() === 'win32') {
|
1222
1227
|
profileCookiesDir = globSync('**/Network/Cookies', {
|
1223
1228
|
...options,
|
@@ -1288,11 +1293,11 @@ const cloneChromeProfileCookieFiles = (options, destDir) => {
|
|
1288
1293
|
* @param {*} destDir destination directory
|
1289
1294
|
* @returns boolean indicating whether the operation was successful
|
1290
1295
|
*/
|
1291
|
-
const cloneEdgeProfileCookieFiles = (options, destDir) => {
|
1296
|
+
const cloneEdgeProfileCookieFiles = (options: GlobOptionsWithFileTypesFalse, destDir: string) => {
|
1292
1297
|
let profileCookiesDir;
|
1293
1298
|
// Cookies file per profile is located in .../User Data/<profile name>/Network/Cookies for windows
|
1294
1299
|
// and ../Chrome/<profile name>/Cookies for mac
|
1295
|
-
let profileNamesRegex;
|
1300
|
+
let profileNamesRegex: RegExp;
|
1296
1301
|
// Ignores the cloned oobee directory if exists
|
1297
1302
|
if (os.platform() === 'win32') {
|
1298
1303
|
profileCookiesDir = globSync('**/Network/Cookies', {
|
@@ -1361,7 +1366,7 @@ const cloneEdgeProfileCookieFiles = (options, destDir) => {
|
|
1361
1366
|
* @param {string} destDir - destination directory
|
1362
1367
|
* @returns boolean indicating whether the operation was successful
|
1363
1368
|
*/
|
1364
|
-
const cloneLocalStateFile = (options, destDir) => {
|
1369
|
+
const cloneLocalStateFile = (options: GlobOptionsWithFileTypesFalse, destDir: string) => {
|
1365
1370
|
const localState = globSync('**/*Local State', {
|
1366
1371
|
...options,
|
1367
1372
|
maxDepth: 1,
|
@@ -1647,8 +1652,9 @@ export const getPlaywrightDeviceDetailsObject = (
|
|
1647
1652
|
deviceChosen: string,
|
1648
1653
|
customDevice: string,
|
1649
1654
|
viewportWidth: number,
|
1650
|
-
) => {
|
1651
|
-
let playwrightDeviceDetailsObject =
|
1655
|
+
): DeviceDescriptor => {
|
1656
|
+
let playwrightDeviceDetailsObject = devices['Desktop Chrome']; // default to Desktop Chrome
|
1657
|
+
|
1652
1658
|
if (deviceChosen === 'Mobile' || customDevice === 'iPhone 11') {
|
1653
1659
|
playwrightDeviceDetailsObject = devices['iPhone 11'];
|
1654
1660
|
} else if (customDevice === 'Samsung Galaxy S9+') {
|
@@ -1656,6 +1662,11 @@ export const getPlaywrightDeviceDetailsObject = (
|
|
1656
1662
|
} else if (viewportWidth) {
|
1657
1663
|
playwrightDeviceDetailsObject = {
|
1658
1664
|
viewport: { width: viewportWidth, height: 720 },
|
1665
|
+
isMobile: false,
|
1666
|
+
hasTouch: false,
|
1667
|
+
userAgent: devices['Desktop Chrome'].userAgent,
|
1668
|
+
deviceScaleFactor: 1,
|
1669
|
+
defaultBrowserType: 'chromium',
|
1659
1670
|
};
|
1660
1671
|
} else if (customDevice) {
|
1661
1672
|
playwrightDeviceDetailsObject = devices[customDevice.replace(/_/g, ' ')];
|
@@ -1777,14 +1788,17 @@ export const submitForm = async (
|
|
1777
1788
|
}
|
1778
1789
|
};
|
1779
1790
|
|
1780
|
-
export async function initModifiedUserAgent(
|
1791
|
+
export async function initModifiedUserAgent(
|
1792
|
+
browser?: string,
|
1793
|
+
playwrightDeviceDetailsObject?: object,
|
1794
|
+
) {
|
1781
1795
|
const isHeadless = process.env.CRAWLEE_HEADLESS === '1';
|
1782
|
-
|
1796
|
+
|
1783
1797
|
// If headless mode is enabled, ensure the headless flag is set.
|
1784
1798
|
if (isHeadless && !constants.launchOptionsArgs.includes('--headless=new')) {
|
1785
1799
|
constants.launchOptionsArgs.push('--headless=new');
|
1786
1800
|
}
|
1787
|
-
|
1801
|
+
|
1788
1802
|
// Build the launch options using your production settings.
|
1789
1803
|
// headless is forced to false as in your persistent context, and we merge in getPlaywrightLaunchOptions and device details.
|
1790
1804
|
const launchOptions = {
|
@@ -1803,17 +1817,16 @@ export async function initModifiedUserAgent(browser?: string, playwrightDeviceDe
|
|
1803
1817
|
|
1804
1818
|
// Modify the UA:
|
1805
1819
|
// Replace "HeadlessChrome" with "Chrome" if present.
|
1806
|
-
|
1820
|
+
const modifiedUA = defaultUA.includes('HeadlessChrome')
|
1807
1821
|
? defaultUA.replace('HeadlessChrome', 'Chrome')
|
1808
1822
|
: defaultUA;
|
1809
|
-
|
1823
|
+
|
1810
1824
|
// Push the modified UA flag into your global launch options.
|
1811
1825
|
constants.launchOptionsArgs.push(`--user-agent=${modifiedUA}`);
|
1812
1826
|
// Optionally log the modified UA.
|
1813
1827
|
// console.log('Modified User Agent:', modifiedUA);
|
1814
1828
|
}
|
1815
1829
|
|
1816
|
-
|
1817
1830
|
/**
|
1818
1831
|
* @param {string} browser browser name ("chrome" or "edge", null for chromium, the default Playwright browser)
|
1819
1832
|
* @returns playwright launch options object. For more details: https://playwright.dev/docs/api/class-browsertype#browser-type-launch
|
@@ -1856,25 +1869,25 @@ export const urlWithoutAuth = (url: string): string => {
|
|
1856
1869
|
return parsedUrl.toString();
|
1857
1870
|
};
|
1858
1871
|
|
1859
|
-
export const waitForPageLoaded = async (page, timeout = 10000) => {
|
1872
|
+
export const waitForPageLoaded = async (page: Page, timeout = 10000) => {
|
1860
1873
|
const OBSERVER_TIMEOUT = timeout; // Ensure observer timeout does not exceed the main timeout
|
1861
1874
|
|
1862
1875
|
return Promise.race([
|
1863
1876
|
page.waitForLoadState('load'), // Ensure page load completes
|
1864
1877
|
page.waitForLoadState('networkidle'), // Wait for network requests to settle
|
1865
1878
|
new Promise(resolve => setTimeout(resolve, timeout)), // Hard timeout as a fallback
|
1866
|
-
page.evaluate(
|
1867
|
-
return new Promise(
|
1879
|
+
page.evaluate(OBSERVER_TIMEOUT => {
|
1880
|
+
return new Promise(resolve => {
|
1868
1881
|
// Skip mutation check for PDFs
|
1869
1882
|
if (document.contentType === 'application/pdf') {
|
1870
1883
|
resolve('Skipping DOM mutation check for PDF.');
|
1871
1884
|
return;
|
1872
1885
|
}
|
1873
1886
|
|
1874
|
-
let timeout;
|
1887
|
+
let timeout: NodeJS.Timeout;
|
1875
1888
|
let mutationCount = 0;
|
1876
1889
|
const MAX_MUTATIONS = 250; // Limit max mutations
|
1877
|
-
const mutationHash = {};
|
1890
|
+
const mutationHash: Record<string, number> = {};
|
1878
1891
|
|
1879
1892
|
const observer = new MutationObserver(mutationsList => {
|
1880
1893
|
clearTimeout(timeout);
|
@@ -1916,14 +1929,17 @@ export const waitForPageLoaded = async (page, timeout = 10000) => {
|
|
1916
1929
|
resolve('Observer timeout reached, exiting.');
|
1917
1930
|
}, OBSERVER_TIMEOUT);
|
1918
1931
|
|
1919
|
-
observer.observe(document.documentElement, {
|
1932
|
+
observer.observe(document.documentElement, {
|
1933
|
+
childList: true,
|
1934
|
+
subtree: true,
|
1935
|
+
attributes: true,
|
1936
|
+
});
|
1920
1937
|
});
|
1921
1938
|
}, OBSERVER_TIMEOUT), // Pass OBSERVER_TIMEOUT dynamically to the browser context
|
1922
1939
|
]);
|
1923
1940
|
};
|
1924
1941
|
|
1925
|
-
|
1926
|
-
function isValidHttpUrl(urlString) {
|
1942
|
+
/**
 * Checks whether a string looks like an absolute HTTP(S) URL.
 *
 * The string must start with `http://` or `https://` (lower case) and be followed by
 * at least one character, none of which may be a space or a double quote.
 *
 * @param urlString - candidate URL
 * @returns `true` when `urlString` matches the pattern, `false` otherwise
 */
function isValidHttpUrl(urlString: string): boolean {
  // `https?` is equivalent to the alternation `(http|https)` without an unused capture group
  const pattern = /^https?:\/\/[^ "]+$/;
  return pattern.test(urlString);
}
|
@@ -29,6 +29,7 @@ export const blackListedFileExtensions = [
|
|
29
29
|
'zip',
|
30
30
|
'webp',
|
31
31
|
'json',
|
32
|
+
'xml'
|
32
33
|
];
|
33
34
|
|
34
35
|
export const getIntermediateScreenshotsPath = (datasetsPath: string): string =>
|
@@ -217,7 +218,7 @@ export const guiInfoStatusTypes = {
|
|
217
218
|
DUPLICATE: 'duplicate',
|
218
219
|
};
|
219
220
|
|
220
|
-
let launchOptionsArgs = [];
|
221
|
+
let launchOptionsArgs: string[] = [];
|
221
222
|
|
222
223
|
// Check if running in docker container
|
223
224
|
if (fs.existsSync('/.dockerenv')) {
|
@@ -444,3 +445,82 @@ export enum RuleFlags {
|
|
444
445
|
DISABLE_OOBEE = 'disable-oobee',
|
445
446
|
ENABLE_WCAG_AAA = 'enable-wcag-aaa',
|
446
447
|
}
|
448
|
+
|
449
|
+
// Note: Not all status codes will appear, because the crawler handles responses on a best-effort basis first (e.g. it tries to follow redirects).
|
450
|
+
export const STATUS_CODE_METADATA: Record<number,string> = {
|
451
|
+
// Custom Codes for Oobee's use
|
452
|
+
0: 'Page Excluded',
|
453
|
+
1: 'Not A Supported Document',
|
454
|
+
2: 'Web Crawler Errored',
|
455
|
+
|
456
|
+
// 599 is set because Crawlee returns response status 100, 102, 103 as 599
|
457
|
+
599: 'Uncommon Response Status Code Received',
|
458
|
+
|
459
|
+
// This is Status OK but thrown when the crawler cannot scan the page
|
460
|
+
200: '200 - However Page Could Not Be Scanned',
|
461
|
+
|
462
|
+
// 1xx - Informational
|
463
|
+
100: '100 - Continue',
|
464
|
+
101: '101 - Switching Protocols',
|
465
|
+
102: '102 - Processing',
|
466
|
+
103: '103 - Early Hints',
|
467
|
+
|
468
|
+
// 2xx - Browser Doesn't Support
|
469
|
+
204: '204 - No Content',
|
470
|
+
205: '205 - Reset Content',
|
471
|
+
|
472
|
+
// 3xx - Redirection
|
473
|
+
300: '300 - Multiple Choices',
|
474
|
+
301: '301 - Moved Permanently',
|
475
|
+
302: '302 - Found',
|
476
|
+
303: '303 - See Other',
|
477
|
+
304: '304 - Not Modified',
|
478
|
+
305: '305 - Use Proxy',
|
479
|
+
307: '307 - Temporary Redirect',
|
480
|
+
308: '308 - Permanent Redirect',
|
481
|
+
|
482
|
+
// 4xx - Client Error
|
483
|
+
400: '400 - Bad Request',
|
484
|
+
401: '401 - Unauthorized',
|
485
|
+
402: '402 - Payment Required',
|
486
|
+
403: '403 - Forbidden',
|
487
|
+
404: '404 - Not Found',
|
488
|
+
405: '405 - Method Not Allowed',
|
489
|
+
406: '406 - Not Acceptable',
|
490
|
+
407: '407 - Proxy Authentication Required',
|
491
|
+
408: '408 - Request Timeout',
|
492
|
+
409: '409 - Conflict',
|
493
|
+
410: '410 - Gone',
|
494
|
+
411: '411 - Length Required',
|
495
|
+
412: '412 - Precondition Failed',
|
496
|
+
413: '413 - Payload Too Large',
|
497
|
+
414: '414 - URI Too Long',
|
498
|
+
415: '415 - Unsupported Media Type',
|
499
|
+
416: '416 - Range Not Satisfiable',
|
500
|
+
417: '417 - Expectation Failed',
|
501
|
+
418: "418 - I'm a teapot",
|
502
|
+
421: '421 - Misdirected Request',
|
503
|
+
422: '422 - Unprocessable Content',
|
504
|
+
423: '423 - Locked',
|
505
|
+
424: '424 - Failed Dependency',
|
506
|
+
425: '425 - Too Early',
|
507
|
+
426: '426 - Upgrade Required',
|
508
|
+
428: '428 - Precondition Required',
|
509
|
+
429: '429 - Too Many Requests',
|
510
|
+
431: '431 - Request Header Fields Too Large',
|
511
|
+
451: '451 - Unavailable For Legal Reasons',
|
512
|
+
|
513
|
+
// 5xx - Server Error
|
514
|
+
500: '500 - Internal Server Error',
|
515
|
+
501: '501 - Not Implemented',
|
516
|
+
502: '502 - Bad Gateway',
|
517
|
+
503: '503 - Service Unavailable',
|
518
|
+
504: '504 - Gateway Timeout',
|
519
|
+
505: '505 - HTTP Version Not Supported',
|
520
|
+
506: '506 - Variant Also Negotiates',
|
521
|
+
507: '507 - Insufficient Storage',
|
522
|
+
508: '508 - Loop Detected',
|
523
|
+
510: '510 - Not Extended',
|
524
|
+
511: '511 - Network Authentication Required',
|
525
|
+
|
526
|
+
};
|
package/src/constants/oobeeAi.ts
CHANGED
@@ -24,7 +24,7 @@ export const oobeeAiRules = [
|
|
24
24
|
'autocomplete-valid',
|
25
25
|
];
|
26
26
|
|
27
|
-
export const oobeeAiHtmlETL = htmlSnippet => {
|
27
|
+
export const oobeeAiHtmlETL = (htmlSnippet: string) => {
|
28
28
|
// Whitelisted attributes (to not drop)
|
29
29
|
// i.e. any other attribute will be dropped
|
30
30
|
const whitelistedAttributes = [
|
@@ -60,12 +60,12 @@ export const oobeeAiHtmlETL = htmlSnippet => {
|
|
60
60
|
`aria-labelledby`,
|
61
61
|
];
|
62
62
|
|
63
|
-
const sortAlphaAttributes = html => {
|
63
|
+
const sortAlphaAttributes = (html: string) => {
|
64
64
|
let entireHtml = '';
|
65
65
|
const htmlOpeningTagRegex = /<[^>]+/g;
|
66
66
|
const htmlTagmatches = html.match(htmlOpeningTagRegex);
|
67
67
|
|
68
|
-
let sortedHtmlTag;
|
68
|
+
let sortedHtmlTag: string = '';
|
69
69
|
|
70
70
|
htmlTagmatches.forEach(htmlTag => {
|
71
71
|
const closingTag = htmlTag.trim().slice(-1) === '/' ? '/>' : '>';
|
@@ -112,7 +112,7 @@ export const oobeeAiHtmlETL = htmlSnippet => {
|
|
112
112
|
|
113
113
|
// For all attributes within mutedAttributeValues array
|
114
114
|
// replace their values with "something" while maintaining the attribute
|
115
|
-
const muteAttributeValues = html => {
|
115
|
+
const muteAttributeValues = (html: string) => {
|
116
116
|
const regex = /(\s+)([\w-]+)(\s*=\s*")([^"]*)(")/g;
|
117
117
|
|
118
118
|
// p1 is the whitespace before the attribute
|
@@ -120,7 +120,7 @@ export const oobeeAiHtmlETL = htmlSnippet => {
|
|
120
120
|
// p3 is the attribute value before the replacement
|
121
121
|
// p4 is the attribute value (replaced with "...")
|
122
122
|
// p5 is the closing quote of the attribute value
|
123
|
-
return html.replace(regex, (match, p1, p2, p3,
|
123
|
+
return html.replace(regex, (match, p1, p2, p3, _p4, p5) => {
|
124
124
|
if (mutedAttributeValues.includes(p2)) {
|
125
125
|
return `${p1}${p2}${p3}...${p5}`;
|
126
126
|
}
|
@@ -129,7 +129,7 @@ export const oobeeAiHtmlETL = htmlSnippet => {
|
|
129
129
|
};
|
130
130
|
|
131
131
|
// Drop all attributes from the HTML snippet except whitelisted
|
132
|
-
const dropAllExceptWhitelisted = html => {
|
132
|
+
const dropAllExceptWhitelisted = (html: string) => {
|
133
133
|
const regex = new RegExp(
|
134
134
|
`(\\s+)(?!${whitelistedAttributes.join(`|`)})([\\w-]+)(\\s*=\\s*"[^"]*")`,
|
135
135
|
`g`,
|
@@ -12,12 +12,13 @@ import {
|
|
12
12
|
validEmail,
|
13
13
|
validName,
|
14
14
|
validateCustomFlowLabel,
|
15
|
+
parseHeaders,
|
15
16
|
} from './common.js';
|
16
17
|
import constants, { BrowserTypes, ScannerTypes } from './constants.js';
|
17
18
|
|
18
19
|
const userData = getUserDataTxt();
|
19
20
|
|
20
|
-
const questions = [];
|
21
|
+
const questions: Question[] = [];
|
21
22
|
|
22
23
|
const startScanQuestions = [
|
23
24
|
{
|
@@ -95,7 +96,7 @@ const startScanQuestions = [
|
|
95
96
|
clonedBrowserDataDir,
|
96
97
|
playwrightDeviceDetailsObject,
|
97
98
|
answers.scanner === ScannerTypes.CUSTOM,
|
98
|
-
answers.header,
|
99
|
+
parseHeaders(answers.header),
|
99
100
|
);
|
100
101
|
|
101
102
|
deleteClonedProfiles(browserToRun);
|
@@ -1,4 +1,4 @@
|
|
1
|
-
import crawlee, { CrawlingContext, PlaywrightGotoOptions } from 'crawlee';
|
1
|
+
import crawlee, { CrawlingContext, PlaywrightGotoOptions, Request } from 'crawlee';
|
2
2
|
import axe, { AxeResults, ImpactValue, NodeResult, Result, resultGroups, TagValue } from 'axe-core';
|
3
3
|
import { BrowserContext, Page } from 'playwright';
|
4
4
|
import {
|
@@ -18,7 +18,7 @@ import { framesCheck } from './custom/framesCheck.js';
|
|
18
18
|
import { findElementByCssSelector } from './custom/findElementByCssSelector.js';
|
19
19
|
import { getAxeConfiguration } from './custom/getAxeConfiguration.js';
|
20
20
|
import { flagUnlabelledClickableElements } from './custom/flagUnlabelledClickableElements.js';
|
21
|
-
import
|
21
|
+
import xPathToCss from './custom/xPathToCss.js';
|
22
22
|
|
23
23
|
// types
|
24
24
|
interface AxeResultsWithScreenshot extends AxeResults {
|
@@ -69,6 +69,30 @@ type FilteredResults = {
|
|
69
69
|
actualUrl?: string;
|
70
70
|
};
|
71
71
|
|
72
|
+
/**
 * Truncates an HTML snippet so that its UTF-8 encoding fits within `maxBytes`.
 *
 * A snippet that already fits is returned unchanged. Otherwise the longest prefix of
 * `html` that still fits together with `suffix` is kept and `suffix` is appended.
 * The cut never falls inside a UTF-16 surrogate pair, so the result does not end in
 * half of an astral character (e.g. an emoji) just before the suffix.
 *
 * @param html - snippet to truncate
 * @param maxBytes - maximum UTF-8 byte length of the returned string (default 1024)
 * @param suffix - marker appended to a truncated snippet (default `…`)
 * @returns `html` itself when it fits; otherwise a truncated prefix followed by
 *   `suffix`; an empty string when `suffix` alone does not fit in `maxBytes`
 */
const truncateHtml = (html: string, maxBytes = 1024, suffix = '…'): string => {
  const encoder = new TextEncoder();
  const byteLength = (text: string): number => encoder.encode(text).length;

  if (byteLength(html) <= maxBytes) return html;

  // Even an empty prefix plus the suffix would exceed the budget
  if (byteLength(suffix) > maxBytes) return '';

  // Binary search for the largest prefix length (in UTF-16 code units) that fits
  // together with the suffix. Encoded size never shrinks as the prefix grows.
  let low = 0;
  let high = html.length;
  let cut = 0;

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);

    if (byteLength(html.slice(0, mid) + suffix) <= maxBytes) {
      cut = mid;
      low = mid + 1;
    } else {
      high = mid - 1;
    }
  }

  // slice() works on code units: if the cut separates a high surrogate from its low
  // surrogate, the dangling half would be encoded as U+FFFD. Drop it instead.
  if (cut > 0 && cut < html.length) {
    const before = html.charCodeAt(cut - 1);
    const after = html.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cut -= 1;
  }

  return html.slice(0, cut) + suffix;
};
|
95
|
+
|
72
96
|
export const filterAxeResults = (
|
73
97
|
results: AxeResultsWithScreenshot,
|
74
98
|
pageTitle: string,
|
@@ -94,13 +118,13 @@ export const filterAxeResults = (
|
|
94
118
|
|
95
119
|
if (conformance[0] !== 'best-practice' && !wcagRegex.test(conformance[0])) {
|
96
120
|
conformance.sort((a, b) => {
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
121
|
+
if (wcagRegex.test(a) && !wcagRegex.test(b)) {
|
122
|
+
return -1;
|
123
|
+
}
|
124
|
+
if (!wcagRegex.test(a) && wcagRegex.test(b)) {
|
125
|
+
return 1;
|
126
|
+
}
|
127
|
+
return 0;
|
104
128
|
});
|
105
129
|
}
|
106
130
|
|
@@ -124,6 +148,7 @@ export const filterAxeResults = (
|
|
124
148
|
if (html.includes('</script>')) {
|
125
149
|
finalHtml = html.replaceAll('</script>', '</script>');
|
126
150
|
}
|
151
|
+
finalHtml = truncateHtml(finalHtml);
|
127
152
|
|
128
153
|
const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : null;
|
129
154
|
|
@@ -141,7 +166,6 @@ export const filterAxeResults = (
|
|
141
166
|
};
|
142
167
|
|
143
168
|
nodes.forEach(node => {
|
144
|
-
const { impact } = node;
|
145
169
|
const hasWcagA = conformance.some(tag => /^wcag\d*a$/.test(tag));
|
146
170
|
const hasWcagAA = conformance.some(tag => /^wcag\d*aa$/.test(tag));
|
147
171
|
// const hasWcagAAA = conformance.some(tag => /^wcag\d*aaa$/.test(tag));
|
@@ -178,7 +202,10 @@ export const filterAxeResults = (
|
|
178
202
|
items: [],
|
179
203
|
};
|
180
204
|
}
|
181
|
-
|
205
|
+
|
206
|
+
const finalHtml = truncateHtml(html);
|
207
|
+
passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: '' });
|
208
|
+
|
182
209
|
passed.totalItems += 1;
|
183
210
|
passed.rules[rule].totalItems += 1;
|
184
211
|
totalItems += 1;
|
@@ -227,7 +254,7 @@ export const runAxeScript = async ({
|
|
227
254
|
let mutationCount = 0;
|
228
255
|
const MAX_MUTATIONS = 250;
|
229
256
|
const MAX_SAME_MUTATION_LIMIT = 10;
|
230
|
-
const mutationHash = {};
|
257
|
+
const mutationHash: Record<string, number> = {};
|
231
258
|
|
232
259
|
const observer = new MutationObserver(mutationsList => {
|
233
260
|
clearTimeout(timeout);
|
@@ -281,6 +308,8 @@ export const runAxeScript = async ({
|
|
281
308
|
silentLogger.warn(`Error while checking for DOM mutations: ${e}`);
|
282
309
|
}
|
283
310
|
|
311
|
+
// Omit logging of browser console errors to reduce unnecessary verbosity
|
312
|
+
/*
|
284
313
|
page.on('console', msg => {
|
285
314
|
const type = msg.type();
|
286
315
|
if (type === 'error') {
|
@@ -289,6 +318,7 @@ export const runAxeScript = async ({
|
|
289
318
|
silentLogger.log({ level: 'info', message: msg.text() });
|
290
319
|
}
|
291
320
|
});
|
321
|
+
*/
|
292
322
|
|
293
323
|
const disableOobee = ruleset.includes(RuleFlags.DISABLE_OOBEE);
|
294
324
|
const enableWcagAaa = ruleset.includes(RuleFlags.ENABLE_WCAG_AAA);
|
@@ -371,7 +401,7 @@ export const runAxeScript = async ({
|
|
371
401
|
help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
|
372
402
|
helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
|
373
403
|
nodes: escapedCssSelectors
|
374
|
-
.map(cssSelector => ({
|
404
|
+
.map((cssSelector: string): NodeResult => ({
|
375
405
|
html: findElementByCssSelector(cssSelector),
|
376
406
|
target: [cssSelector],
|
377
407
|
impact: 'serious' as ImpactValue,
|
@@ -415,8 +445,7 @@ export const runAxeScript = async ({
|
|
415
445
|
framesCheckFunctionString: framesCheck.toString(),
|
416
446
|
findElementByCssSelectorFunctionString: findElementByCssSelector.toString(),
|
417
447
|
getAxeConfigurationFunctionString: getAxeConfiguration.toString(),
|
418
|
-
flagUnlabelledClickableElementsFunctionString:
|
419
|
-
flagUnlabelledClickableElements.toString(),
|
448
|
+
flagUnlabelledClickableElementsFunctionString: flagUnlabelledClickableElements.toString(),
|
420
449
|
xPathToCssFunctionString: xPathToCss.toString(),
|
421
450
|
},
|
422
451
|
);
|
@@ -467,7 +496,7 @@ export const postNavigationHooks = [
|
|
467
496
|
},
|
468
497
|
];
|
469
498
|
|
470
|
-
export const failedRequestHandler = async ({ request }) => {
|
499
|
+
export const failedRequestHandler = async ({ request }: { request: Request }) => {
|
471
500
|
guiInfoLog(guiInfoStatusTypes.ERROR, { numScanned: 0, urlScanned: request.url });
|
472
501
|
crawlee.log.error(`Failed Request - ${request.url}: ${request.errorMessages}`);
|
473
502
|
};
|