@govtechsg/oobee 0.10.42 → 0.10.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/REPORTS.md +71 -2
- package/package.json +3 -2
- package/src/cli.ts +2 -11
- package/src/constants/common.ts +68 -52
- package/src/constants/constants.ts +81 -1
- package/src/constants/oobeeAi.ts +6 -6
- package/src/constants/questions.ts +3 -2
- package/src/crawlers/commonCrawlerFunc.ts +16 -15
- package/src/crawlers/crawlDomain.ts +82 -84
- package/src/crawlers/crawlIntelligentSitemap.ts +21 -19
- package/src/crawlers/crawlSitemap.ts +120 -109
- package/src/crawlers/custom/findElementByCssSelector.ts +1 -1
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +8 -8
- package/src/crawlers/custom/xPathToCss.ts +10 -10
- package/src/crawlers/runCustom.ts +1 -1
- package/src/index.ts +3 -4
- package/src/logs.ts +1 -1
- package/src/mergeAxeResults.ts +3 -5
- package/src/npmIndex.ts +12 -8
- package/src/screenshotFunc/htmlScreenshotFunc.ts +7 -19
- package/src/types/text-readability.d.ts +3 -0
- package/src/types/types.ts +1 -1
- package/src/utils.ts +128 -114
- package/src/xPathToCss.ts +0 -186
- package/src/xPathToCssCypress.ts +0 -178
package/src/xPathToCss.ts
DELETED
@@ -1,186 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* XPath to CSS
|
3
|
-
*
|
4
|
-
* Utility function for converting XPath expressions to CSS selectors
|
5
|
-
*
|
6
|
-
* Originally written in Python by [santiycr](https://github.com/santiycr) for
|
7
|
-
* [cssify](https://github.com/santiycr/cssify) and ported to JavaScript by
|
8
|
-
* [Dither](https://github.com/Dither). Converted to ES2015 and packaged as an npm module by
|
9
|
-
* [svenheden](https://github.com/svenheden)
|
10
|
-
*/
|
11
|
-
|
12
|
-
'use strict';
|
13
|
-
|
14
|
-
import { consoleLogger } from "./logs.js";
|
15
|
-
|
16
|
-
/**
 * Checks whether an XPath expression stays within the subset this module can translate to CSS.
 *
 * An expression is rejected when it is not a string, when nothing is left after removing
 * whitespace, `-`, `_` and `=`, or when it contains any construct from the unsupported list
 * below (detected by deleting every such construct and comparing lengths).
 *
 * @param {*} expr - Candidate XPath expression.
 * @returns {boolean} `true` when no unsupported construct was found.
 */
const isValidXPath = expr => {
  // Previously only `undefined` was guarded, so `null` or a number crashed on `.replace`.
  if (typeof expr !== 'string') {
    return false;
  }

  // Nothing meaningful to convert (only whitespace, dashes, underscores or equals signs).
  if (expr.replace(/[\s\-_=]/g, '') === '') {
    return false;
  }

  // Function calls, boolean operators, nested paths inside predicates and axes with no CSS equivalent.
  const unsupported = /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi;

  return expr.length === expr.replace(unsupported, '').length;
};
|
21
|
-
|
22
|
-
/**
 * Builds the regular expression that tokenises an XPath into location steps.
 *
 * The pattern is written as a Python-style template: `%(name)s` placeholders are filled in from
 * a fragment table and the `?P<name>` group labels are then stripped, so the resulting captures
 * are positional. Indices of the captures that matter after expansion:
 *   3 = value of `id(...)`, 4 = axis (`/` or `//`, optionally with `following-sibling::`),
 *   5 = tag name, 7 = whole `[...]` predicate, 10 / 12 = attribute and value of `[@a="v"]`,
 *   13 = whole `contains(...)`, 14 / 16 = its attribute and value, 18 = positional index.
 *
 * @returns {RegExp} a new global, case-insensitive regex on every call.
 */
const getValidationRegex = () => {
  // Sub-patterns spliced into the template in place of the `%(name)s` placeholders.
  const fragments = {
    "tag": "([a-zA-Z][a-zA-Z0-9:-]*|\\*)",
    // The trailing `)` closes the `mattr` / `cattr` group the template opens before the placeholder.
    "attribute": "[.a-zA-Z_:][-\\w:.]*(\\(\\))?)",
    "value": "\\s*[\\w/:][-/\\w\\s,:;.]*"
  };

  const template = [
    "(?P<node>",
    "(",
    "^id\\([\"\\']?(?P<idvalue>%(value)s)[\"\\']?\\)", // special case! `id(idValue)`
    "|",
    "(?P<nav>//?(?:following-sibling::)?)(?P<tag>%(tag)s)", // `//div`
    "(\\[(",
    "(?P<matched>(?P<mattr>@?%(attribute)s=[\"\\'](?P<mvalue>%(value)s))[\"\\']", // `[@id="well"]` supported and `[text()="yes"]` is not
    "|",
    "(?P<contained>contains\\((?P<cattr>@?%(attribute)s,\\s*[\"\\'](?P<cvalue>%(value)s)[\"\\']\\))", // `[contains(@id, "bleh")]` supported and `[contains(text(), "some")]` is not
    ")\\])?",
    "(\\[\\s*(?P<nth>\\d+|last\\(\\s*\\))\\s*\\])?",
    ")",
    ")"
  ].join('');

  // Fill in every placeholder, in the order the fragments are declared.
  const filled = Object.keys(fragments).reduce(
    (pattern, name) => pattern.replace(new RegExp('%\\(' + name + '\\)s', 'gi'), fragments[name]),
    template
  );

  // JavaScript does not understand Python's `(?P<name>` syntax, so drop the labels.
  const groupLabels = ['node', 'idvalue', 'nav', 'tag', 'matched', 'mattr', 'mvalue', 'contained', 'cattr', 'cvalue', 'nth']
    .map(label => '\\?P<' + label + '>')
    .join('|');

  return new RegExp(filled.replace(new RegExp(groupLabels, 'gi'), ''), 'gi');
};
|
52
|
-
|
53
|
-
/**
 * Normalises the class-token idiom before conversion.
 *
 * Rewrites every `contains(concat(" ", @class, " "), " name ")` into `@class="name"`, which the
 * step tokenizer can handle directly.
 *
 * @param {string} expr - Raw XPath expression.
 * @returns {string} The expression with each class-token test replaced.
 */
const preParseXpath = expr => {
  const classTokenTest = /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9-_]+)\s+["']\)/gi;
  const classEquality = '@class="$1"';
  return expr.replace(classTokenTest, classEquality);
};
|
56
|
-
|
57
|
-
/**
 * Backslash-escapes CSS-special characters inside every `#id` of a selector string.
 *
 * An id token runs from `#` to the next space or `>`. The positional pseudo-classes that
 * xPathToCss appends (`:first-of-type`, `:last-of-type`, `:nth-of-type(n)`) and the `,` that
 * separates union alternatives are selector syntax rather than part of the id, so they are
 * split off and left unescaped. Previously they were escaped too, turning `#x:first-of-type`
 * into `#x\:first-of-type` and `#a, #b` into `#a\, #b`.
 *
 * @param {string} cssSelector - Selector whose id parts may contain unescaped characters.
 * @returns {string} The selector with each id part escaped.
 */
function escapeCssIdSelectors(cssSelector) {
  const specialChars = /[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g;
  // Optional positional pseudo-class followed by an optional list comma, anchored at the token end.
  // Every part is optional, so this always matches (possibly the empty string).
  const selectorSuffix = /(?::(?:first|last)-of-type|:nth-of-type\(\d+\))?,?$/;

  return cssSelector.replace(/#([^ >]+)/g, (match, token) => {
    const suffix = selectorSuffix.exec(token)[0];
    const id = token.slice(0, token.length - suffix.length);
    // Escape special characters in the id part only
    return '#' + id.replace(specialChars, '\\$&') + suffix;
  });
}
|
63
|
-
|
64
|
-
// Translates the predicate captured for one XPath step into a CSS id, class or attribute selector.
// `nodes` is a match of the regex from getValidationRegex(); the indices are its positional captures.
const attributeSelectorFor = nodes => {
  // `[contains(@attr, "value")]` -> substring attribute selector
  if (nodes[13]) {
    const containsAttr = nodes[14];
    if (!containsAttr.startsWith('@')) {
      throw new Error('Invalid or unsupported XPath attribute: ' + containsAttr);
    }
    return '[' + containsAttr.slice(1) + '*="' + nodes[16] + '"]';
  }

  // `[@attr="value"]`
  if (nodes[7]) {
    const attrName = nodes[10];
    // Trim both ends; the previous non-global regex trimmed one side only.
    const attrValue = nodes[12].trim();

    switch (attrName) {
      case '@id':
        return '#' + attrValue.replace(/\s+/g, '#');
      case '@class':
        return '.' + attrValue.replace(/\s+/g, '.');
      case 'text()':
      case '.':
        throw new Error('Invalid or unsupported XPath attribute: ' + attrName);
      default:
        if (!attrName.startsWith('@')) {
          throw new Error('Invalid or unsupported XPath attribute: ' + attrName);
        }
        // Quote exactly once; values containing a space used to come out as `[a=""b c""]`.
        return '[' + attrName.slice(1) + '="' + attrValue + '"]';
    }
  }

  // `id("value")` special form
  if (nodes[3]) {
    return '#' + nodes[3].trim().replace(/\s+/g, '#');
  }

  return '';
};

// Translates a positional predicate (`[n]` or `[last()]`) into the matching `*-of-type` pseudo-class.
const positionSelectorFor = nth => {
  if (!nth) {
    return '';
  }
  if (nth.includes('last')) {
    return ':last-of-type';
  }
  const index = Number.parseInt(nth, 10);
  // Reachable: the step regex is case-insensitive, so e.g. `[LAST()]` arrives here.
  if (Number.isNaN(index)) {
    throw new Error('Invalid or unsupported XPath attribute: ' + nth);
  }
  return index === 1 ? ':first-of-type' : ':nth-of-type(' + nth + ')';
};

/**
 * Converts an XPath expression into a CSS selector.
 *
 * `|`-separated alternatives are converted independently and joined with `, `. Within an
 * alternative each location step becomes `combinator + tag + attribute + position`, where `//`
 * maps to a descendant combinator, `/` to `>` and `following-sibling::` to `+`.
 *
 * @param {string} expr - XPath expression to convert.
 * @returns {string} The CSS selector, with special characters in id selectors escaped.
 * @throws {Error} If `expr` is empty, if an alternative yields no selector, or if a predicate
 *   uses an attribute form that cannot be expressed in CSS (e.g. `text()`).
 */
export const xPathToCss = expr => {
  if (!expr) {
    throw new Error('Missing XPath expression');
  }

  const xpath = preParseXpath(expr);

  if (!isValidXPath(xpath)) {
    // Log instead of throwing so that conversion is still attempted for XPath this module does
    // not formally support, for example
    // //*[@id="google_ads_iframe_/4654/dweb/imu1/homepage/landingpage/na_0"]/html/body/div[1]/a
    // whose id contains slashes; those are escaped by escapeCssIdSelectors() at the end.
    consoleLogger.error(`Invalid or unsupported XPath: ${xpath}`);
  }

  const stepPattern = getValidationRegex();
  const cssSelectors = [];

  for (const alternative of xpath.split('|')) {
    // Matches the original loop condition: conversion stops at the first empty alternative.
    if (!alternative) {
      break;
    }

    const steps = [];
    let nodes;

    // `stepPattern` is global, so each exec() resumes after the previous step and returns
    // null (resetting lastIndex) once the alternative is exhausted.
    while ((nodes = stepPattern.exec(alternative)) !== null) {
      const axis = nodes[4];
      let combinator = '';

      // The first step of an alternative never gets a leading combinator.
      if (steps.length > 0 && axis) {
        if (axis.includes('following-sibling::')) {
          combinator = ' + ';
        } else {
          combinator = axis === '//' ? ' ' : ' > ';
        }
      }

      const tag = nodes[5] === '*' ? '' : (nodes[5] || '');

      steps.push(combinator + tag + attributeSelectorFor(nodes) + positionSelectorFor(nodes[18]));
    }

    const result = steps.join('');

    if (result === '') {
      throw new Error('Invalid or unsupported XPath');
    }

    cssSelectors.push(result);
  }

  return escapeCssIdSelectors(cssSelectors.join(', '));
};
|
package/src/xPathToCssCypress.ts
DELETED
@@ -1,178 +0,0 @@
|
|
1
|
-
/**
 * Converts an XPath expression into a CSS selector (self-contained variant that logs through
 * `console.error` and has no imports).
 *
 * `|`-separated alternatives are converted independently and joined with `, `. Within an
 * alternative each location step becomes `combinator + tag + attribute + position`, where `//`
 * maps to a descendant combinator, `/` to `>` and `following-sibling::` to `+`.
 *
 * @param expr - XPath expression to convert.
 * @returns The CSS selector, with special characters in id selectors escaped.
 * @throws {Error} If `expr` is empty, if an alternative yields no selector, or if a predicate
 *   uses an attribute form that cannot be expressed in CSS (e.g. `text()`).
 */
export function xPathToCss(expr: string): string {
  // True when `candidate` contains none of the XPath constructs listed as unsupported.
  const isValidXPath = (candidate: unknown): boolean => {
    if (typeof candidate !== 'string' || candidate.replace(/[\s\-_=]/g, '') === '') {
      return false;
    }
    const unsupported =
      /[-_\w:.]+\(\)\s*=|=\s*[-_\w:.]+\(\)|\sor\s|\sand\s|\[(?:[^\/\]]+[\/\[]\/?.+)+\]|starts-with\(|\[.*last\(\)\s*[-\+<>=].+\]|number\(\)|not\(|count\(|text\(|first\(|normalize-space|[^\/]following-sibling|concat\(|descendant::|parent::|self::|child::|/gi;
    return candidate.length === candidate.replace(unsupported, '').length;
  };

  // Builds the step tokenizer from a Python-style template: `%(name)s` placeholders are filled
  // in, then the `?P<name>` labels are stripped, leaving positional captures
  // (3 id() value, 4 axis, 5 tag, 7 predicate, 10/12 attr=value, 13/14/16 contains(), 18 index).
  const getValidationRegex = (): RegExp => {
    const fragments: Array<[string, string]> = [
      ['tag', '([a-zA-Z][a-zA-Z0-9:-]*|\\*)'],
      // The trailing `)` closes the `mattr` / `cattr` group opened just before the placeholder.
      ['attribute', '[.a-zA-Z_:][-\\w:.]*(\\(\\))?)'],
      ['value', '\\s*[\\w/:][-/\\w\\s,:;.]*'],
    ];

    const template =
      '(?P<node>' +
      '(' +
      '^id\\(["\\\']?(?P<idvalue>%(value)s)["\\\']?\\)' + // special case! `id(idValue)`
      '|' +
      '(?P<nav>//?(?:following-sibling::)?)(?P<tag>%(tag)s)' + // `//div`
      '(\\[(' +
      '(?P<matched>(?P<mattr>@?%(attribute)s=["\\\'](?P<mvalue>%(value)s))["\\\']' + // `[@id="well"]` supported and `[text()="yes"]` is not
      '|' +
      '(?P<contained>contains\\((?P<cattr>@?%(attribute)s,\\s*["\\\'](?P<cvalue>%(value)s)["\\\']\\))' + // `[contains(@id, "bleh")]` supported and `[contains(text(), "some")]` is not
      ')\\])?' +
      '(\\[\\s*(?P<nth>\\d+|last\\(\\s*\\))\\s*\\])?' +
      ')' +
      ')';

    const filled = fragments.reduce(
      (pattern, [name, fragment]) => pattern.replace(new RegExp(`%\\(${name}\\)s`, 'gi'), fragment),
      template,
    );

    return new RegExp(
      filled.replace(
        /\?P<node>|\?P<idvalue>|\?P<nav>|\?P<tag>|\?P<matched>|\?P<mattr>|\?P<mvalue>|\?P<contained>|\?P<cattr>|\?P<cvalue>|\?P<nth>/gi,
        '',
      ),
      'gi',
    );
  };

  // Rewrites `contains(concat(" ", @class, " "), " name ")` into `@class="name"`.
  const preParseXpath = (raw: string): string =>
    raw.replace(
      /contains\s*\(\s*concat\(["']\s+["']\s*,\s*@class\s*,\s*["']\s+["']\)\s*,\s*["']\s+([a-zA-Z0-9_-]+)\s+["']\)/gi,
      '@class="$1"',
    );

  // Escapes CSS-special characters inside each `#id`. The positional pseudo-class and the
  // list comma emitted below are selector syntax, so they are split off and left unescaped
  // (they used to be escaped too, e.g. `#x\:first-of-type`, `#a\, #b`).
  const escapeCssIdSelectors = (cssSelector: string): string => {
    const specialChars = /[!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~]/g;
    const selectorSuffix = /(?::(?:first|last)-of-type|:nth-of-type\(\d+\))?,?$/;
    return cssSelector.replace(/#([^ >]+)/g, (match, token) => {
      // Every part of `selectorSuffix` is optional, so it always matches (possibly '').
      const found = selectorSuffix.exec(token);
      const suffix = found ? found[0] : '';
      const id = token.slice(0, token.length - suffix.length);
      return `#${id.replace(specialChars, '\\$&')}${suffix}`;
    });
  };

  // Translates the predicate captured for one step into a CSS id, class or attribute selector.
  const attributeSelectorFor = (nodes: RegExpExecArray): string => {
    // `[contains(@attr, "value")]` -> substring attribute selector
    if (nodes[13]) {
      const containsAttr = nodes[14];
      if (!containsAttr.startsWith('@')) {
        throw new Error(`Invalid or unsupported XPath attribute: ${containsAttr}`);
      }
      return `[${containsAttr.slice(1)}*="${nodes[16]}"]`;
    }

    // `[@attr="value"]`
    if (nodes[7]) {
      const attrName = nodes[10];
      // Trim both ends; the previous non-global regex trimmed one side only.
      const attrValue = nodes[12].trim();

      switch (attrName) {
        case '@id':
          return `#${attrValue.replace(/\s+/g, '#')}`;
        case '@class':
          return `.${attrValue.replace(/\s+/g, '.')}`;
        case 'text()':
        case '.':
          throw new Error(`Invalid or unsupported XPath attribute: ${attrName}`);
        default:
          if (!attrName.startsWith('@')) {
            throw new Error(`Invalid or unsupported XPath attribute: ${attrName}`);
          }
          // Quote exactly once; values containing a space used to come out as `[a=""b c""]`.
          return `[${attrName.slice(1)}="${attrValue}"]`;
      }
    }

    // `id("value")` special form
    if (nodes[3]) {
      return `#${nodes[3].trim().replace(/\s+/g, '#')}`;
    }

    return '';
  };

  // Translates `[n]` / `[last()]` into the matching `*-of-type` pseudo-class.
  const positionSelectorFor = (nth: string | undefined): string => {
    if (!nth) {
      return '';
    }
    if (nth.includes('last')) {
      return ':last-of-type';
    }
    const index = Number.parseInt(nth, 10);
    // Reachable: the step regex is case-insensitive, so e.g. `[LAST()]` arrives here.
    if (Number.isNaN(index)) {
      throw new Error(`Invalid or unsupported XPath attribute: ${nth}`);
    }
    return index === 1 ? ':first-of-type' : `:nth-of-type(${nth})`;
  };

  if (!expr) {
    throw new Error('Missing XPath expression');
  }

  const xpath = preParseXpath(expr);

  if (!isValidXPath(xpath)) {
    // Log instead of throwing so that conversion is still attempted for XPath this function
    // does not formally support, for example
    // //*[@id="google_ads_iframe_/4654/dweb/imu1/homepage/landingpage/na_0"]/html/body/div[1]/a
    // whose id contains slashes; those are escaped by escapeCssIdSelectors() at the end.
    console.error(`Invalid or unsupported XPath: ${xpath}`);
  }

  const stepPattern = getValidationRegex();
  const cssSelectors: string[] = [];

  for (const alternative of xpath.split('|')) {
    // Matches the original loop condition: conversion stops at the first empty alternative.
    if (!alternative) {
      break;
    }

    const steps: string[] = [];
    let nodes: RegExpExecArray | null;

    // `stepPattern` is global, so each exec() resumes after the previous step and returns
    // null (resetting lastIndex) once the alternative is exhausted.
    while ((nodes = stepPattern.exec(alternative)) !== null) {
      const axis = nodes[4];
      let combinator = '';

      // The first step of an alternative never gets a leading combinator.
      if (steps.length > 0 && axis) {
        if (axis.includes('following-sibling::')) {
          combinator = ' + ';
        } else {
          combinator = axis === '//' ? ' ' : ' > ';
        }
      }

      const tag = nodes[5] === '*' ? '' : nodes[5] || '';

      steps.push(combinator + tag + attributeSelectorFor(nodes) + positionSelectorFor(nodes[18]));
    }

    const result = steps.join('');

    if (result === '') {
      throw new Error('Invalid or unsupported XPath');
    }

    cssSelectors.push(result);
  }

  return escapeCssIdSelectors(cssSelectors.join(', '));
}
|