w3c-validate-html 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -11
- package/index.js +82 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -124,21 +124,14 @@ jobs:
|
|
|
124
124
|
html-validate:
|
|
125
125
|
runs-on: ubuntu-latest
|
|
126
126
|
steps:
|
|
127
|
-
- uses: actions/checkout@v4
|
|
128
127
|
- uses: actions/setup-node@v4
|
|
129
128
|
with:
|
|
130
129
|
node-version: 18
|
|
131
130
|
|
|
132
|
-
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
for i in {1..30}; do
|
|
137
|
-
curl -fsS http://localhost:8080 >/dev/null && break
|
|
138
|
-
sleep 1
|
|
139
|
-
done
|
|
140
|
-
|
|
141
|
-
- run: npx w3c-validate-html --url http://localhost:8080 --depth 3 --concurrency 4 --errors-only --json > html-report.json
|
|
131
|
+
- name: validate url
|
|
132
|
+
env:
|
|
133
|
+
TARGET_URL: https://example.com
|
|
134
|
+
run: npx w3c-validate-html --target "$TARGET_URL" --depth 2 --errors-only --json > html-report.json
|
|
142
135
|
|
|
143
136
|
- uses: actions/upload-artifact@v4
|
|
144
137
|
with:
|
package/index.js
CHANGED
|
@@ -154,14 +154,49 @@ function toList(v) {
|
|
|
154
154
|
*/
|
|
155
155
|
function toSafeName(href) {
|
|
156
156
|
var s = String(href || '');
|
|
157
|
+
var out = '';
|
|
158
|
+
var i;
|
|
159
|
+
var ch;
|
|
160
|
+
|
|
157
161
|
s = s.replace(/^https?:\/\//i, '');
|
|
158
|
-
s = s.replace(/[?#].*$/, '');
|
|
159
162
|
s = s.replace(/\/+/g, '/');
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
163
|
+
|
|
164
|
+
/* convert url chars to filename safe chars without stripping query */
|
|
165
|
+
for (i = 0; i < s.length; i++) {
|
|
166
|
+
ch = s.charAt(i);
|
|
167
|
+
|
|
168
|
+
/* keep common safe chars */
|
|
169
|
+
if (/[a-z0-9]/i.test(ch) || ch === '/' || ch === '.' || ch === '_' || ch === '-') {
|
|
170
|
+
out += ch;
|
|
171
|
+
}
|
|
172
|
+
/* map separators to readable tokens */
|
|
173
|
+
else if (ch === '?') {
|
|
174
|
+
out += '__q__';
|
|
175
|
+
}
|
|
176
|
+
else if (ch === '&') {
|
|
177
|
+
out += '__and__';
|
|
178
|
+
}
|
|
179
|
+
else if (ch === '=') {
|
|
180
|
+
out += '__eq__';
|
|
181
|
+
}
|
|
182
|
+
else if (ch === '#') {
|
|
183
|
+
out += '__hash__';
|
|
184
|
+
}
|
|
185
|
+
/* everything else becomes underscore */
|
|
186
|
+
else {
|
|
187
|
+
out += '_';
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
out = out.replace(/\/+/g, '/');
|
|
192
|
+
out = out.replace(/_+/g, '_');
|
|
193
|
+
out = out.replace(/\//g, '_');
|
|
194
|
+
out = out.replace(/^_+|_+$/g, '');
|
|
195
|
+
|
|
196
|
+
if (!out) { out = 'index.html'; }
|
|
197
|
+
if (!/\.html?$/i.test(out)) { out += '.html'; }
|
|
198
|
+
|
|
199
|
+
return out;
|
|
165
200
|
}
|
|
166
201
|
|
|
167
202
|
/**
|
|
@@ -389,6 +424,15 @@ function isCrawlable(href, cfg, origin) {
|
|
|
389
424
|
return false;
|
|
390
425
|
}
|
|
391
426
|
|
|
427
|
+
// skip common non-HTML file types (e.g., pdf, zip, docx, etc)
|
|
428
|
+
if (/\.(pdf|zip|docx?|xlsx?|pptx?|jpg|jpeg|png|gif|svg|mp3|mp4|avi|mov|wmv|exe|dmg|tar|gz|rar|7z)(\?|#|$)/i.test(href)) {
|
|
429
|
+
return false;
|
|
430
|
+
}
|
|
431
|
+
// skip links that look like downloads
|
|
432
|
+
if (/download(=|\b|\/|\.)|attachment(=|\b|\/|\.)|file(=|\b|\/|\.)/i.test(href)) {
|
|
433
|
+
return false;
|
|
434
|
+
}
|
|
435
|
+
|
|
392
436
|
if (cfg && cfg.sameOrigin) {
|
|
393
437
|
try {
|
|
394
438
|
if (new URL(href).origin !== origin) {
|
|
@@ -432,6 +476,12 @@ async function fetchHtml(pageUrl, cfg) {
|
|
|
432
476
|
throw new Error('request failed ' + res.status + ' ' + pageUrl);
|
|
433
477
|
}
|
|
434
478
|
|
|
479
|
+
// Only process HTML or XHTML
|
|
480
|
+
var contentType = res.headers.get('content-type') || '';
|
|
481
|
+
if (!/text\/html|application\/xhtml\+xml/i.test(contentType)) {
|
|
482
|
+
return null;
|
|
483
|
+
}
|
|
484
|
+
|
|
435
485
|
var finalUrl = (res.url && String(res.url)) ? String(res.url) : pageUrl;
|
|
436
486
|
var html = await res.text();
|
|
437
487
|
|
|
@@ -560,7 +610,19 @@ async function asyncPool(items, concurrency, worker) {
|
|
|
560
610
|
* @returns {Promise<{url:string,ok:boolean,errors:Array,warnings:Array,finalUrl:string,links:Array}>} - Result
|
|
561
611
|
*/
|
|
562
612
|
async function validateOneUrl(pageUrl, cfg, tmpDir) {
|
|
613
|
+
|
|
563
614
|
var fetched = await fetchHtml(pageUrl, cfg);
|
|
615
|
+
if (!fetched) {
|
|
616
|
+
// Not HTML, skip crawling and validation
|
|
617
|
+
return {
|
|
618
|
+
url: pageUrl,
|
|
619
|
+
finalUrl: pageUrl,
|
|
620
|
+
ok: true,
|
|
621
|
+
errors: [],
|
|
622
|
+
warnings: [],
|
|
623
|
+
links: []
|
|
624
|
+
};
|
|
625
|
+
}
|
|
564
626
|
var finalUrl = fetched.finalUrl;
|
|
565
627
|
var html = fetched.html;
|
|
566
628
|
|
|
@@ -973,11 +1035,25 @@ if (require.main === module) {
|
|
|
973
1035
|
userAgent: argv['user-agent']
|
|
974
1036
|
};
|
|
975
1037
|
|
|
1038
|
+
const startTime = Date.now();
|
|
976
1039
|
validate(target, cfg).then(function (summary) {
|
|
977
1040
|
if (argv.json) {
|
|
978
1041
|
try { console.log(JSON.stringify(summary)); }
|
|
979
1042
|
catch (e) { console.error('{"error":"failed to stringify results"}'); }
|
|
980
1043
|
}
|
|
1044
|
+
|
|
1045
|
+
// Jasmine-style summary (simplified)
|
|
1046
|
+
const total = summary.passed + summary.failed;
|
|
1047
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(3);
|
|
1048
|
+
console.log('\nSummary:');
|
|
1049
|
+
if (summary.failed === 0) {
|
|
1050
|
+
console.log('\nš Passed');
|
|
1051
|
+
} else {
|
|
1052
|
+
console.log('\nā Failed');
|
|
1053
|
+
}
|
|
1054
|
+
console.log('Pages: ' + summary.passed + ' of ' + total);
|
|
1055
|
+
console.log('Errors: ' + summary.failed);
|
|
1056
|
+
console.log('Finished in ' + duration + ' seconds');
|
|
981
1057
|
process.exit(summary.failed > 0 ? 1 : 0);
|
|
982
1058
|
})
|
|
983
1059
|
.catch(function (err) {
|