w3c-validate-html 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -11
- package/index.js +41 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -124,21 +124,14 @@ jobs:
|
|
|
124
124
|
html-validate:
|
|
125
125
|
runs-on: ubuntu-latest
|
|
126
126
|
steps:
|
|
127
|
-
- uses: actions/checkout@v4
|
|
128
127
|
- uses: actions/setup-node@v4
|
|
129
128
|
with:
|
|
130
129
|
node-version: 18
|
|
131
130
|
|
|
132
|
-
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
for i in {1..30}; do
|
|
137
|
-
curl -fsS http://localhost:8080 >/dev/null && break
|
|
138
|
-
sleep 1
|
|
139
|
-
done
|
|
140
|
-
|
|
141
|
-
- run: npx w3c-validate-html --url http://localhost:8080 --depth 3 --concurrency 4 --errors-only --json > html-report.json
|
|
131
|
+
- name: validate url
|
|
132
|
+
env:
|
|
133
|
+
TARGET_URL: https://example.com
|
|
134
|
+
run: npx w3c-validate-html --target "$TARGET_URL" --depth 2 --errors-only --json > html-report.json
|
|
142
135
|
|
|
143
136
|
- uses: actions/upload-artifact@v4
|
|
144
137
|
with:
|
package/index.js
CHANGED
|
@@ -424,6 +424,15 @@ function isCrawlable(href, cfg, origin) {
|
|
|
424
424
|
return false;
|
|
425
425
|
}
|
|
426
426
|
|
|
427
|
+
// skip common non-HTML file types (e.g., pdf, zip, docx, etc)
|
|
428
|
+
if (/\.(pdf|zip|docx?|xlsx?|pptx?|jpg|jpeg|png|gif|svg|mp3|mp4|avi|mov|wmv|exe|dmg|tar|gz|rar|7z)(\?|#|$)/i.test(href)) {
|
|
429
|
+
return false;
|
|
430
|
+
}
|
|
431
|
+
// skip links that look like downloads
|
|
432
|
+
if (/download(=|\b|\/|\.)|attachment(=|\b|\/|\.)|file(=|\b|\/|\.)/i.test(href)) {
|
|
433
|
+
return false;
|
|
434
|
+
}
|
|
435
|
+
|
|
427
436
|
if (cfg && cfg.sameOrigin) {
|
|
428
437
|
try {
|
|
429
438
|
if (new URL(href).origin !== origin) {
|
|
@@ -467,6 +476,12 @@ async function fetchHtml(pageUrl, cfg) {
|
|
|
467
476
|
throw new Error('request failed ' + res.status + ' ' + pageUrl);
|
|
468
477
|
}
|
|
469
478
|
|
|
479
|
+
// Only process HTML or XHTML
|
|
480
|
+
var contentType = res.headers.get('content-type') || '';
|
|
481
|
+
if (!/text\/html|application\/xhtml\+xml/i.test(contentType)) {
|
|
482
|
+
return null;
|
|
483
|
+
}
|
|
484
|
+
|
|
470
485
|
var finalUrl = (res.url && String(res.url)) ? String(res.url) : pageUrl;
|
|
471
486
|
var html = await res.text();
|
|
472
487
|
|
|
@@ -595,7 +610,19 @@ async function asyncPool(items, concurrency, worker) {
|
|
|
595
610
|
* @returns {Promise<{url:string,ok:boolean,errors:Array,warnings:Array,finalUrl:string,links:Array}>} - Result
|
|
596
611
|
*/
|
|
597
612
|
async function validateOneUrl(pageUrl, cfg, tmpDir) {
|
|
613
|
+
|
|
598
614
|
var fetched = await fetchHtml(pageUrl, cfg);
|
|
615
|
+
if (!fetched) {
|
|
616
|
+
// Not HTML, skip crawling and validation
|
|
617
|
+
return {
|
|
618
|
+
url: pageUrl,
|
|
619
|
+
finalUrl: pageUrl,
|
|
620
|
+
ok: true,
|
|
621
|
+
errors: [],
|
|
622
|
+
warnings: [],
|
|
623
|
+
links: []
|
|
624
|
+
};
|
|
625
|
+
}
|
|
599
626
|
var finalUrl = fetched.finalUrl;
|
|
600
627
|
var html = fetched.html;
|
|
601
628
|
|
|
@@ -1008,11 +1035,25 @@ if (require.main === module) {
|
|
|
1008
1035
|
userAgent: argv['user-agent']
|
|
1009
1036
|
};
|
|
1010
1037
|
|
|
1038
|
+
const startTime = Date.now();
|
|
1011
1039
|
validate(target, cfg).then(function (summary) {
|
|
1012
1040
|
if (argv.json) {
|
|
1013
1041
|
try { console.log(JSON.stringify(summary)); }
|
|
1014
1042
|
catch (e) { console.error('{"error":"failed to stringify results"}'); }
|
|
1015
1043
|
}
|
|
1044
|
+
|
|
1045
|
+
// Jasmine-style summary (simplified)
|
|
1046
|
+
const total = summary.passed + summary.failed;
|
|
1047
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(3);
|
|
1048
|
+
console.log('\nSummary:');
|
|
1049
|
+
if (summary.failed === 0) {
|
|
1050
|
+
console.log('\nš Passed');
|
|
1051
|
+
} else {
|
|
1052
|
+
console.log('\nā Failed');
|
|
1053
|
+
}
|
|
1054
|
+
console.log('Pages: ' + summary.passed + ' of ' + total);
|
|
1055
|
+
console.log('Errors: ' + summary.failed);
|
|
1056
|
+
console.log('Finished in ' + duration + ' seconds');
|
|
1016
1057
|
process.exit(summary.failed > 0 ? 1 : 0);
|
|
1017
1058
|
})
|
|
1018
1059
|
.catch(function (err) {
|