w3c-validate-html 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +4 -11
  2. package/index.js +41 -0
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -124,21 +124,14 @@ jobs:
124
124
  html-validate:
125
125
  runs-on: ubuntu-latest
126
126
  steps:
127
- - uses: actions/checkout@v4
128
127
  - uses: actions/setup-node@v4
129
128
  with:
130
129
  node-version: 18
131
130
 
132
- - run: npm ci
133
- - run: npm start &
134
-
135
- - run: |
136
- for i in {1..30}; do
137
- curl -fsS http://localhost:8080 >/dev/null && break
138
- sleep 1
139
- done
140
-
141
- - run: npx w3c-validate-html --url http://localhost:8080 --depth 3 --concurrency 4 --errors-only --json > html-report.json
131
+ - name: validate url
132
+ env:
133
+ TARGET_URL: https://example.com
134
+ run: npx w3c-validate-html --target "$TARGET_URL" --depth 2 --errors-only --json > html-report.json
142
135
 
143
136
  - uses: actions/upload-artifact@v4
144
137
  with:
package/index.js CHANGED
@@ -424,6 +424,15 @@ function isCrawlable(href, cfg, origin) {
424
424
  return false;
425
425
  }
426
426
 
427
+ // skip common non-HTML file types (e.g., pdf, zip, docx, etc)
428
+ if (/\.(pdf|zip|docx?|xlsx?|pptx?|jpg|jpeg|png|gif|svg|mp3|mp4|avi|mov|wmv|exe|dmg|tar|gz|rar|7z)(\?|#|$)/i.test(href)) {
429
+ return false;
430
+ }
431
+ // skip links that look like downloads
432
+ if (/download(=|\b|\/|\.)|attachment(=|\b|\/|\.)|file(=|\b|\/|\.)/i.test(href)) {
433
+ return false;
434
+ }
435
+
427
436
  if (cfg && cfg.sameOrigin) {
428
437
  try {
429
438
  if (new URL(href).origin !== origin) {
@@ -467,6 +476,12 @@ async function fetchHtml(pageUrl, cfg) {
467
476
  throw new Error('request failed ' + res.status + ' ' + pageUrl);
468
477
  }
469
478
 
479
+ // Only process HTML or XHTML
480
+ var contentType = res.headers.get('content-type') || '';
481
+ if (!/text\/html|application\/xhtml\+xml/i.test(contentType)) {
482
+ return null;
483
+ }
484
+
470
485
  var finalUrl = (res.url && String(res.url)) ? String(res.url) : pageUrl;
471
486
  var html = await res.text();
472
487
 
@@ -595,7 +610,19 @@ async function asyncPool(items, concurrency, worker) {
595
610
  * @returns {Promise<{url:string,ok:boolean,errors:Array,warnings:Array,finalUrl:string,links:Array}>} - Result
596
611
  */
597
612
  async function validateOneUrl(pageUrl, cfg, tmpDir) {
613
+
598
614
  var fetched = await fetchHtml(pageUrl, cfg);
615
+ if (!fetched) {
616
+ // Not HTML, skip crawling and validation
617
+ return {
618
+ url: pageUrl,
619
+ finalUrl: pageUrl,
620
+ ok: true,
621
+ errors: [],
622
+ warnings: [],
623
+ links: []
624
+ };
625
+ }
599
626
  var finalUrl = fetched.finalUrl;
600
627
  var html = fetched.html;
601
628
 
@@ -1008,11 +1035,25 @@ if (require.main === module) {
1008
1035
  userAgent: argv['user-agent']
1009
1036
  };
1010
1037
 
1038
+ const startTime = Date.now();
1011
1039
  validate(target, cfg).then(function (summary) {
1012
1040
  if (argv.json) {
1013
1041
  try { console.log(JSON.stringify(summary)); }
1014
1042
  catch (e) { console.error('{"error":"failed to stringify results"}'); }
1015
1043
  }
1044
+
1045
+ // Jasmine-style summary (simplified)
1046
+ const total = summary.passed + summary.failed;
1047
+ const duration = ((Date.now() - startTime) / 1000).toFixed(3);
1048
+ console.log('\nSummary:');
1049
+ if (summary.failed === 0) {
1050
+ console.log('\n👊 Passed');
1051
+ } else {
1052
+ console.log('\n❌ Failed');
1053
+ }
1054
+ console.log('Pages: ' + summary.passed + ' of ' + total);
1055
+ console.log('Errors: ' + summary.failed);
1056
+ console.log('Finished in ' + duration + ' seconds');
1016
1057
  process.exit(summary.failed > 0 ? 1 : 0);
1017
1058
  })
1018
1059
  .catch(function (err) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "w3c-validate-html",
3
- "version": "1.0.2",
3
+ "version": "1.1.0",
4
4
  "description": "Validate HTML offline using the official W3C vnu.jar",
5
5
  "type": "commonjs",
6
6
  "main": "index.js",