w3c-validate-html 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +4 -11
  2. package/index.js +82 -6
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -124,21 +124,14 @@ jobs:
124
124
  html-validate:
125
125
  runs-on: ubuntu-latest
126
126
  steps:
127
- - uses: actions/checkout@v4
128
127
  - uses: actions/setup-node@v4
129
128
  with:
130
129
  node-version: 18
131
130
 
132
- - run: npm ci
133
- - run: npm start &
134
-
135
- - run: |
136
- for i in {1..30}; do
137
- curl -fsS http://localhost:8080 >/dev/null && break
138
- sleep 1
139
- done
140
-
141
- - run: npx w3c-validate-html --url http://localhost:8080 --depth 3 --concurrency 4 --errors-only --json > html-report.json
131
+ - name: validate url
132
+ env:
133
+ TARGET_URL: https://example.com
134
+ run: npx w3c-validate-html --target "$TARGET_URL" --depth 2 --errors-only --json > html-report.json
142
135
 
143
136
  - uses: actions/upload-artifact@v4
144
137
  with:
package/index.js CHANGED
@@ -154,14 +154,49 @@ function toList(v) {
154
154
  */
155
155
  function toSafeName(href) {
156
156
  var s = String(href || '');
157
+ var out = '';
158
+ var i;
159
+ var ch;
160
+
157
161
  s = s.replace(/^https?:\/\//i, '');
158
- s = s.replace(/[?#].*$/, '');
159
162
  s = s.replace(/\/+/g, '/');
160
- s = s.replace(/[^a-z0-9/._-]+/gi, '_');
161
- s = s.replace(/\//g, '_');
162
- if (!s) { s = 'index.html'; }
163
- if (!/\.html?$/i.test(s)) { s += '.html'; }
164
- return s;
163
+
164
+ /* convert url chars to filename safe chars without stripping query */
165
+ for (i = 0; i < s.length; i++) {
166
+ ch = s.charAt(i);
167
+
168
+ /* keep common safe chars */
169
+ if (/[a-z0-9]/i.test(ch) || ch === '/' || ch === '.' || ch === '_' || ch === '-') {
170
+ out += ch;
171
+ }
172
+ /* map separators to readable tokens */
173
+ else if (ch === '?') {
174
+ out += '__q__';
175
+ }
176
+ else if (ch === '&') {
177
+ out += '__and__';
178
+ }
179
+ else if (ch === '=') {
180
+ out += '__eq__';
181
+ }
182
+ else if (ch === '#') {
183
+ out += '__hash__';
184
+ }
185
+ /* everything else becomes underscore */
186
+ else {
187
+ out += '_';
188
+ }
189
+ }
190
+
191
+ out = out.replace(/\/+/g, '/');
192
+ out = out.replace(/_+/g, '_');
193
+ out = out.replace(/\//g, '_');
194
+ out = out.replace(/^_+|_+$/g, '');
195
+
196
+ if (!out) { out = 'index.html'; }
197
+ if (!/\.html?$/i.test(out)) { out += '.html'; }
198
+
199
+ return out;
165
200
  }
166
201
 
167
202
  /**
@@ -389,6 +424,15 @@ function isCrawlable(href, cfg, origin) {
389
424
  return false;
390
425
  }
391
426
 
427
+ // skip common non-HTML file types (e.g., pdf, zip, docx, etc)
428
+ if (/\.(pdf|zip|docx?|xlsx?|pptx?|jpg|jpeg|png|gif|svg|mp3|mp4|avi|mov|wmv|exe|dmg|tar|gz|rar|7z)(\?|#|$)/i.test(href)) {
429
+ return false;
430
+ }
431
+ // skip links that look like downloads
432
+ if (/download(=|\b|\/|\.)|attachment(=|\b|\/|\.)|file(=|\b|\/|\.)/i.test(href)) {
433
+ return false;
434
+ }
435
+
392
436
  if (cfg && cfg.sameOrigin) {
393
437
  try {
394
438
  if (new URL(href).origin !== origin) {
@@ -432,6 +476,12 @@ async function fetchHtml(pageUrl, cfg) {
432
476
  throw new Error('request failed ' + res.status + ' ' + pageUrl);
433
477
  }
434
478
 
479
+ // Only process HTML or XHTML
480
+ var contentType = res.headers.get('content-type') || '';
481
+ if (!/text\/html|application\/xhtml\+xml/i.test(contentType)) {
482
+ return null;
483
+ }
484
+
435
485
  var finalUrl = (res.url && String(res.url)) ? String(res.url) : pageUrl;
436
486
  var html = await res.text();
437
487
 
@@ -560,7 +610,19 @@ async function asyncPool(items, concurrency, worker) {
560
610
  * @returns {Promise<{url:string,ok:boolean,errors:Array,warnings:Array,finalUrl:string,links:Array}>} - Result
561
611
  */
562
612
  async function validateOneUrl(pageUrl, cfg, tmpDir) {
613
+
563
614
  var fetched = await fetchHtml(pageUrl, cfg);
615
+ if (!fetched) {
616
+ // Not HTML, skip crawling and validation
617
+ return {
618
+ url: pageUrl,
619
+ finalUrl: pageUrl,
620
+ ok: true,
621
+ errors: [],
622
+ warnings: [],
623
+ links: []
624
+ };
625
+ }
564
626
  var finalUrl = fetched.finalUrl;
565
627
  var html = fetched.html;
566
628
 
@@ -973,11 +1035,25 @@ if (require.main === module) {
973
1035
  userAgent: argv['user-agent']
974
1036
  };
975
1037
 
1038
+ const startTime = Date.now();
976
1039
  validate(target, cfg).then(function (summary) {
977
1040
  if (argv.json) {
978
1041
  try { console.log(JSON.stringify(summary)); }
979
1042
  catch (e) { console.error('{"error":"failed to stringify results"}'); }
980
1043
  }
1044
+
1045
+ // Jasmine-style summary (simplified)
1046
+ const total = summary.passed + summary.failed;
1047
+ const duration = ((Date.now() - startTime) / 1000).toFixed(3);
1048
+ console.log('\nSummary:');
1049
+ if (summary.failed === 0) {
1050
+ console.log('\nšŸ‘Š Passed');
1051
+ } else {
1052
+ console.log('\nāŒ Failed');
1053
+ }
1054
+ console.log('Pages: ' + summary.passed + ' of ' + total);
1055
+ console.log('Errors: ' + summary.failed);
1056
+ console.log('Finished in ' + duration + ' seconds');
981
1057
  process.exit(summary.failed > 0 ? 1 : 0);
982
1058
  })
983
1059
  .catch(function (err) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "w3c-validate-html",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "description": "Validate HTML offline using the official W3C vnu.jar",
5
5
  "type": "commonjs",
6
6
  "main": "index.js",