w3c-validate-html 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +155 -0
- package/index.js +990 -0
- package/package.json +52 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Orca Scan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# w3c-validate-html
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/orca-scan/w3c-validate-html/actions/workflows/ci.yml)
|
|
4
|
+
[License](https://github.com/orca-scan/w3c-validate-html/blob/master/LICENSE)
|
|
5
|
+
[npm package](https://www.npmjs.com/package/w3c-validate-html)
|
|
6
|
+
|
|
7
|
+
Validate HTML offline using the official W3C vnu.jar
|
|
8
|
+
|
|
9
|
+
**Why?** Modern build tools can introduce HTML bugs. w3c-validate-html runs locally and prints concise, clickable errors with line numbers using the same rules as the online W3C validator.
|
|
10
|
+
|
|
11
|
+
## CLI
|
|
12
|
+
|
|
13
|
+
The easiest way to use this is from the CLI using `npx`, for example:
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
# validate a website recursively (default depth 2)
|
|
17
|
+
npx w3c-validate-html --target https://example.com --depth 1 --errors-only
|
|
18
|
+
|
|
19
|
+
# Validate a folder, fail only on errors
|
|
20
|
+
npx w3c-validate-html --target ./public --errors-only
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Options
|
|
24
|
+
|
|
25
|
+
Option | Alias | Type | Default | Description
|
|
26
|
+
:-------------|:------|:--------|:-------------------|:---------------------------------------
|
|
27
|
+
--target | -t | string | | File, folder, URL or string to validate
|
|
28
|
+
--depth | | number | 2 | Crawl depth for website validation
|
|
29
|
+
--concurrency | | number | 4 | Number of concurrent validations
|
|
30
|
+
--warnings | | number | 1 | Show warnings (0 = off, 1 = on)
|
|
31
|
+
--exclude | | string | | Comma/space separated URLs to exclude
|
|
32
|
+
--errors-only | -e | boolean | false | Only show errors
|
|
33
|
+
--json | | boolean | false | Output results as JSON
|
|
34
|
+
--same-origin | | boolean | true | Restrict crawl to same origin
|
|
35
|
+
--strip-query | | boolean | false | Exclude URLs with query strings
|
|
36
|
+
--user-agent | | string | Mozilla/5.0 (node) | Custom user agent for requests
|
|
37
|
+
|
|
38
|
+
## Output
|
|
39
|
+
|
|
40
|
+
Errors and warnings include clickable file:line:col links for quick editor navigation.
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
✖ public/invalid.html
|
|
44
|
+
End tag for "h1" seen, but there were unclosed elements. public/invalid.html:7:5
|
|
45
|
+
Unclosed element "h1". public/invalid.html:7:5
|
|
46
|
+
End of file seen when expecting text or an end tag. public/invalid.html:9:1
|
|
47
|
+
✔ public/valid.html
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Node module
|
|
51
|
+
|
|
52
|
+
You can use this package as a node module to validate a URL, file/folder, or raw HTML string:
|
|
53
|
+
|
|
54
|
+
### Validate a URL
|
|
55
|
+
|
|
56
|
+
```js
|
|
57
|
+
const validate = require('w3c-validate-html');
|
|
58
|
+
|
|
59
|
+
validate('https://example.com', { warnings: 1, depth: 0 }).then(function(summary) {
|
|
60
|
+
console.log(summary);
|
|
61
|
+
})
|
|
62
|
+
.catch((err) => {
|
|
63
|
+
console.error(err);
|
|
64
|
+
});
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Validate a local file or folder
|
|
68
|
+
|
|
69
|
+
```js
|
|
70
|
+
const validate = require('w3c-validate-html');
|
|
71
|
+
|
|
72
|
+
validate('./tests/fixtures/valid.html', { warnings: 1 }).then(function(summary) {
|
|
73
|
+
console.log(summary);
|
|
74
|
+
})
|
|
75
|
+
.catch((err) => {
|
|
76
|
+
console.error(err);
|
|
77
|
+
});
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Validate an HTML string
|
|
81
|
+
|
|
82
|
+
```js
|
|
83
|
+
const validate = require('w3c-validate-html');
|
|
84
|
+
|
|
85
|
+
var html = '<!DOCTYPE html><html><head><title>Test</title></head><body><h1>Hi</h1></body></html>';
|
|
86
|
+
|
|
87
|
+
validate(html).then(function(result) {
|
|
88
|
+
console.log(result);
|
|
89
|
+
})
|
|
90
|
+
.catch((err) => {
|
|
91
|
+
console.error(err);
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Example response
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"passed": 0,
|
|
100
|
+
"failed": 1,
|
|
101
|
+
"results": [
|
|
102
|
+
{
|
|
103
|
+
"ok": false,
|
|
104
|
+
"errors": [
|
|
105
|
+
{ "line": 7, "col": 5, "msg": "End tag for \"h1\" seen, but there were unclosed elements." },
|
|
106
|
+
{ "line": 7, "col": 5, "msg": "Unclosed element \"h1\"." },
|
|
107
|
+
{ "line": 9, "col": 1, "msg": "End of file seen when expecting text or an end tag." }
|
|
108
|
+
],
|
|
109
|
+
"warnings": []
|
|
110
|
+
}
|
|
111
|
+
]
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## GitHub Action
|
|
116
|
+
|
|
117
|
+
You can use this in your CI as a GitHub Action to validate your site's HTML on every push and pull request. The job fails if any pages have HTML errors and a `html-report.json` artifact is uploaded for review.
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
name: html-validate
|
|
121
|
+
on: [push, pull_request]
|
|
122
|
+
|
|
123
|
+
jobs:
|
|
124
|
+
html-validate:
|
|
125
|
+
runs-on: ubuntu-latest
|
|
126
|
+
steps:
|
|
127
|
+
- uses: actions/checkout@v4
|
|
128
|
+
- uses: actions/setup-node@v4
|
|
129
|
+
with:
|
|
130
|
+
node-version: 18
|
|
131
|
+
|
|
132
|
+
- run: npm ci
|
|
133
|
+
- run: npm start &
|
|
134
|
+
|
|
135
|
+
- run: |
|
|
136
|
+
for i in {1..30}; do
|
|
137
|
+
curl -fsS http://localhost:8080 >/dev/null && break
|
|
138
|
+
sleep 1
|
|
139
|
+
done
|
|
140
|
+
|
|
141
|
+
- run: npx w3c-validate-html --target http://localhost:8080 --depth 3 --concurrency 4 --errors-only --json > html-report.json
|
|
142
|
+
|
|
143
|
+
- uses: actions/upload-artifact@v4
|
|
144
|
+
with:
|
|
145
|
+
name: html-report
|
|
146
|
+
path: html-report.json
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## See also
|
|
150
|
+
|
|
151
|
+
* [w3c-validate-css](https://github.com/orca-scan/w3c-validate-css)
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
[MIT License](LICENSE) © Orca Scan - a [barcode app](https://orcascan.com) with simple [barcode tracking APIs](https://orcascan.com/guides?tag=for-developers).
|
package/index.js
ADDED
|
@@ -0,0 +1,990 @@
|
|
|
1
|
+
'#!/usr/bin/env node'
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
var fs = require('fs');
|
|
5
|
+
var fsp = fs.promises;
|
|
6
|
+
var path = require('path');
|
|
7
|
+
var os = require('os');
|
|
8
|
+
var url = require('url');
|
|
9
|
+
var child = require('child_process');
|
|
10
|
+
var chalk = require('chalk');
|
|
11
|
+
var glob = require('glob');
|
|
12
|
+
var minimist = require('minimist');
|
|
13
|
+
var fetch = require('node-fetch');
|
|
14
|
+
var cheerio = require('cheerio');
|
|
15
|
+
var beautify = require('js-beautify').html;
|
|
16
|
+
|
|
17
|
+
/* vnu.jar is cached at one fixed location inside the OS temp directory */
var CACHE_DIR = path.join(os.tmpdir(), 'w3c-validate-html');
var CACHED_JAR = path.join(CACHE_DIR, 'vnu.jar');

/* jar path handed to java by runOne; stays null until validateUrl stores the result of resolveJarPath() */
var CURRENT_JAR_PATH = null;

/* download locations for vnu.jar, tried in order by resolveJarPath */
var JAR_URLS = ['https://github.com/validator/validator/releases/latest/download/vnu.jar'];

/* page url -> saved local html file, used when printing file:line:col locations */
var urlToFileMap = {};
|
|
27
|
+
|
|
28
|
+
/**
 * Main validate entry point.
 * Picks the URL, file/folder or raw HTML validator depending on what `input`
 * looks like. The checks run in that order, so the first matching kind wins.
 * @param {string} input - URL, file/folder path, or HTML string
 * @param {object} [cfg] - Optional config, passed unchanged to the selected validator
 * @returns {Promise<object>} - Validation result(s) from the selected validator
 * @throws {Error} If input is not a non-empty string, or is not recognised as a URL, path or HTML
 */
async function validate(input, cfg) {

    cfg = cfg || {};

    if (typeof input !== 'string' || !input.trim()) {
        throw new Error('Input must be a non-empty string (URL, file, or HTML)');
    }

    if (isUrl(input)) {
        return validateUrl(input, cfg);
    }

    if (isFilePath(input)) {
        return validateFiles(input, cfg);
    }

    if (isHtml(input)) {
        return validateHtmlString(input, cfg);
    }

    // Without this, unrecognised input fell off the end and resolved with
    // undefined, which callers cannot tell apart from a validation result.
    throw new Error('Input not recognised as a URL, file path, or HTML string');
}
|
|
55
|
+
|
|
56
|
+
/**
 * Detects a Java runtime by spawning `java -version`.
 * Java counts as available when the process exits with code 0 or wrote
 * anything to stderr; a spawn failure resolves false.
 * @returns {Promise<boolean>} Resolves true if Java is available.
 */
async function hasJava() {
    return new Promise(function (resolve) {
        var stderrSeen = false;
        var proc = child.spawn('java', ['-version']);

        proc.stderr.on('data', function () {
            stderrSeen = true;
        });

        proc.on('error', function () {
            resolve(false);
        });

        proc.on('close', function (exitCode) {
            resolve(exitCode === 0 || stderrSeen);
        });
    });
}
|
|
69
|
+
|
|
70
|
+
/**
 * Creates a directory (and any missing parents) if it does not exist yet.
 * Best effort: any error raised by mkdir is deliberately ignored.
 * @param {string} dir Directory path
 */
function ensureDir(dir) {
    try {
        fs.mkdirSync(dir, { recursive: true });
    } catch (ignored) {
        /* ignore */
    }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Checks if a file looks like a JAR by reading its first two bytes and
 * comparing them with the zip signature 'PK'.
 * Any failure to open or read the file is reported as "not a jar".
 * @param {string} file Path to jar file
 * @returns {Promise<boolean>} True if the file starts with the zip header
 */
async function isJar(file) {
    let handle;
    try {
        handle = await fsp.open(file, 'r');
        const header = Buffer.alloc(2);
        const { bytesRead } = await handle.read(header, 0, 2, 0);
        // a short read means the file cannot hold a full signature
        return bytesRead === 2 && header[0] === 0x50 && header[1] === 0x4B; // 'PK' zip header
    } catch (e) {
        return false;
    } finally {
        // close on every path; previously a failing read left the handle open
        if (handle) {
            try { await handle.close(); } catch (closeErr) { /* nothing useful to do */ }
        }
    }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Downloads a file from a URL to disk.
 * The body is streamed to `dest + '.part'` and renamed to `dest` once the
 * write has finished, so `dest` never holds a partial download.
 * @param {string} href URL to download
 * @param {string} dest Destination file path
 * @returns {Promise<void>} Resolves when the file is in place (or the missing-.part case was logged)
 * @throws {Error} On a non-OK response, a stream error, or a rename failure other than ENOENT
 */
async function download(href, dest) {
    const res = await fetch(href, { headers: { 'User-Agent': 'curl/8 (+node)' }, redirect: 'follow' });
    if (!res.ok) throw new Error('download failed ' + res.status);

    const tmp = dest + '.part';

    try {
        await new Promise((resolve, reject) => {
            const out = fs.createWriteStream(tmp);
            // pipe() does not close the destination when the source fails, and a
            // write error was previously unobserved (the promise never settled)
            const fail = (err) => { out.destroy(); reject(err); };
            res.body.on('error', fail);
            out.on('error', fail);
            out.on('finish', resolve);
            res.body.pipe(out);
        });
    } catch (err) {
        // do not leave a partial file behind
        try { fs.unlinkSync(tmp); } catch (unlinkErr) { /* nothing to remove */ }
        throw err;
    }

    try {
        fs.renameSync(tmp, dest);
    } catch (err) {
        if (err && err.code === 'ENOENT') {
            // Download failed, .part file does not exist
            console.error(chalk.red(' Failed to download: ') + href + chalk.dim(' (no file written)'));
        } else {
            throw err;
        }
    }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Returns the path of a usable vnu.jar.
 * A cached jar that passes the isJar check is reused; otherwise the cached
 * file is removed and each entry of JAR_URLS is tried in order.
 * @returns {Promise<string>} Path to usable jar
 * @throws {Error} When no download produced a valid jar
 */
async function resolveJarPath() {
    var cachedIsUsable = fs.existsSync(CACHED_JAR) && await isJar(CACHED_JAR);
    if (cachedIsUsable) {
        return CACHED_JAR;
    }

    ensureDir(CACHE_DIR);
    try { fs.unlinkSync(CACHED_JAR); } catch (ignored) { }

    for (var n = 0; n < JAR_URLS.length; n++) {
        try {
            await download(JAR_URLS[n], CACHED_JAR);
            if (await isJar(CACHED_JAR)) {
                return CACHED_JAR;
            }
        } catch (downloadErr) {
            // failed attempt: remove whatever was written and try the next url
            try { fs.unlinkSync(CACHED_JAR); } catch (ignored2) { }
        }
    }

    throw new Error('failed to obtain vnu.jar');
}
|
|
138
|
+
|
|
139
|
+
/**
 * Turn a comma and/or whitespace separated value into a list
 * @param {string|Array|undefined} v - Raw input; arrays are returned as-is, falsy values give []
 * @returns {Array<string>} - Non-empty entries in their original order
 */
function toList(v) {
    if (Array.isArray(v)) {
        return v;
    }

    if (!v) {
        return [];
    }

    var pieces = String(v).split(/[,\s]+/);
    var list = [];

    for (var n = 0; n < pieces.length; n++) {
        if (pieces[n]) {
            list.push(pieces[n]);
        }
    }

    return list;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Derive a flat, filesystem-safe .html file name from a url
 * Drops the scheme and anything from the first ? or #, collapses repeated
 * slashes, then replaces slashes and other unsafe characters with underscores.
 * @param {string} href - Url to encode
 * @returns {string} - Safe file name ending in .html or .htm
 */
function toSafeName(href) {
    var name = String(href || '')
        .replace(/^https?:\/\//i, '')
        .replace(/[?#].*$/, '')
        .replace(/\/+/g, '/')
        .replace(/[^a-z0-9/._-]+/gi, '_')
        .replace(/\//g, '_');

    if (name === '') {
        name = 'index.html';
    }

    return /\.html?$/i.test(name) ? name : name + '.html';
}
|
|
166
|
+
|
|
167
|
+
/**
 * Pull a json value out of text that may have other output around it
 * Tries the span from the first '[' to the last ']' first, then the span
 * from the first '{' to the last '}'; the first span that parses wins.
 * @param {string} text - Raw process output
 * @returns {any|null} - Parsed json or null
 */
function safeParseFirstJson(text) {
    var raw = String(text || '');
    var delimiters = [['[', ']'], ['{', '}']];

    for (var n = 0; n < delimiters.length; n++) {
        var start = raw.indexOf(delimiters[n][0]);
        var end = raw.lastIndexOf(delimiters[n][1]);

        if (start === -1 || end === -1 || end <= start) {
            continue;
        }

        try {
            return JSON.parse(raw.slice(start, end + 1));
        } catch (e) {
            /* ignore */
        }
    }

    return null;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Run vnu against a local html file
 * Spawns `java -jar <CURRENT_JAR_PATH>` with json output and with proxy
 * settings blanked in both the environment and the JVM flags.
 * Never rejects on process failure: a spawn error resolves with code 1.
 * @param {string} file - Html file path
 * @param {object} cfg - Config; `cfg.html` adds vnu's --html option
 * @returns {Promise<{stdout:string,stderr:string,code:number}>} - Resolves process output
 */
async function runOne(file, cfg) {
    return new Promise(function (resolve) {

        // child gets a copy of our environment with the lower-case proxy variables blanked
        var env = Object.assign({}, process.env);
        env.http_proxy = '';
        env.https_proxy = '';
        env.no_proxy = '';

        var args = [
            '-Djava.net.useSystemProxies=false',
            '-Dhttp.proxyHost=', '-Dhttp.proxyPort=',
            '-Dhttps.proxyHost=', '-Dhttps.proxyPort=',
            '-jar', CURRENT_JAR_PATH,
            '--format', 'json',
            '--asciiquotes',
            '--no-langdetect'
        ];

        // options must come before the file: vnu treats everything from the
        // first non-option argument onwards as documents to check
        if (cfg && cfg.html) {
            args.push('--html');
        }

        args.push(file);

        var p = child.spawn('java', args, { env: env });

        var out = '';
        var err = '';

        p.stdout.on('data', function (d) { out += String(d || ''); });
        p.stderr.on('data', function (d) { err += String(d || ''); });

        // a null exit code (process ended by a signal) is reported as 0
        p.on('close', function (code) { resolve({ stdout: out, stderr: err, code: code || 0 }); });
        p.on('error', function () { resolve({ stdout: out, stderr: err, code: 1 }); });
    });
}
|
|
240
|
+
|
|
241
|
+
/**
 * Normalise a validator message to single-spaced text
 * @param {string} s - Raw message (falsy values are treated as empty)
 * @returns {string} - Message with whitespace runs collapsed and ends trimmed
 */
function cleanMessage(s) {
    var words = String(s || '').split(/\s+/).filter(Boolean);
    return words.join(' ');
}
|
|
249
|
+
|
|
250
|
+
/**
 * Split vnu json messages into errors and warnings
 * Accepts either a bare array of messages or an object with a `messages`
 * array. Entries with an empty message are skipped. Warnings are only
 * collected when `cfg.warnings` is greater than 0.
 * @param {any} json - Parsed json
 * @param {object} cfg - Config
 * @returns {{errors:Array,warnings:Array}} - Parsed issues as {line, col, msg} items
 */
function parseIssuesFromJson(json, cfg) {
    var found = { errors: [], warnings: [] };

    var messages = null;
    if (Array.isArray(json)) {
        messages = json;
    } else if (json && Array.isArray(json.messages)) {
        messages = json.messages;
    }

    if (!messages) {
        return found;
    }

    var wantWarnings = Boolean(cfg && cfg.warnings > 0);

    messages.forEach(function (entry) {
        var item = entry || {};

        var text = cleanMessage(item.message || item.msg || '');
        if (!text) {
            return;
        }

        var kind = String(item.type || '').toLowerCase();

        // the end position is preferred, then the start, then plain line/column
        var issue = {
            line: parseInt(item.lastLine || item.firstLine || item.line, 10) || 0,
            col: parseInt(item.lastColumn || item.firstColumn || item.column, 10) || 0,
            msg: text
        };

        if (kind === 'error') {
            found.errors.push(issue);
            return;
        }

        if (!wantWarnings) {
            return;
        }

        // a warning is either type "warning" or type "info" with subType "warning"
        var isWarning = kind === 'warning' ||
            (kind === 'info' && String(item.subType || '').toLowerCase() === 'warning');

        if (isWarning) {
            found.warnings.push(issue);
        }
    });

    return found;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Turn validator process output into issues
 * Looks for json in stdout, then stderr, then both concatenated, and parses
 * the first one found.
 * @param {{stdout:string,stderr:string,code:number}} proc - Process output
 * @param {object} cfg - Config
 * @returns {{errors:Array,warnings:Array}} - Parsed issues
 * @throws {Error} When none of the outputs contains json
 */
function parseIssues(proc, cfg) {
    var candidates = [
        proc.stdout,
        proc.stderr,
        String(proc.stdout || '') + String(proc.stderr || '')
    ];

    var json = null;

    for (var n = 0; n < candidates.length; n++) {
        json = safeParseFirstJson(candidates[n]);
        if (json) {
            break;
        }
    }

    if (!json) {
        throw new Error('validator did not produce JSON output');
    }

    return parseIssuesFromJson(json, cfg);
}
|
|
319
|
+
|
|
320
|
+
/**
 * Print one page result
 * A passing page prints a single green line. A failing page prints a red
 * header followed by each error and, unless `cfg.errorsOnly` is set, each
 * warning, so a page that fails on warnings alone still shows why.
 * Each issue line is "message file:line:col", where the file is the saved
 * local copy from urlToFileMap when one is known.
 * @param {{url:string,ok:boolean,errors:Array,warnings:Array}} res - Page result
 * @param {object} cfg - Config
 * @returns {void} - Prints headers to stdout and issues to stderr
 */
function printPageResult(res, cfg) {
    var green = chalk.green;
    var red = chalk.red;
    var orange = chalk.hex('#FFA500');
    var dim = chalk.dim;

    // Always use absolute file path for clickability if available
    var localFile = urlToFileMap[res.url] || res.url;
    if (!path.isAbsolute(localFile) && fs.existsSync(localFile)) {
        localFile = path.resolve(localFile);
    }

    if (res.ok) {
        console.log(green(' ✔ ' + res.url));
        return;
    }

    console.log(red(' ✖ ' + res.url));

    // builds the clickable file:line[:col] suffix for one issue
    function locationOf(issue) {
        return localFile + ':' + (issue.line || 0) + (issue.col ? ':' + issue.col : '');
    }

    // message first, then clickable file:line:col in gray
    var errors = res.errors || [];
    for (var i = 0; i < errors.length; i++) {
        console.error(red(' ' + errors[i].msg) + ' ' + dim(locationOf(errors[i])));
    }

    // warnings count towards failure unless errorsOnly is set, so show them in that case
    var warnings = (cfg && cfg.errorsOnly) ? [] : (res.warnings || []);
    for (var j = 0; j < warnings.length; j++) {
        console.error(orange(' ' + warnings[j].msg) + ' ' + dim(locationOf(warnings[j])));
    }
}
|
|
354
|
+
|
|
355
|
+
/**
 * Resolve a link href against a base url and drop its fragment
 * mailto:, tel:, javascript: and data: links, empty hrefs, and hrefs that
 * cannot be resolved all give null.
 * @param {string} href - Link href
 * @param {string} base - Base url
 * @returns {string|null} - Absolute url without hash, or null
 */
function toAbsUrl(href, base) {
    var raw = href ? String(href).trim() : '';

    if (!raw) {
        return null;
    }

    if (/^(mailto|tel|javascript|data):/i.test(raw)) {
        return null;
    }

    var resolved;
    try {
        resolved = new URL(raw, base);
    } catch (e) {
        return null;
    }

    return String(resolved.href).replace(/#.*$/, '');
}
|
|
377
|
+
|
|
378
|
+
/**
 * Decide whether a link should be queued for crawling
 * Only http(s) urls qualify. Depending on cfg the link is also rejected when
 * it is off-origin (sameOrigin), has a query string (stripQuery), or
 * contains any of the exclude entries as a substring.
 * @param {string} href - Absolute url
 * @param {object} cfg - Config
 * @param {string} origin - Allowed origin
 * @returns {boolean} - True if it should be crawled
 */
function isCrawlable(href, cfg, origin) {
    if (!href || !/^https?:\/\//i.test(href)) {
        return false;
    }

    var opts = cfg || {};

    if (opts.sameOrigin) {
        var linkOrigin;
        try {
            linkOrigin = new URL(href).origin;
        } catch (e) {
            return false;
        }
        if (linkOrigin !== origin) {
            return false;
        }
    }

    if (opts.stripQuery && href.indexOf('?') !== -1) {
        return false;
    }

    var excluded = opts.exclude;
    if (excluded && excluded.length) {
        for (var n = 0; n < excluded.length; n++) {
            if (href.indexOf(excluded[n]) !== -1) {
                return false;
            }
        }
    }

    return true;
}
|
|
418
|
+
|
|
419
|
+
/**
 * Download a page, following redirects
 * Sends `cfg.userAgent` as the User-Agent header, or 'Mozilla/5.0 (node)'
 * when none is configured.
 * @param {string} pageUrl - Url to fetch
 * @param {object} cfg - Config
 * @returns {Promise<{finalUrl:string,html:string}>} - Body text and the url the response reports (falls back to pageUrl)
 * @throws {Error} When the response status is not ok
 */
async function fetchHtml(pageUrl, cfg) {
    var agent = 'Mozilla/5.0 (node)';
    if (cfg && cfg.userAgent) {
        agent = cfg.userAgent;
    }

    var response = await fetch(pageUrl, {
        redirect: 'follow',
        headers: { 'User-Agent': agent }
    });

    if (!response.ok) {
        throw new Error('request failed ' + response.status + ' ' + pageUrl);
    }

    var landedOn = pageUrl;
    if (response.url && String(response.url)) {
        landedOn = String(response.url);
    }

    var body = await response.text();

    return { finalUrl: landedOn, html: body };
}
|
|
440
|
+
|
|
441
|
+
/**
 * Write a prettified copy of a page into the temp dir
 * The html is beautified, written to a `.part` file and renamed into place.
 * On success the page url is recorded in urlToFileMap.
 * @param {string} dir - Temp dir
 * @param {string} pageUrl - Page url
 * @param {string} html - Html content
 * @returns {Promise<string>} - Saved file path, or null when the .part file was missing at rename time
 */
async function saveHtml(dir, pageUrl, html) {
    var target = path.join(dir, toSafeName(pageUrl));
    var staging = target + '.part';

    // Prettify HTML for readability
    var formatted = beautify(String(html || ''), { indent_size: 2, wrap_line_length: 120 });
    await fsp.writeFile(staging, formatted, 'utf8');

    try {
        fs.renameSync(staging, target);
    } catch (err) {
        if (!(err && err.code === 'ENOENT')) {
            throw err;
        }

        // File was not written, skip mapping and print clear message
        console.error(chalk.red(' Failed to save HTML for: ') + pageUrl + chalk.dim(' (no file written)'));
        return null;
    }

    // Track mapping for clickable output
    urlToFileMap[pageUrl] = target;

    return target;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Collect the distinct absolute links of a page
 * Reads the href of every <a> and <area>, resolves it with toAbsUrl and
 * keeps the first occurrence of each result in document order.
 * @param {string} html - Html content
 * @param {string} baseUrl - Base url
 * @returns {Array<string>} - Absolute links
 */
function extractLinks(html, baseUrl) {
    var $ = cheerio.load(String(html || ''));
    var unique = new Set();

    $('a[href], area[href]').each(function (_index, el) {
        var abs = toAbsUrl($(el).attr('href'), baseUrl);

        if (abs) {
            // a Set keeps insertion order and ignores repeats
            unique.add(abs);
        }
    });

    return Array.from(unique);
}
|
|
501
|
+
|
|
502
|
+
/**
 * Run a worker over items with a cap on how many run at once
 * Results keep the order of `items`. The returned promise rejects with the
 * first worker failure.
 * @param {Array<any>} items - Items
 * @param {number} concurrency - Max concurrent (values below 1 or non-numeric become 1)
 * @param {function(any):Promise<any>} worker - Worker
 * @returns {Promise<Array<any>>} - Results
 */
async function asyncPool(items, concurrency, worker) {
    var collected = [];

    if (!items || !items.length) {
        return collected;
    }

    var limit = Math.max(1, parseInt(concurrency, 10) || 1);

    return new Promise(function (resolve, reject) {
        var cursor = 0;
        var active = 0;
        var finished = 0;

        // start the worker for one slot and refill the pool when it settles
        function launch(slot) {
            active += 1;

            Promise.resolve()
                .then(function () { return worker(items[slot]); })
                .then(function (value) {
                    collected[slot] = value;
                    active -= 1;
                    finished += 1;

                    if (finished === items.length) {
                        resolve(collected);
                    } else {
                        pump();
                    }
                })
                .catch(reject);
        }

        // launch items until the limit is reached or nothing is left
        function pump() {
            while (active < limit && cursor < items.length) {
                launch(cursor);
                cursor += 1;
            }
        }

        pump();
    });
}
|
|
554
|
+
|
|
555
|
+
/**
 * Validate a single page url
 * Fetches the page, saves a prettified copy to tmpDir, runs vnu on that copy
 * and extracts the page's links for the crawler.
 * A page is ok when it has no errors and, unless `cfg.errorsOnly` is set or
 * `cfg.warnings` is not above 0, no warnings either.
 * @param {string} pageUrl - Url
 * @param {object} cfg - Config
 * @param {string} tmpDir - Temp dir
 * @returns {Promise<{url:string,ok:boolean,errors:Array,warnings:Array,finalUrl:string,links:Array}>} - Result
 * @throws {Error} When the page cannot be fetched, saved, or parsed by the validator
 */
async function validateOneUrl(pageUrl, cfg, tmpDir) {
    cfg = cfg || {};

    var fetched = await fetchHtml(pageUrl, cfg);
    var finalUrl = fetched.finalUrl;
    var html = fetched.html;

    var file = await saveHtml(tmpDir, finalUrl, html);

    // saveHtml returns null when nothing was written; do not hand that to the
    // validator as if it were a file path
    if (!file) {
        throw new Error('could not save html for validation ' + finalUrl);
    }

    var proc = await runOne(file, cfg);

    var issues = parseIssues(proc, cfg);

    var includeWarnings = !cfg.errorsOnly && cfg.warnings > 0;
    var ok = (issues.errors.length === 0 && (!includeWarnings || issues.warnings.length === 0));

    var links = extractLinks(html, finalUrl);

    // Map both the original and final URL to the prettified file
    urlToFileMap[pageUrl] = file;
    urlToFileMap[finalUrl] = file;

    return {
        url: pageUrl,
        finalUrl: finalUrl,
        ok: ok,
        errors: issues.errors,
        warnings: issues.warnings,
        links: links
    };
}
|
|
590
|
+
|
|
591
|
+
/**
 * Crawl and validate starting from a url
 * Pages are taken from a queue in batches of at most `concurrency`, each
 * validated with validateOneUrl. Links of a page are queued while its depth
 * is below `cfg.depth` and isCrawlable accepts them. A page that throws is
 * recorded as a failed result carrying the error message.
 * @param {string} startUrl - Start url
 * @param {object} cfg - Config (depth defaults to 2, concurrency to 4; json suppresses console output)
 * @returns {Promise<{passed:number,failed:number,results:Array}>} - Summary
 * @throws {Error} When java is not available or vnu.jar cannot be obtained
 */
async function validateUrl(startUrl, cfg) {
    cfg = cfg || {};

    if (!(await hasJava())) {
        throw new Error('java not found');
    }

    if (!CURRENT_JAR_PATH) {
        CURRENT_JAR_PATH = await resolveJarPath();
    }

    var origin = '';
    try { origin = new URL(startUrl).origin; } catch (e) { /* unparsable start url: origin stays empty */ }

    var tmpDir = path.join(os.tmpdir(), 'w3c-validate-html', 'site-' + Date.now());
    ensureDir(tmpDir);

    var seen = Object.create(null);
    var queue = [{ url: startUrl, depth: 0 }];

    var results = [];
    var passed = 0;
    var failed = 0;

    var maxDepth = parseInt(cfg.depth, 10);
    if (isNaN(maxDepth)) { maxDepth = 2; }
    maxDepth = Math.max(0, maxDepth);

    var concurrency = parseInt(cfg.concurrency, 10);
    if (isNaN(concurrency)) { concurrency = 4; }
    // a value below 1 would produce empty batches and never drain the queue
    concurrency = Math.max(1, concurrency);

    if (!cfg.json) {
        var cyan = chalk.cyan;
        var bold = chalk.bold;
        console.log('');
        console.log(bold(cyan('w3c validating html starting at ' + startUrl)));
        console.log('');
    }

    while (queue.length) {

        // take the next batch off the front of the queue
        var batch = queue.slice(0, concurrency);
        queue = queue.slice(concurrency);

        /* run a batch */
        var batchResults = await asyncPool(batch, concurrency, async function (job) {

            var u = job.url;
            var d = job.depth;

            // the same url can be queued more than once; validate it only once
            if (Object.prototype.hasOwnProperty.call(seen, u)) {
                return null;
            }

            seen[u] = true;

            try {
                var one = await validateOneUrl(u, cfg, tmpDir);
                one.depth = d;
                return one;
            } catch (e) {
                return {
                    url: u,
                    finalUrl: u,
                    depth: d,
                    ok: false,
                    errors: [{ line: 0, col: 0, msg: (e && e.message) ? e.message : String(e) }],
                    warnings: [],
                    links: []
                };
            }
        });

        for (var j = 0; j < batchResults.length; j++) {
            var r = batchResults[j];

            if (!r) {
                continue;
            }

            // a redirect target has been validated too, so do not fetch it again
            seen[r.finalUrl] = true;

            if (!cfg.json) {
                printPageResult({ url: r.finalUrl, ok: r.ok, errors: r.errors, warnings: r.warnings }, cfg);
            }

            results.push({
                url: r.finalUrl,
                ok: r.ok,
                errors: r.errors,
                warnings: r.warnings
            });

            if (r.ok) {
                passed++;
            } else {
                failed++;
            }

            if (r.depth >= maxDepth) {
                continue;
            }

            for (var k = 0; k < r.links.length; k++) {
                var nextUrl = r.links[k];

                if (!isCrawlable(nextUrl, cfg, origin)) {
                    continue;
                }

                if (Object.prototype.hasOwnProperty.call(seen, nextUrl)) {
                    continue;
                }

                queue.push({ url: nextUrl, depth: r.depth + 1 });
            }
        }
    }

    // keep stdout free of decoration in json mode, like the header above
    if (!cfg.json) {
        console.log('');
    }

    return { passed: passed, failed: failed, results: results };
}
|
|
728
|
+
|
|
729
|
+
/**
 * Expand a path to html files
 *
 * A file target must end in .html or .htm (any case) and is returned as a
 * one-item list. A folder target is searched recursively for files with the
 * same extensions, so both modes agree on what counts as an html file.
 *
 * @param {string} target - File or folder
 * @returns {Promise<string[]>} - Absolute html file paths
 * @throws {Error} - 'path not found <target>' when the target does not exist,
 *   'not an html file <target>' when a file has another extension, otherwise
 *   the message of the underlying stat error (glob errors reject as-is)
 */
async function expandFiles(target) {
    var abs = path.resolve(target);

    var st;
    try {
        st = await fsp.stat(abs);
    } catch (e) {
        /* turn the common "missing path" case into a friendlier message */
        var msg = (e && e.code === 'ENOENT') ? ('path not found ' + target) : (e && e.message ? e.message : String(e));
        throw new Error(msg);
    }

    if (st.isFile()) {
        if (!/\.html?$/i.test(abs)) {
            throw new Error('not an html file ' + target);
        }
        return [abs];
    }

    return new Promise(function (resolve, reject) {
        /* mirror the single file check above: .html and .htm, case-insensitive */
        glob('**/*.{html,htm}', { cwd: abs, nodir: true, nocase: true }, function (err, matches) {
            if (err) {
                reject(err);
                return;
            }

            /* glob returns paths relative to cwd, callers expect absolute paths */
            resolve(matches.map(function (rel) {
                return path.join(abs, rel);
            }));
        });
    });
}
|
|
767
|
+
|
|
768
|
+
/**
 * Validate local html files
 *
 * Expands the target into a list of html files, validates them one at a time
 * and tallies the outcome. Human readable output (header, per-file result and
 * trailing blank line) is only written when cfg.json is not set, so json mode
 * leaves stdout untouched for the caller.
 *
 * @param {string} target - File or folder
 * @param {object} cfg - Config (reads json, errorsOnly and warnings; also passed to the validator helpers)
 * @returns {Promise<{passed:number,failed:number,results:Array}>} - Summary
 * @throws {Error} - 'java not found' when java is unavailable, or any error
 *   raised while resolving the jar, expanding the target or running the validator
 */
async function validateFiles(target, cfg) {
    cfg = cfg || {};

    if (!(await hasJava())) {
        throw new Error('java not found');
    }

    /* resolve the jar once and reuse it for later calls */
    if (!CURRENT_JAR_PATH) {
        CURRENT_JAR_PATH = await resolveJarPath();
    }

    var files = await expandFiles(target);

    if (!cfg.json) {
        var cyan = chalk.cyan;
        var bold = chalk.bold;
        console.log('');
        console.log(bold(cyan('w3c validating ' + files.length + ' HTML files in ' + target)));
        console.log('');
    }

    var results = [];
    var passed = 0;
    var failed = 0;

    /* warnings only fail a page when they are enabled and not suppressed */
    var includeWarnings = !cfg.errorsOnly && cfg.warnings > 0;

    for (var i = 0; i < files.length; i++) {

        var proc = await runOne(files[i], cfg);
        var issues = parseIssues(proc, cfg);

        var ok = (issues.errors.length === 0 && (!includeWarnings || issues.warnings.length === 0));

        var res = {
            /* report paths relative to cwd, falling back to the absolute path */
            url: path.relative(process.cwd(), files[i]) || files[i],
            ok: ok,
            errors: issues.errors,
            warnings: issues.warnings
        };

        if (!cfg.json) {
            printPageResult(res, cfg);
        }

        results.push(res);

        if (ok) {
            passed++;
        } else {
            failed++;
        }
    }

    /* keep the trailing spacer out of json output, same as the header above */
    if (!cfg.json) {
        console.log('');
    }

    return { passed: passed, failed: failed, results: results };
}
|
|
831
|
+
|
|
832
|
+
/**
 * Decide whether the input is a raw html string
 *
 * Leading and trailing whitespace is ignored. The remaining text has to open
 * with a doctype, an html comment or a tag name (e.g. <html>, <div>, <svg>,
 * <x-foo>).
 *
 * @param {string} str - input string
 * @returns {boolean} - true if input looks like html
 */
function isHtml(str) {
    var HTML_OPENING = /^<(!doctype\b|!--|[a-z][\w:-]*\b)/i;
    var text = (typeof str === 'string') ? str.trim() : '';

    /* nothing left after trimming, or the first character is not an opening bracket */
    if (text.length === 0 || text.charAt(0) !== '<') {
        return false;
    }

    return HTML_OPENING.test(text);
}
|
|
853
|
+
|
|
854
|
+
/**
 * Check if a string is a http or https url
 *
 * Surrounding whitespace is ignored. The value must start with http:// or
 * https:// and must be parseable by the WHATWG URL parser with a non-empty
 * hostname, so malformed hosts (e.g. containing spaces) are rejected.
 *
 * @param {string} str - input string
 * @returns {boolean} - true if url
 */
function isUrl(str) {
    var s = String(str || '').replace(/^\s+|\s+$/g, '');
    var u;

    /* cheap scheme check first, avoids parsing file paths and html strings */
    if (!/^https?:\/\//i.test(s)) {
        return false;
    }

    try {
        u = new URL(s);
    } catch (e) {
        /* the parser throws on invalid input, which simply means "not a url" */
        return false;
    }

    return (u.protocol === 'http:' || u.protocol === 'https:') && u.hostname !== '';
}
|
|
870
|
+
|
|
871
|
+
/**
 * Check if a string looks like a file or folder path on any os
 *
 * Urls and raw html are never treated as paths. Everything else counts as a
 * path when it is absolute or explicitly relative (posix, windows drive, unc,
 * tilde, "." and ".."), is a bare html filename such as "index.html", or
 * contains a path separator alongside other characters.
 *
 * @param {string} str - input string
 * @returns {boolean} - true if file path
 */
function isFilePath(str) {
    var s = String(str || '').replace(/^\s+|\s+$/g, '');

    if (!s || isUrl(s) || isHtml(s)) {
        return false;
    }

    // abs or explicit relative (posix, windows, unc, tilde); "~", "." and ".." also count on their own
    if (/^(?:[a-zA-Z]:[\\/]|\\\\|\/|~(?:[\\/]|$)|\.{1,2}(?:[\\/]|$))/.test(s)) {
        return true;
    }

    // bare html filename in the current folder, e.g. index.html
    if (/\.html?$/i.test(s)) {
        return true;
    }

    // contains a separator and not just separators
    return /[\\/]/.test(s) && /[^\s\\/]/.test(s);
}
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
/**
 * Validate a raw HTML string using vnu.jar
 *
 * The string is written to a temporary file, validated, and the temporary
 * file and folder are removed again even when validation throws.
 *
 * @param {string} src - The HTML string to validate
 * @param {object} [cfg] - Optional config (reads errorsOnly and warnings; also passed to the validator helpers)
 * @returns {Promise<{passed: number, failed: number, results: Array}>} - Validation summary
 * @throws {Error} - 'java not found' when java is unavailable, or any error
 *   raised while resolving the jar, writing the temp file or running the validator
 */
async function validateHtmlString(src, cfg) {

    /* cfg is optional, default it so the property reads below cannot throw */
    cfg = cfg || {};

    if (!(await hasJava())) {
        throw new Error('java not found');
    }

    /* resolve the jar once and reuse it for later calls */
    if (!CURRENT_JAR_PATH) {
        CURRENT_JAR_PATH = await resolveJarPath();
    }

    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'w3c-validate-html-str-'));
    const tmpFile = path.join(tmpDir, 'input.html');
    let issues;

    try {
        await fsp.writeFile(tmpFile, src, 'utf8');
        const proc = await runOne(tmpFile, cfg);
        issues = parseIssues(proc, cfg);
    } finally {
        /* best effort cleanup, attempted separately so a failed unlink does not skip the rmdir */
        try { fs.unlinkSync(tmpFile); } catch (e) { /* file may not have been written */ }
        try { fs.rmdirSync(tmpDir); } catch (e) { /* leave the folder behind rather than mask the real error */ }
    }

    /* warnings only fail the page when they are enabled and not suppressed */
    const includeWarnings = !cfg.errorsOnly && cfg.warnings > 0;
    const ok = (issues.errors.length === 0 && (!includeWarnings || issues.warnings.length === 0));

    const result = {
        ok,
        errors: issues.errors,
        warnings: issues.warnings
    };

    return {
        passed: ok ? 1 : 0,
        failed: ok ? 0 : 1,
        results: [result]
    };
}
|
|
935
|
+
|
|
936
|
+
/*
 * cli vs module
 *
 * Run directly: parse the command line, validate the target and exit with 1
 * when any page failed (or an error occurred), otherwise 0.
 * Required as a module: export the validate function.
 */
if (require.main === module) {

    var argv = minimist(process.argv.slice(2), {
        string: ['target', 'url', 'exclude', 'user-agent'],
        boolean: ['errors-only', 'json', 'same-origin', 'strip-query'],
        alias: { t: 'target', e: 'errors-only' },
        default: {
            target: '',
            depth: 2,
            concurrency: 4,
            warnings: 1,
            exclude: '',
            'errors-only': false,
            json: false,
            'same-origin': true,
            'strip-query': false,
            'user-agent': 'Mozilla/5.0 (node)'
        }
    });

    var target = argv.target;

    if (!target) {
        console.error('usage: w3c-validate-html --target <file|folder|url> [--depth 2] [--concurrency 4] [--warnings 0|1] [--exclude "foo,bar"] [--same-origin] [--strip-query] [--errors-only] [--json]');
        process.exit(1);
    }

    var cfg = {
        /* negative depth makes no sense, treat it like 0 */
        depth: Math.max(0, parseInt(argv.depth, 10) || 0),
        /* at least one worker: a zero or negative value would never fill a crawl batch */
        concurrency: Math.max(1, parseInt(argv.concurrency, 10) || 1),
        warnings: parseInt(argv.warnings, 10) || 0,
        exclude: toList(argv.exclude),
        errorsOnly: !!argv['errors-only'],
        json: !!argv.json,
        /* on unless explicitly disabled (e.g. --no-same-origin) */
        sameOrigin: argv['same-origin'] !== false,
        stripQuery: !!argv['strip-query'],
        userAgent: argv['user-agent']
    };

    validate(target, cfg).then(function (summary) {
        if (argv.json) {
            try { console.log(JSON.stringify(summary)); }
            catch (e) { console.error('{"error":"failed to stringify results"}'); }
        }
        /* non-zero exit code lets ci pipelines fail on invalid html */
        process.exit(summary.failed > 0 ? 1 : 0);
    })
    .catch(function (err) {
        console.error(chalk.red('error') + ' ' + (err && err.message ? err.message : String(err)));
        process.exit(1);
    });

} else {
    module.exports = validate;
}
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "w3c-validate-html",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Validate HTML offline using the official W3C vnu.jar",
|
|
5
|
+
"type": "commonjs",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"w3c-validate-html": "index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"index.js",
|
|
12
|
+
"README.md",
|
|
13
|
+
"LICENSE"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"test": "NODE_ENV=test node_modules/.bin/jasmine --config=tests/jasmine/config.json"
|
|
17
|
+
},
|
|
18
|
+
"engines": {
|
|
19
|
+
"node": ">=14"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"chalk": "^4.1.2",
|
|
23
|
+
"cheerio": "0.22.0",
|
|
24
|
+
"glob": "^7.2.3",
|
|
25
|
+
"jasmine": "^3.7.0",
|
|
26
|
+
"jasmine-console-reporter": "^3.1.0",
|
|
27
|
+
"jasmine-xml-reporter": "^1.2.1",
|
|
28
|
+
"js-beautify": "^1.15.4",
|
|
29
|
+
"minimist": "^1.2.8",
|
|
30
|
+
"node-fetch": "^2.7.0"
|
|
31
|
+
},
|
|
32
|
+
"repository": {
|
|
33
|
+
"type": "git",
|
|
34
|
+
"url": "git+https://github.com/orca-scan/w3c-validate-html.git"
|
|
35
|
+
},
|
|
36
|
+
"keywords": [
|
|
37
|
+
"html",
|
|
38
|
+
"validator",
|
|
39
|
+
"w3c",
|
|
40
|
+
"lint",
|
|
41
|
+
"cli",
|
|
42
|
+
"offline",
|
|
43
|
+
"build",
|
|
44
|
+
"ci"
|
|
45
|
+
],
|
|
46
|
+
"author": "John Doherty <john@orcascan.com>",
|
|
47
|
+
"license": "MIT",
|
|
48
|
+
"bugs": {
|
|
49
|
+
"url": "https://github.com/orca-scan/w3c-validate-html/issues"
|
|
50
|
+
},
|
|
51
|
+
"homepage": "https://github.com/orca-scan/w3c-validate-html#readme"
|
|
52
|
+
}
|