@dukebot/astro-html-validator 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -1
- package/src/utils/links.mjs +50 -42
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dukebot/astro-html-validator",
|
|
3
|
-
"version": "1.1.2",
|
|
3
|
+
"version": "1.1.3",
|
|
4
4
|
"description": "Validate Astro-generated HTML output for SEO metadata, JSON-LD, and internal links.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.mjs",
|
|
@@ -19,6 +19,8 @@
|
|
|
19
19
|
],
|
|
20
20
|
"scripts": {
|
|
21
21
|
"check": "node ./bin/cli.mjs --help",
|
|
22
|
+
"test": "node --test",
|
|
23
|
+
"test:links": "node --test tests/links-utils.test.mjs",
|
|
22
24
|
"validate:dist": "node ./bin/cli.mjs"
|
|
23
25
|
},
|
|
24
26
|
"keywords": [
|
|
@@ -35,5 +37,8 @@
|
|
|
35
37
|
},
|
|
36
38
|
"publishConfig": {
|
|
37
39
|
"access": "public"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"parse5": "^8.0.0"
|
|
38
43
|
}
|
|
39
44
|
}
|
package/src/utils/links.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
+
import { parse } from 'parse5';
|
|
2
3
|
import { pathExists } from './common.mjs';
|
|
3
4
|
|
|
4
5
|
/**
|
|
@@ -30,15 +31,34 @@ function toLocalPathFromAbsolute(rawUrl, absolutePrefixes) {
|
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
/**
|
|
33
|
-
*
|
|
34
|
+
* Collects href/src attribute values from parsed HTML element nodes.
|
|
34
35
|
*/
|
|
35
|
-
function sanitizeHtmlForLinkExtraction(html = '') {
|
|
36
|
-
if (!html) return '';
|
|
36
|
+
function collectHtmlLinkAttributes(html = '') {
|
|
37
|
+
if (!html) return [];
|
|
38
|
+
|
|
39
|
+
const urls = [];
|
|
40
|
+
const document = parse(html, { sourceCodeLocationInfo: false });
|
|
41
|
+
const queue = [document];
|
|
42
|
+
|
|
43
|
+
while (queue.length > 0) {
|
|
44
|
+
const node = queue.shift();
|
|
45
|
+
if (!node) continue;
|
|
46
|
+
|
|
47
|
+
if (Array.isArray(node.attrs)) {
|
|
48
|
+
for (const attr of node.attrs) {
|
|
49
|
+
if (!attr?.name || !attr?.value) continue;
|
|
50
|
+
if (attr.name !== 'href' && attr.name !== 'src') continue;
|
|
51
|
+
urls.push(attr.value);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (node.content) queue.push(node.content);
|
|
56
|
+
if (Array.isArray(node.childNodes) && node.childNodes.length > 0) {
|
|
57
|
+
queue.push(...node.childNodes);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
37
60
|
|
|
38
|
-
return html
|
|
39
|
-
.replace(/<!--[\s\S]*?-->/g, '')
|
|
40
|
-
.replace(/<script\b[\s\S]*?<\/script>/gi, '')
|
|
41
|
-
.replace(/<style\b[\s\S]*?<\/style>/gi, '');
|
|
61
|
+
return urls;
|
|
42
62
|
}
|
|
43
63
|
|
|
44
64
|
/**
|
|
@@ -46,47 +66,35 @@ function sanitizeHtmlForLinkExtraction(html = '') {
|
|
|
46
66
|
*/
|
|
47
67
|
export function extractInternalUrls(html, { absoluteUrlPrefixes = [] } = {}) {
|
|
48
68
|
const urls = new Set();
|
|
49
|
-
const tagRegex = /<[^>]+>/g;
|
|
50
|
-
const attrRegex = /\b(?:href|src)\s*=\s*["']([^"']+)["']/gi;
|
|
51
69
|
const absolutePrefixes = normalizeAbsolutePrefixes(absoluteUrlPrefixes);
|
|
52
|
-
const safeHtml = sanitizeHtmlForLinkExtraction(html);
|
|
53
|
-
|
|
54
|
-
let tagMatch;
|
|
55
|
-
while ((tagMatch = tagRegex.exec(safeHtml)) !== null) {
|
|
56
|
-
const tag = tagMatch[0];
|
|
57
|
-
if (!tag || tag.startsWith('</')) continue;
|
|
58
|
-
|
|
59
|
-
let attrMatch;
|
|
60
|
-
while ((attrMatch = attrRegex.exec(tag)) !== null) {
|
|
61
|
-
const raw = attrMatch[1]?.trim();
|
|
62
|
-
if (!raw) continue;
|
|
63
|
-
|
|
64
|
-
if (
|
|
65
|
-
raw.startsWith('//') ||
|
|
66
|
-
raw.startsWith('#') ||
|
|
67
|
-
raw.startsWith('mailto:') ||
|
|
68
|
-
raw.startsWith('tel:') ||
|
|
69
|
-
raw.startsWith('javascript:') ||
|
|
70
|
-
raw.startsWith('data:')
|
|
71
|
-
) {
|
|
72
|
-
continue;
|
|
73
|
-
}
|
|
74
70
|
|
|
75
|
-
|
|
76
|
-
|
|
71
|
+
for (const value of collectHtmlLinkAttributes(html)) {
|
|
72
|
+
const raw = value?.trim();
|
|
73
|
+
if (!raw) continue;
|
|
74
|
+
|
|
75
|
+
if (
|
|
76
|
+
raw.startsWith('//') ||
|
|
77
|
+
raw.startsWith('#') ||
|
|
78
|
+
raw.startsWith('mailto:') ||
|
|
79
|
+
raw.startsWith('tel:') ||
|
|
80
|
+
raw.startsWith('javascript:') ||
|
|
81
|
+
raw.startsWith('data:')
|
|
82
|
+
) {
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
77
85
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
continue;
|
|
81
|
-
}
|
|
86
|
+
const clean = raw.split(/[?#]/)[0];
|
|
87
|
+
if (!clean) continue;
|
|
82
88
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
89
|
+
if (clean.startsWith('/')) {
|
|
90
|
+
urls.add(clean);
|
|
91
|
+
continue;
|
|
87
92
|
}
|
|
88
93
|
|
|
89
|
-
|
|
94
|
+
if (clean.startsWith('http://') || clean.startsWith('https://')) {
|
|
95
|
+
const localPath = toLocalPathFromAbsolute(clean, absolutePrefixes);
|
|
96
|
+
if (localPath) urls.add(localPath);
|
|
97
|
+
}
|
|
90
98
|
}
|
|
91
99
|
|
|
92
100
|
return [...urls];
|