@dukebot/astro-html-validator 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/utils/links.mjs +50 -28
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dukebot/astro-html-validator",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "description": "Validate Astro-generated HTML output for SEO metadata, JSON-LD, and internal links.",
5
5
  "type": "module",
6
6
  "main": "./src/index.mjs",
package/src/utils/links.mjs CHANGED
@@ -29,42 +29,64 @@ function toLocalPathFromAbsolute(rawUrl, absolutePrefixes) {
29
29
  return null;
30
30
  }
31
31
 
32
+ /**
33
+ * Removes non-rendered sections to avoid false positives when extracting links.
34
+ */
35
+ function sanitizeHtmlForLinkExtraction(html = '') {
36
+ if (!html) return '';
37
+
38
+ return html
39
+ .replace(/<!--[\s\S]*?-->/g, '')
40
+ .replace(/<script\b[\s\S]*?<\/script>/gi, '')
41
+ .replace(/<style\b[\s\S]*?<\/style>/gi, '');
42
+ }
43
+
32
44
  /**
33
45
  * Extracts local (root-relative) URLs from href/src attributes.
34
46
  */
35
47
  export function extractInternalUrls(html, { absoluteUrlPrefixes = [] } = {}) {
36
48
  const urls = new Set();
37
- const regex = /(?:href|src)=["']([^"']+)["']/gi;
49
+ const tagRegex = /<[^>]+>/g;
50
+ const attrRegex = /\b(?:href|src)\s*=\s*["']([^"']+)["']/gi;
38
51
  const absolutePrefixes = normalizeAbsolutePrefixes(absoluteUrlPrefixes);
39
-
40
- let match;
41
- while ((match = regex.exec(html)) !== null) {
42
- const raw = match[1]?.trim();
43
- if (!raw) continue;
44
-
45
- if (
46
- raw.startsWith('//') ||
47
- raw.startsWith('#') ||
48
- raw.startsWith('mailto:') ||
49
- raw.startsWith('tel:') ||
50
- raw.startsWith('javascript:') ||
51
- raw.startsWith('data:')
52
- ) {
53
- continue;
52
+ const safeHtml = sanitizeHtmlForLinkExtraction(html);
53
+
54
+ let tagMatch;
55
+ while ((tagMatch = tagRegex.exec(safeHtml)) !== null) {
56
+ const tag = tagMatch[0];
57
+ if (!tag || tag.startsWith('</')) continue;
58
+
59
+ let attrMatch;
60
+ while ((attrMatch = attrRegex.exec(tag)) !== null) {
61
+ const raw = attrMatch[1]?.trim();
62
+ if (!raw) continue;
63
+
64
+ if (
65
+ raw.startsWith('//') ||
66
+ raw.startsWith('#') ||
67
+ raw.startsWith('mailto:') ||
68
+ raw.startsWith('tel:') ||
69
+ raw.startsWith('javascript:') ||
70
+ raw.startsWith('data:')
71
+ ) {
72
+ continue;
73
+ }
74
+
75
+ const clean = raw.split(/[?#]/)[0];
76
+ if (!clean) continue;
77
+
78
+ if (clean.startsWith('/')) {
79
+ urls.add(clean);
80
+ continue;
81
+ }
82
+
83
+ if (clean.startsWith('http://') || clean.startsWith('https://')) {
84
+ const localPath = toLocalPathFromAbsolute(clean, absolutePrefixes);
85
+ if (localPath) urls.add(localPath);
86
+ }
54
87
  }
55
88
 
56
- const clean = raw.split(/[?#]/)[0];
57
- if (!clean) continue;
58
-
59
- if (clean.startsWith('/')) {
60
- if (clean) urls.add(clean);
61
- continue;
62
- }
63
-
64
- if (clean.startsWith('http://') || clean.startsWith('https://')) {
65
- const localPath = toLocalPathFromAbsolute(clean, absolutePrefixes);
66
- if (localPath) urls.add(localPath);
67
- }
89
+ attrRegex.lastIndex = 0;
68
90
  }
69
91
 
70
92
  return [...urls];