@dukebot/astro-html-validator 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,6 +46,8 @@ astro-html-validator [selector] [options]
46
46
 
47
47
  Options:
48
48
  --dir <path> Path to the dist directory (default: <cwd>/dist)
49
+ --links-absolute-prefixes <list>
50
+ Comma-separated absolute URL prefixes treated as local routes
49
51
  --quiet Disable summary output
50
52
  --help Show help
51
53
  ```
@@ -67,7 +69,9 @@ const validator = new Validator({
67
69
  disallowEmptyInLanguage: true,
68
70
  requireLangMatch: true,
69
71
  },
70
- links: {},
72
+ links: {
73
+ absoluteUrlPrefixes: ['https://example.com', 'https://www.example.com'],
74
+ },
71
75
  meta: {
72
76
  metaTitleMinLength: 30,
73
77
  metaTitleMaxLength: 60,
@@ -82,6 +86,17 @@ const results = await validator.run({ selector: 'all' });
82
86
  console.log(results);
83
87
  ```
84
88
 
89
+ ### Internal links: absolute-to-local prefix mapping
90
+
91
+ The `links` validator can treat some absolute URLs as local internal routes.
92
+
93
+ If `absoluteUrlPrefixes` contains your site domains, links like:
94
+
95
+ - `https://example.com/about`
96
+ - `https://www.example.com/contact?utm=x#team`
97
+
98
+ are normalized to local paths (`/about`, `/contact`) and validated against `dist`.
99
+
85
100
  ### Architecture (current)
86
101
 
87
102
  - `src/index.mjs` exports the coordinator class (`Validator`) that orchestrates all checks.
@@ -143,3 +158,4 @@ Quick steps:
143
158
 
144
159
 
145
160
 
161
+
package/bin/cli.mjs CHANGED
@@ -18,6 +18,8 @@ Selector:
18
18
 
19
19
  Options:
20
20
  --dir <path> Path to the dist directory (default: ./dist)
21
+ --links-absolute-prefixes <list>
22
+ Comma-separated absolute URL prefixes treated as local
21
23
  --quiet Disable printed summary output
22
24
  --help Show help
23
25
 
@@ -25,6 +27,7 @@ Examples:
25
27
  astro-html-validator
26
28
  astro-html-validator meta
27
29
  astro-html-validator links --dir ./dist
30
+ astro-html-validator links --links-absolute-prefixes https://example.com,https://www.example.com
28
31
  astro-html-validator jsonld,meta
29
32
  `);
30
33
  }
@@ -36,6 +39,7 @@ function parseArgs(argv = process.argv.slice(2)) {
36
39
  const options = {
37
40
  selector: 'all',
38
41
  dirPath: path.resolve(process.cwd(), 'dist'),
42
+ linksAbsolutePrefixes: [],
39
43
  print: true,
40
44
  help: false,
41
45
  };
@@ -67,6 +71,17 @@ function parseArgs(argv = process.argv.slice(2)) {
67
71
  continue;
68
72
  }
69
73
 
74
+ if (arg === '--links-absolute-prefixes') {
75
+ const next = argv[index + 1];
76
+ if (!next) throw new Error('Missing value for --links-absolute-prefixes');
77
+ options.linksAbsolutePrefixes = next
78
+ .split(',')
79
+ .map((item) => item.trim())
80
+ .filter(Boolean);
81
+ index += 1;
82
+ continue;
83
+ }
84
+
70
85
  throw new Error(`Unknown argument: ${arg}`);
71
86
  }
72
87
 
@@ -88,7 +103,11 @@ async function main() {
88
103
 
89
104
  const validator = new Validator({
90
105
  dirPath: parsed.dirPath,
91
- config: {},
106
+ config: {
107
+ links: {
108
+ absoluteUrlPrefixes: parsed.linksAbsolutePrefixes,
109
+ },
110
+ },
92
111
  print: parsed.print,
93
112
  });
94
113
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dukebot/astro-html-validator",
3
- "version": "1.1.0",
3
+ "version": "1.1.2",
4
4
  "description": "Validate Astro-generated HTML output for SEO metadata, JSON-LD, and internal links.",
5
5
  "type": "module",
6
6
  "main": "./src/index.mjs",
@@ -1,35 +1,92 @@
1
1
  import path from 'node:path';
2
2
  import { pathExists } from './common.mjs';
3
3
 
/**
 * Normalizes configured absolute URL prefixes.
 *
 * Accepts either an array of prefixes or a single comma-separated string.
 * Every entry is coerced to a string, trimmed, and stripped of trailing
 * slashes. Nullish and blank entries are dropped and duplicates are removed,
 * keeping first-seen order.
 *
 * @param {string[] | string | null | undefined} [absoluteUrlPrefixes=[]]
 *   Prefixes such as `https://example.com`, as an array or comma-separated list.
 * @returns {string[]} Unique, non-empty prefixes without trailing slashes.
 */
function normalizeAbsolutePrefixes(absoluteUrlPrefixes = []) {
  if (!absoluteUrlPrefixes) return [];

  const rawValues = Array.isArray(absoluteUrlPrefixes)
    ? absoluteUrlPrefixes
    : String(absoluteUrlPrefixes).split(',');

  const cleaned = rawValues
    // Array entries can be null/undefined when they come from a config object.
    .filter((value) => value != null)
    // Apply the same trimming to array entries that string input always received.
    .map((value) => String(value).trim().replace(/\/+$/, ''))
    // A blank (or slash-only) entry would become an empty prefix; drop it.
    .filter(Boolean);

  return [...new Set(cleaned)];
}
19
+
/**
 * Converts a matching absolute URL into a local root-relative URL.
 *
 * A URL matches a prefix when it equals the prefix exactly (mapped to `/`) or
 * continues with `/` right after it, so `https://example.com` does not match
 * `https://example.com.evil.test/...`. The first matching prefix wins.
 *
 * @param {string} rawUrl Absolute URL to convert.
 * @param {string[] | null | undefined} [absolutePrefixes=[]]
 *   Prefixes treated as the local site; trailing slashes are tolerated.
 * @returns {string | null} The root-relative remainder (starting with `/`),
 *   or `null` when no prefix matches.
 */
function toLocalPathFromAbsolute(rawUrl, absolutePrefixes = []) {
  if (typeof rawUrl !== 'string') return null;

  // `?? []` covers an explicit null; the default only covers undefined.
  for (const candidate of absolutePrefixes ?? []) {
    if (typeof candidate !== 'string') continue;

    // Tolerate prefixes that were not normalized by the caller.
    const prefix = candidate.replace(/\/+$/, '');
    // An empty prefix would "match" any root-relative string; ignore it.
    if (!prefix) continue;

    if (rawUrl === prefix) return '/';
    if (rawUrl.startsWith(`${prefix}/`)) return rawUrl.slice(prefix.length);
  }

  return null;
}
31
+
/**
 * Removes non-rendered sections to avoid false positives when extracting links.
 *
 * Strips HTML comments, then `<script>` elements, then `<style>` elements, so
 * `href`/`src`-looking text inside them is never picked up as a link.
 *
 * @param {string} [html=''] Raw page HTML; non-string values are coerced with
 *   `String()`.
 * @returns {string} The HTML without comments, scripts, or styles; `''` for
 *   empty or nullish input.
 */
function sanitizeHtmlForLinkExtraction(html = '') {
  if (!html) return '';

  return String(html)
    .replace(/<!--[\s\S]*?-->/g, '')
    // End tags may carry whitespace (or stray text) before `>`, e.g.
    // `</script >`, so allow anything up to the closing bracket.
    .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, '')
    .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, '');
}
43
+
4
44
  /**
5
45
  * Extracts local (root-relative) URLs from href/src attributes.
6
46
  */
7
- export function extractInternalUrls(html) {
47
+ export function extractInternalUrls(html, { absoluteUrlPrefixes = [] } = {}) {
8
48
  const urls = new Set();
9
- const regex = /(?:href|src)=["']([^"']+)["']/gi;
10
-
11
- let match;
12
- while ((match = regex.exec(html)) !== null) {
13
- const raw = match[1]?.trim();
14
- if (!raw) continue;
15
-
16
- if (
17
- raw.startsWith('http://') ||
18
- raw.startsWith('https://') ||
19
- raw.startsWith('//') ||
20
- raw.startsWith('#') ||
21
- raw.startsWith('mailto:') ||
22
- raw.startsWith('tel:') ||
23
- raw.startsWith('javascript:') ||
24
- raw.startsWith('data:')
25
- ) {
26
- continue;
27
- }
49
+ const tagRegex = /<[^>]+>/g;
50
+ const attrRegex = /\b(?:href|src)\s*=\s*["']([^"']+)["']/gi;
51
+ const absolutePrefixes = normalizeAbsolutePrefixes(absoluteUrlPrefixes);
52
+ const safeHtml = sanitizeHtmlForLinkExtraction(html);
28
53
 
29
- if (raw.startsWith('/')) {
30
- const clean = raw.split('#')[0].split('?')[0];
31
- if (clean) urls.add(clean);
54
+ let tagMatch;
55
+ while ((tagMatch = tagRegex.exec(safeHtml)) !== null) {
56
+ const tag = tagMatch[0];
57
+ if (!tag || tag.startsWith('</')) continue;
58
+
59
+ let attrMatch;
60
+ while ((attrMatch = attrRegex.exec(tag)) !== null) {
61
+ const raw = attrMatch[1]?.trim();
62
+ if (!raw) continue;
63
+
64
+ if (
65
+ raw.startsWith('//') ||
66
+ raw.startsWith('#') ||
67
+ raw.startsWith('mailto:') ||
68
+ raw.startsWith('tel:') ||
69
+ raw.startsWith('javascript:') ||
70
+ raw.startsWith('data:')
71
+ ) {
72
+ continue;
73
+ }
74
+
75
+ const clean = raw.split(/[?#]/)[0];
76
+ if (!clean) continue;
77
+
78
+ if (clean.startsWith('/')) {
79
+ urls.add(clean);
80
+ continue;
81
+ }
82
+
83
+ if (clean.startsWith('http://') || clean.startsWith('https://')) {
84
+ const localPath = toLocalPathFromAbsolute(clean, absolutePrefixes);
85
+ if (localPath) urls.add(localPath);
86
+ }
32
87
  }
88
+
89
+ attrRegex.lastIndex = 0;
33
90
  }
34
91
 
35
92
  return [...urls];
@@ -9,13 +9,13 @@ export class LinksValidator extends Validator {
9
9
  * Initializes link validator configuration (reserved for future rules).
10
10
  */
11
11
  constructor({
12
- // Reserved for future options.
12
+ absoluteUrlPrefixes = [],
13
13
  } = {}) {
14
14
  super({
15
15
  name: 'links',
16
16
  label: 'Internal links',
17
17
  config: {
18
- // Reserved for future options.
18
+ absoluteUrlPrefixes,
19
19
  },
20
20
  });
21
21
  }
@@ -25,7 +25,9 @@ export class LinksValidator extends Validator {
25
25
  */
26
26
  async validatePage({ html, dirPath }) {
27
27
  const pageWarnings = [];
28
- const urls = extractInternalUrls(html);
28
+ const urls = extractInternalUrls(html, {
29
+ absoluteUrlPrefixes: this.config.absoluteUrlPrefixes,
30
+ });
29
31
 
30
32
  for (const url of urls) {
31
33
  const exists = await internalUrlExists(dirPath, url);