@dukebot/astro-html-validator 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -61,7 +61,12 @@ import { Validator } from '@dukebot/astro-html-validator';
61
61
  const validator = new Validator({
62
62
  dirPath: path.resolve(process.cwd(), 'dist'),
63
63
  config: {
64
- jsonld: {},
64
+ jsonld: {
65
+ requireHtmlLang: true,
66
+ requireInLanguage: true,
67
+ disallowEmptyInLanguage: true,
68
+ requireLangMatch: true,
69
+ },
65
70
  links: {},
66
71
  meta: {
67
72
  metaTitleMinLength: 30,
@@ -77,6 +82,23 @@ const results = await validator.run({ selector: 'all' });
77
82
  console.log(results);
78
83
  ```
79
84
 
85
+ ### Architecture (current)
86
+
87
+ - `src/index.mjs` exports the coordinator class (`Validator`) that orchestrates all checks.
88
+ - `src/validator.mjs` now contains the **base validator class** used via inheritance by concrete validators.
89
+ - Each concrete validator (`jsonld`, `links`, `meta`) encapsulates its own config and page-level validation.
90
+
91
+ ### JSON-LD language consistency options
92
+
93
+ `config.jsonld` now supports optional checks to validate language consistency between `<html lang="...">` and JSON-LD `inLanguage` values:
94
+
95
+ - `requireHtmlLang` (default: `false`)
96
+ - `requireInLanguage` (default: `false`)
97
+ - `disallowEmptyInLanguage` (default: `false`)
98
+ - `requireLangMatch` (default: `false`)
99
+
100
+ When enabled, warnings are reported through the normal validator output (`[WARN] /route -> ...`) so existing integrations remain backward-compatible.
101
+
80
102
  ---
81
103
 
82
104
  ## Suggested scripts for your Astro project
@@ -98,6 +120,7 @@ console.log(results);
98
120
  Quick steps:
99
121
 
100
122
  1. Update `name`, `author`, and `version` in `package.json`.
123
+ - For this release, use a **major bump** (breaking changes accepted).
101
124
  2. Sign in:
102
125
 
103
126
  ```bash
@@ -110,6 +133,12 @@ Quick steps:
110
133
  npm publish --access public
111
134
  ```
112
135
 
136
+ ### Breaking change note
137
+
138
+ `@dukebot/astro-html-validator/validator` now points to the **base validator class** (`src/validator.mjs`) instead of the previous coordinator implementation.
139
+
140
+
141
+
113
142
 
114
143
 
115
144
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dukebot/astro-html-validator",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Validate Astro-generated HTML output for SEO metadata, JSON-LD, and internal links.",
5
5
  "type": "module",
6
6
  "main": "./src/index.mjs",
package/src/index.mjs CHANGED
@@ -1,4 +1,91 @@
1
- import { Validator } from './validator.mjs';
1
+ import { JsonldValidator } from './validators/jsonld.mjs';
2
+ import { LinksValidator } from './validators/links.mjs';
3
+ import { MetaValidator } from './validators/meta.mjs';
2
4
 
3
- export { Validator };
4
- export default Validator;
5
/**
 * Coordinates all available validators and prints optional summaries.
 */
export class HtmlValidator {
  /**
   * Builds a validator coordinator with directory, per-validator config, and output mode.
   *
   * @param {object} [options]
   * @param {string} [options.dirPath] - Directory passed to each validator's `validate()`.
   * @param {object} [options.config] - Per-validator options, read from the `jsonld`,
   *   `links` and `meta` keys.
   * @param {boolean} [options.print=true] - Whether a summary is logged after each run.
   */
  constructor({ dirPath, config = {}, print = true } = {}) {
    this.dirPath = dirPath;

    this.validators = {
      jsonld: new JsonldValidator(config.jsonld),
      links: new LinksValidator(config.links),
      meta: new MetaValidator(config.meta),
    };

    this.print = print;
  }

  /**
   * Builds the error thrown when unknown validator names are requested.
   *
   * @param {string[]} names - The names that are not registered.
   * @returns {Error}
   */
  _unknownValidatorsError(names) {
    return new Error(
      `Unknown validators: ${names.join(', ')}. ` +
        `Valid options: all, ${Object.keys(this.validators).join(', ')}`
    );
  }

  /**
   * Resolves a selector string into a unique list of validator names.
   *
   * @param {string} [selector='all'] - `'all'` or a comma-separated list of names
   *   (case-insensitive, surrounding whitespace ignored).
   * @returns {string[]} Selected names, de-duplicated, in the order they were given.
   * @throws {TypeError} When the selector is not a string.
   * @throws {Error} When the selector names a validator that is not registered.
   */
  _selectValidators(selector = 'all') {
    if (typeof selector !== 'string') {
      throw new TypeError('Validator selector must be a string such as "all" or "meta,links".');
    }

    const clean = selector.trim().toLowerCase();

    if (clean === 'all') return Object.keys(this.validators);

    const selected = clean
      .split(',')
      .map((item) => item.trim())
      .filter(Boolean);

    // Own-property check: a truthiness lookup would accept inherited keys such as
    // "constructor" or "__proto__" and only fail later with an opaque TypeError.
    const invalid = selected.filter((name) => !Object.hasOwn(this.validators, name));
    if (invalid.length > 0) throw this._unknownValidatorsError(invalid);

    return [...new Set(selected)];
  }

  /**
   * Prints a consistent summary for one validator result.
   *
   * @param {{ label: string, checkedPages: number, warnings: string[] }} result
   */
  _printResultSummary(result) {
    console.log(`\n=== ${result.label} ===`);
    console.log(`Checked ${result.checkedPages} HTML pages.`);

    if (result.warnings.length === 0) {
      console.log('✅ No warnings.');
      return;
    }

    console.log(`⚠️ Warnings found: ${result.warnings.length}`);
    for (const warning of result.warnings) console.log(warning);
  }

  /**
   * Runs one validator by name.
   *
   * @param {string} name - Key of a registered validator.
   * @returns {Promise<object>} The result returned by that validator's `validate()`.
   * @throws {Error} When no validator is registered under `name`.
   */
  async runValidator(name) {
    if (!Object.hasOwn(this.validators, name)) throw this._unknownValidatorsError([name]);

    const validator = this.validators[name];
    const result = await validator.validate(this.dirPath);
    if (this.print) this._printResultSummary(result);
    return result;
  }

  /**
   * Runs selected validators sequentially.
   *
   * @param {object} [options]
   * @param {string} [options.selector='all'] - See `_selectValidators`.
   * @returns {Promise<object[]>} One result per selected validator, in selection order.
   */
  async run({ selector = 'all' } = {}) {
    const results = [];
    const selectedNames = this._selectValidators(selector);

    // Sequential on purpose so printed summaries do not interleave.
    for (const name of selectedNames) {
      const result = await this.runValidator(name);
      results.push(result);
    }

    return results;
  }
}
89
+
90
+ export { HtmlValidator as Validator };
91
+ export default HtmlValidator;
@@ -0,0 +1,102 @@
1
/**
 * Extracts the language code declared on the <html> element.
 *
 * Only a real `lang` attribute counts: `data-lang` and `xml:lang` are ignored.
 * Double-quoted, single-quoted and unquoted values are accepted.
 *
 * @param {string} html - Full HTML document source.
 * @returns {string} The trimmed `lang` value, or `''` when there is no <html> tag,
 *   no `lang` attribute, or the value is empty.
 */
export function extractHtmlLang(html) {
  const openingTag = html.match(/<html\b[^>]*>/i)?.[0];
  if (!openingTag) return '';

  // Requiring whitespace before `lang` keeps `data-lang=` / `xml:lang=` from matching.
  const attribute = openingTag.match(/\slang\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/i);
  const value = attribute?.[1] ?? attribute?.[2] ?? attribute?.[3] ?? '';

  return value.trim();
}
8
+
9
/**
 * Recursively collects all inLanguage values from a JSON-LD node tree.
 *
 * Walks arrays and plain objects depth-first. Each own `inLanguage` property is
 * recorded as-is (string, array, object, null, ...) before the node's values are
 * descended into.
 *
 * @param {*} node - Root of the tree; anything that is not an array or object is ignored.
 * @param {Array} [out=[]] - Accumulator that receives the values.
 * @returns {Array} The same `out` array, for convenience.
 */
export function collectInLanguageValues(node, out = []) {
  const walk = (current) => {
    if (Array.isArray(current)) {
      for (const entry of current) walk(entry);
      return;
    }

    const isObject = current !== null && typeof current === 'object';
    if (!isObject) return;

    if (Object.hasOwn(current, 'inLanguage')) out.push(current.inLanguage);

    for (const child of Object.values(current)) walk(child);
  };

  walk(node);
  return out;
}
30
+
31
/**
 * Checks whether any inLanguage value is empty, null, or undefined.
 *
 * A value counts as empty when it is `null`/`undefined`, a whitespace-only string,
 * or an array with at least one such item. Nested arrays and non-string values
 * such as objects or numbers are never considered empty.
 *
 * @param {Array} values - Collected `inLanguage` values.
 * @returns {boolean} `true` when at least one value is empty.
 */
export function hasEmptyInLanguage(values) {
  const isBlank = (candidate) =>
    candidate == null || (typeof candidate === 'string' && candidate.trim() === '');

  return values.some((entry) => (Array.isArray(entry) ? entry.some(isBlank) : isBlank(entry)));
}
48
+
49
/**
 * Returns true when at least one inLanguage value matches the HTML lang.
 *
 * The comparison is exact after trimming and lower-casing both sides, so `en`
 * does not match `en-US`. Values may be strings or flat arrays of strings;
 * anything else never matches.
 *
 * @param {Array} values - Collected `inLanguage` values.
 * @param {string} htmlLang - Language declared on the <html> element.
 * @returns {boolean} `false` when `htmlLang` is not a string or is blank, because
 *   an absent language cannot meaningfully match anything.
 */
export function hasHtmlLang(values, htmlLang) {
  if (typeof htmlLang !== 'string') return false;

  const expected = htmlLang.trim().toLowerCase();
  // Without this guard a blank lang would "match" a blank inLanguage value.
  if (expected === '') return false;

  const matches = (candidate) =>
    typeof candidate === 'string' && candidate.trim().toLowerCase() === expected;

  return values.some((value) => (Array.isArray(value) ? value.some(matches) : matches(value)));
}
69
+
70
/**
 * Extracts and parses JSON-LD script blocks from a page.
 *
 * Every `<script type="application/ld+json">` element with non-blank content
 * yields one entry, in document order. Content that cannot be used as a JSON-LD
 * document is represented by a marker object instead of the parsed value.
 *
 * @param {string} html - Full HTML document source.
 * @returns {Array<object>} Parsed blocks (objects or arrays), or
 *   `{ __parseError: true, __raw }` markers. Entries are never `null` or
 *   primitives, so callers can read properties on them safely.
 */
export function getJsonLdBlocks(html) {
  const scriptPattern =
    /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
  const blocks = [];

  for (const match of html.matchAll(scriptPattern)) {
    const raw = match[1].trim();
    if (!raw) continue;

    let parsed = null;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Not swallowed: `parsed` stays null and is reported as a marker below.
    }

    // `null`, numbers, strings and booleans are valid JSON but not JSON-LD
    // documents; flag them like syntax errors instead of passing them on.
    const usable = parsed !== null && typeof parsed === 'object';
    blocks.push(usable ? parsed : { __parseError: true, __raw: raw });
  }

  return blocks;
}
90
+
91
/**
 * Flattens top-level JSON-LD nodes and @graph nodes into one list.
 *
 * Blocks flagged with `__parseError` are skipped, as is anything that is not an
 * object (`null`, numbers, strings). A block with an array `@graph` contributes
 * the graph's items; any other block contributes itself.
 *
 * @param {Array} blocks - Parsed JSON-LD blocks.
 * @returns {Array} Flat list of nodes in block order.
 */
export function getGraphNodes(blocks) {
  const nodes = [];

  for (const block of blocks) {
    // A block can be `null` or a primitive when the script held bare JSON values.
    if (block === null || typeof block !== 'object') continue;
    if (block.__parseError) continue;

    const graph = block['@graph'];
    if (Array.isArray(graph)) {
      // Item-by-item push avoids the argument-count limit of push(...hugeArray).
      for (const graphNode of graph) nodes.push(graphNode);
    } else {
      nodes.push(block);
    }
  }

  return nodes;
}
@@ -0,0 +1,54 @@
1
+ import path from 'node:path';
2
+ import { pathExists } from './common.mjs';
3
+
4
/**
 * Extracts local (root-relative) URLs from href/src attributes.
 *
 * Only quoted values that start with a single `/` are kept. Fragments and query
 * strings are stripped, and duplicates are removed while keeping first-seen order.
 *
 * @param {string} html - HTML source to scan.
 * @returns {string[]} Unique root-relative paths.
 */
export function extractInternalUrls(html) {
  // Prefixes that mark a target as external or otherwise not a site path.
  const skippedPrefixes = [
    'http://',
    'https://',
    '//',
    '#',
    'mailto:',
    'tel:',
    'javascript:',
    'data:',
  ];
  const attributePattern = /(?:href|src)=["']([^"']+)["']/gi;
  const found = new Set();

  for (const [, captured] of html.matchAll(attributePattern)) {
    const target = captured.trim();
    if (target === '') continue;
    if (skippedPrefixes.some((prefix) => target.startsWith(prefix))) continue;
    if (!target.startsWith('/')) continue;

    const [withoutFragment] = target.split('#');
    const [pathOnly] = withoutFragment.split('?');
    if (pathOnly) found.add(pathOnly);
  }

  return Array.from(found);
}
37
+
38
/**
 * Checks whether an internal URL resolves to a file in dist.
 *
 * `/` maps to `index.html`. Any other path is tried as an exact path, as a
 * directory containing `index.html`, and as `<path>.html`. The path is tried as
 * written first, then percent-decoded, so a link such as `/caf%C3%A9/` finds
 * `café/index.html` on disk.
 *
 * @param {string} dirPath - Root directory of the generated site.
 * @param {string} urlPath - Root-relative URL path without query or fragment.
 * @returns {Promise<boolean>} Whether any candidate location exists.
 */
export async function internalUrlExists(dirPath, urlPath) {
  if (urlPath === '/') return pathExists(path.join(dirPath, 'index.html'));

  const relativePath = urlPath.replace(/^\//, '');
  const spellings = [relativePath];

  try {
    const decoded = decodeURIComponent(relativePath);
    if (decoded !== relativePath) spellings.push(decoded);
  } catch {
    // Malformed escape sequence: only the literal spelling can be checked.
  }

  for (const spelling of spellings) {
    const candidates = [
      path.join(dirPath, spelling),
      path.join(dirPath, spelling, 'index.html'),
      path.join(dirPath, `${spelling}.html`),
    ];

    for (const candidate of candidates) {
      if (await pathExists(candidate)) return true;
    }
  }

  return false;
}
@@ -0,0 +1,122 @@
1
+ import { getAttr } from './common.mjs';
2
+
3
// Core metadata tags expected on every page.
// Each entry pairs the label used in "Missing ..." warnings with a predicate that
// receives the page HTML and returns a truthy value when the tag is present.
export const REQUIRED_META_CHECKS = [
  { label: 'meta title', check: getTitleContent },
  { label: 'meta description', check: (html) => hasMeta(html, 'description') },
  { label: 'canonical', check: hasCanonical },
  { label: 'meta robots', check: (html) => hasMeta(html, 'robots') },
  // Open Graph tags are looked up by `property`, and the label is the property name.
  ...['og:title', 'og:description', 'og:url', 'og:type'].map((property) => ({
    label: property,
    check: (html) => hasMeta(html, property, true),
  })),
];
14
+
15
/**
 * Returns whether a meta tag exists with the expected key and non-empty content.
 *
 * `name` keys are compared case-insensitively, because HTML metadata names are
 * case-insensitive. `property` keys (Open Graph) are compared exactly.
 *
 * @param {string} html - HTML source to scan.
 * @param {string} name - Expected value of the `name` (or `property`) attribute.
 * @param {boolean} [isProperty=false] - Match on `property` instead of `name`.
 * @returns {boolean} `true` when a matching tag has a truthy `content` attribute.
 */
export function hasMeta(html, name, isProperty = false) {
  const keyAttribute = isProperty ? 'property' : 'name';
  // NOTE(review): assumes getAttr returns the attribute value as a string, or a
  // falsy value when the attribute is absent — confirm against utils/common.mjs.
  const normalize = (value) =>
    isProperty || typeof value !== 'string' ? value : value.toLowerCase();
  const wanted = normalize(name);

  const tags = html.match(/<meta\b[^>]*>/gi) ?? [];
  return tags.some((tag) => {
    const key = getAttr(tag, keyAttribute);
    if (!key || normalize(key) !== wanted) return false;
    return Boolean(getAttr(tag, 'content'));
  });
}
27
+
28
/**
 * Returns the `content` value for a meta tag by name/property.
 *
 * `name` keys are compared case-insensitively, because HTML metadata names are
 * case-insensitive. `property` keys (Open Graph) are compared exactly. Matching
 * tags whose `content` is empty are skipped in favour of a later one.
 *
 * @param {string} html - HTML source to scan.
 * @param {string} name - Expected value of the `name` (or `property`) attribute.
 * @param {boolean} [isProperty=false] - Match on `property` instead of `name`.
 * @returns {string} The first non-empty `content` found, or `''`.
 */
export function getMetaContent(html, name, isProperty = false) {
  const keyAttribute = isProperty ? 'property' : 'name';
  // NOTE(review): assumes getAttr returns the attribute value as a string, or a
  // falsy value when the attribute is absent — confirm against utils/common.mjs.
  const normalize = (value) =>
    isProperty || typeof value !== 'string' ? value : value.toLowerCase();
  const wanted = normalize(name);

  const tags = html.match(/<meta\b[^>]*>/gi) ?? [];
  for (const tag of tags) {
    const key = getAttr(tag, keyAttribute);
    if (!key || normalize(key) !== wanted) continue;

    const content = getAttr(tag, 'content');
    if (content) return content;
  }

  return '';
}
41
+
42
/**
 * Checks that a canonical link tag exists with a non-empty href.
 *
 * `rel` is treated as a case-insensitive, whitespace-separated token list, so
 * `rel="canonical"`, `rel="Canonical"` and `rel="alternate canonical"` all qualify.
 *
 * @param {string} html - HTML source to scan.
 * @returns {boolean} `true` when a <link> with the `canonical` token has a truthy `href`.
 */
export function hasCanonical(html) {
  const links = html.match(/<link\b[^>]*>/gi) ?? [];

  return links.some((tag) => {
    // NOTE(review): assumes getAttr returns the attribute value as a string, or a
    // falsy value when the attribute is absent — confirm against utils/common.mjs.
    const rel = getAttr(tag, 'rel');
    if (!rel) return false;

    const relTokens = rel.toLowerCase().split(/\s+/);
    if (!relTokens.includes('canonical')) return false;

    return Boolean(getAttr(tag, 'href'));
  });
}
54
+
55
/**
 * Extracts the document title text.
 *
 * Matches the first `<title>` element that has non-empty text, whether or not the
 * opening tag carries attributes.
 *
 * @param {string} html - HTML source to scan.
 * @returns {string} The trimmed title text, or `''` when none is found.
 */
export function getTitleContent(html) {
  // `\b[^>]*` tolerates attributes on the tag without matching e.g. <titlebar>.
  const match = html.match(/<title\b[^>]*>([^<]+)<\/title>/i);
  return match?.[1]?.trim() ?? '';
}
62
+
63
/**
 * Validates optional length ranges for title/description fields.
 *
 * @param {object} options
 * @param {string} [options.value] - Text to measure; empty or missing text is not checked.
 * @param {number|null} [options.min=1] - Minimum length; `null`/`undefined` mean 1.
 * @param {number|null} [options.max=Infinity] - Maximum length; `null`/`undefined` mean no limit.
 * @param {string} options.fieldLabel - Field name used in the warning text.
 * @returns {string|undefined} A warning when the length is out of range, otherwise `undefined`.
 */
export function validateLengthRange({ value, min = 1, max = Infinity, fieldLabel }) {
  if (!value) return undefined;

  // Parameter defaults only cover `undefined`; a `null` bound must fall back too,
  // otherwise every value fails with a "null-null" range.
  const lower = min ?? 1;
  const upper = max ?? Infinity;

  if (value.length >= lower && value.length <= upper) return undefined;
  return `Recommended ${fieldLabel} length is ${lower}-${upper}. Current: ${value.length}.`;
}
71
+
72
/**
 * Evaluates all mandatory metadata checks for a page.
 *
 * @param {string} html - HTML source of one page.
 * @returns {string[]} One `Missing <label>.` warning per failed check in
 *   REQUIRED_META_CHECKS, in the order the checks are declared.
 */
export function validateRequiredMeta(html) {
  return REQUIRED_META_CHECKS
    .filter((requirement) => !requirement.check(html))
    .map((requirement) => `Missing ${requirement.label}.`);
}
86
+
87
/**
 * Runs required and optional SEO metadata checks for one HTML string.
 *
 * Required-tag warnings come first, followed by the title length warning and then
 * the description length warning. Checks that produce no warning are dropped.
 *
 * @param {string} html - HTML source of one page.
 * @param {object} [options]
 * @param {number} [options.metaTitleMinLength] - Lower bound for the title length.
 * @param {number} [options.metaTitleMaxLength] - Upper bound for the title length.
 * @param {number} [options.metaDescriptionMinLength] - Lower bound for the description length.
 * @param {number} [options.metaDescriptionMaxLength] - Upper bound for the description length.
 * @returns {string[]} Warning messages for the page.
 */
export function validateHtmlMeta(
  html,
  {
    metaTitleMinLength,
    metaTitleMaxLength,
    metaDescriptionMinLength,
    metaDescriptionMaxLength,
  } = {}
) {
  const requiredWarnings = validateRequiredMeta(html);

  const titleWarning = validateLengthRange({
    value: getTitleContent(html),
    min: metaTitleMinLength,
    max: metaTitleMaxLength,
    fieldLabel: 'meta title',
  });

  const descriptionWarning = validateLengthRange({
    value: getMetaContent(html, 'description'),
    min: metaDescriptionMinLength,
    max: metaDescriptionMaxLength,
    fieldLabel: 'meta description',
  });

  return [...requiredWarnings, titleWarning, descriptionWarning].filter(Boolean);
}
package/src/validator.mjs CHANGED
@@ -1,98 +1,44 @@
1
- import { validateJsonld } from './validators/jsonld.mjs';
2
- import { validateLinks } from './validators/links.mjs';
3
- import { validateMeta } from './validators/meta.mjs';
1
+ import { runHtmlValidation } from './utils/common.mjs';
4
2
 
5
3
  /**
6
- * Coordinates all available validators and prints optional summaries.
4
+ * Base validator with shared execution flow for HTML page-by-page checks.
7
5
  */
8
6
  export class Validator {
9
- constructor({ dirPath, config = {}, print = true } = {}) {
10
- this.dirPath = dirPath;
11
-
12
- this.validators = {
13
- jsonld: {
14
- label: 'JSON-LD',
15
- run: validateJsonld,
16
- config: config.jsonld,
17
- },
18
- links: {
19
- label: 'Internal links',
20
- run: validateLinks,
21
- config: config.links,
22
- },
23
- meta: {
24
- label: 'SEO metadata',
25
- run: validateMeta,
26
- config: config.meta,
27
- },
28
- };
29
-
30
- this.print = print;
31
- }
32
-
33
- /**
34
- * Resolves a selector string into a unique list of validator names.
35
- */
36
- selectValidators(selector = 'all') {
37
- const clean = selector.trim().toLowerCase();
38
-
39
- if (clean === 'all') return Object.keys(this.validators);
40
-
41
- const selected = clean
42
- .split(',')
43
- .map((item) => item.trim())
44
- .filter(Boolean);
45
-
46
- const invalid = selected.filter((name) => !this.validators[name]);
47
- if (invalid.length > 0) {
48
- throw new Error(
49
- `Unknown validators: ${invalid.join(', ')}. ` +
50
- `Valid options: all, ${Object.keys(this.validators).join(', ')}`
51
- );
52
- }
53
-
54
- return [...new Set(selected)];
55
- }
56
-
57
7
  /**
58
- * Prints a consistent summary for one validator result.
8
+ * Initializes shared validator metadata and config.
59
9
  */
60
- printResultSummary(result) {
61
- console.log(`\n=== ${result.label} ===`);
62
- console.log(`Checked ${result.checkedPages} HTML pages.`);
63
-
64
- if (result.warnings.length === 0) {
65
- console.log('✅ No warnings.');
66
- return;
67
- }
10
+ constructor({ name, label, config = {} } = {}) {
11
+ if (!name) throw new Error('Validator name is required.');
12
+ if (!label) throw new Error('Validator label is required.');
68
13
 
69
- console.log(`⚠️ Warnings found: ${result.warnings.length}`);
70
- for (const warning of result.warnings) console.log(warning);
14
+ this.name = name;
15
+ this.label = label;
16
+ this.config = config;
71
17
  }
72
18
 
73
19
  /**
74
- * Runs one validator by name.
20
+ * Runs full validation over all HTML pages.
75
21
  */
76
- async runValidator(name) {
77
- const validator = this.validators[name];
78
- const result = await validator.run(this.dirPath, validator.config);
79
- result.label = validator.label;
80
- if (this.print) this.printResultSummary(result);
81
- return result;
22
+ async validate(dirPath) {
23
+ const { checkedPages, warnings } = await runHtmlValidation({
24
+ dirPath,
25
+ validatePage: (pageContext) => this.validatePage({ ...pageContext, dirPath }),
26
+ });
27
+
28
+ return {
29
+ name: this.name,
30
+ label: this.label,
31
+ checkedPages,
32
+ warnings,
33
+ };
82
34
  }
83
35
 
84
36
  /**
85
- * Runs selected validators sequentially.
37
+ * Validates one page. Child classes must override this.
86
38
  */
87
- async run({ selector = 'all' } = {}) {
88
- const results = [];
89
- const selectedNames = this.selectValidators(selector);
90
-
91
- for (const name of selectedNames) {
92
- const result = await this.runValidator(name);
93
- results.push(result);
94
- }
95
-
96
- return results;
39
+ async validatePage() {
40
+ throw new Error('validatePage() must be implemented by child validators.');
97
41
  }
98
42
  }
43
+
44
+ export default Validator;
@@ -1,74 +1,91 @@
1
- import { runHtmlValidation } from '../utils.mjs';
1
+ import { Validator } from '../validator.mjs';
2
+ import {
3
+ collectInLanguageValues,
4
+ extractHtmlLang,
5
+ getGraphNodes,
6
+ getJsonLdBlocks,
7
+ hasEmptyInLanguage,
8
+ hasHtmlLang,
9
+ } from '../utils/jsonld.mjs';
2
10
 
3
- /**
4
- * Extracts and parses JSON-LD script blocks from a page.
5
- */
6
- function getJsonLdBlocks(html) {
7
- const regex = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
8
- const blocks = [];
9
- let match;
11
+ export class JsonldValidator extends Validator {
12
+ /**
13
+ * Stores JSON-LD language consistency options for this validator instance.
14
+ */
15
+ constructor({
16
+ requireHtmlLang = false,
17
+ requireInLanguage = false,
18
+ disallowEmptyInLanguage = false,
19
+ requireLangMatch = false,
20
+ } = {}) {
21
+ super({
22
+ name: 'jsonld',
23
+ label: 'JSON-LD',
24
+ config: {
25
+ requireHtmlLang,
26
+ requireInLanguage,
27
+ disallowEmptyInLanguage,
28
+ requireLangMatch,
29
+ },
30
+ });
31
+ }
32
+
33
+ /**
34
+ * Applies language-related JSON-LD checks for one parsed HTML page.
35
+ */
36
+ validateJsonLdLanguage({ html, nodes }) {
37
+ const warnings = [];
38
+ const htmlLang = extractHtmlLang(html);
39
+ const inLanguageValues = collectInLanguageValues(nodes);
10
40
 
11
- while ((match = regex.exec(html)) !== null) {
12
- const raw = match[1]?.trim();
13
- if (!raw) continue;
14
- try {
15
- blocks.push(JSON.parse(raw));
16
- } catch {
17
- blocks.push({ __parseError: true, __raw: raw });
41
+ if (this.config.requireHtmlLang && !htmlLang) {
42
+ warnings.push('Missing <html lang="..."> value to validate JSON-LD language consistency.');
43
+ return warnings;
18
44
  }
19
- }
20
45
 
21
- return blocks;
22
- }
46
+ if (this.config.requireInLanguage && inLanguageValues.length === 0) {
47
+ warnings.push('No inLanguage property was found in JSON-LD.');
48
+ return warnings;
49
+ }
23
50
 
24
- /**
25
- * Flattens top-level JSON-LD nodes and @graph nodes into one list.
26
- */
27
- function getGraphNodes(blocks) {
28
- const nodes = [];
29
- for (const block of blocks) {
30
- if (block.__parseError) continue;
31
- if (Array.isArray(block['@graph'])) nodes.push(...block['@graph']);
32
- else nodes.push(block);
33
- }
34
- return nodes;
35
- }
51
+ if (this.config.disallowEmptyInLanguage && hasEmptyInLanguage(inLanguageValues)) {
52
+ warnings.push('Found empty or null inLanguage value(s) in JSON-LD.');
53
+ }
54
+
55
+ if (this.config.requireLangMatch && htmlLang && !hasHtmlLang(inLanguageValues, htmlLang)) {
56
+ warnings.push(`No JSON-LD inLanguage matches <html lang="${htmlLang}">.`);
57
+ }
36
58
 
37
- /**
38
- * Validates JSON-LD presence/basic parseability for each HTML page.
39
- */
40
- export async function validateJsonld(dirPath) {
41
- const { checkedPages, warnings } = await runHtmlValidation({
42
- dirPath,
43
- validatePage: ({ html }) => {
44
- const pageWarnings = [];
45
- const blocks = getJsonLdBlocks(html);
59
+ return warnings;
60
+ }
46
61
 
47
- if (blocks.length === 0) {
48
- pageWarnings.push('No JSON-LD block was found.');
49
- return pageWarnings;
50
- }
62
+ /**
63
+ * Validates JSON-LD structure and optional language consistency for one page.
64
+ */
65
+ validatePage({ html }) {
66
+ const pageWarnings = [];
67
+ const blocks = getJsonLdBlocks(html);
51
68
 
52
- if (blocks.some((b) => b.__parseError)) {
53
- pageWarnings.push('At least one JSON-LD block has invalid JSON.');
54
- return pageWarnings;
55
- }
69
+ if (blocks.length === 0) {
70
+ pageWarnings.push('No JSON-LD block was found.');
71
+ return pageWarnings;
72
+ }
56
73
 
57
- const nodes = getGraphNodes(blocks);
74
+ if (blocks.some((b) => b.__parseError)) {
75
+ pageWarnings.push('At least one JSON-LD block has invalid JSON.');
76
+ return pageWarnings;
77
+ }
58
78
 
59
- if (nodes.length === 0) {
60
- pageWarnings.push('JSON-LD exists but has no nodes in @graph.');
61
- return pageWarnings;
62
- }
79
+ const nodes = getGraphNodes(blocks);
63
80
 
81
+ if (nodes.length === 0) {
82
+ pageWarnings.push('JSON-LD exists but has no nodes in @graph.');
64
83
  return pageWarnings;
65
- },
66
- });
84
+ }
67
85
 
68
- return {
69
- name: 'jsonld',
70
- label: 'JSON-LD',
71
- checkedPages,
72
- warnings,
73
- };
86
+ pageWarnings.push(...this.validateJsonLdLanguage({ html, nodes }));
87
+ return pageWarnings;
88
+ }
74
89
  }
90
+
91
+ export default JsonldValidator
@@ -1,81 +1,39 @@
1
- import path from 'node:path';
2
- import { pathExists, runHtmlValidation } from '../utils.mjs';
1
+ import { Validator } from '../validator.mjs';
2
+ import { extractInternalUrls, internalUrlExists } from '../utils/links.mjs';
3
3
 
4
4
  /**
5
- * Extracts local (root-relative) URLs from href/src attributes.
5
+ * Reports broken internal links for each generated HTML page.
6
6
  */
7
- function extractInternalUrls(html) {
8
- const urls = new Set();
9
- const regex = /(?:href|src)=["']([^"']+)["']/gi;
7
+ export class LinksValidator extends Validator {
8
+ /**
9
+ * Initializes link validator configuration (reserved for future rules).
10
+ */
11
+ constructor({
12
+ // Reserved for future options.
13
+ } = {}) {
14
+ super({
15
+ name: 'links',
16
+ label: 'Internal links',
17
+ config: {
18
+ // Reserved for future options.
19
+ },
20
+ });
21
+ }
10
22
 
11
- let match;
12
- while ((match = regex.exec(html)) !== null) {
13
- const raw = match[1]?.trim();
14
- if (!raw) continue;
23
+ /**
24
+ * Validates internal link targets for one HTML page.
25
+ */
26
+ async validatePage({ html, dirPath }) {
27
+ const pageWarnings = [];
28
+ const urls = extractInternalUrls(html);
15
29
 
16
- if (
17
- raw.startsWith('http://') ||
18
- raw.startsWith('https://') ||
19
- raw.startsWith('//') ||
20
- raw.startsWith('#') ||
21
- raw.startsWith('mailto:') ||
22
- raw.startsWith('tel:') ||
23
- raw.startsWith('javascript:') ||
24
- raw.startsWith('data:')
25
- ) {
26
- continue;
30
+ for (const url of urls) {
31
+ const exists = await internalUrlExists(dirPath, url);
32
+ if (!exists) pageWarnings.push(`Internal link not found: ${url}`);
27
33
  }
28
34
 
29
- if (raw.startsWith('/')) {
30
- const clean = raw.split('#')[0].split('?')[0];
31
- if (clean) urls.add(clean);
32
- }
35
+ return pageWarnings;
33
36
  }
34
-
35
- return [...urls];
36
37
  }
37
38
 
38
- /**
39
- * Checks whether an internal URL resolves to an HTML file in dist.
40
- */
41
- async function internalUrlExists(dirPath, urlPath) {
42
- if (urlPath === '/') return pathExists(path.join(dirPath, 'index.html'));
43
-
44
- const asFile = path.join(dirPath, urlPath.replace(/^\//, ''));
45
- if (await pathExists(asFile)) return true;
46
-
47
- const asIndex = path.join(dirPath, urlPath.replace(/^\//, ''), 'index.html');
48
- if (await pathExists(asIndex)) return true;
49
-
50
- const asHtml = path.join(dirPath, `${urlPath.replace(/^\//, '')}.html`);
51
- if (await pathExists(asHtml)) return true;
52
-
53
- return false;
54
- }
55
-
56
- /**
57
- * Reports broken internal links for each generated HTML page.
58
- */
59
- export async function validateLinks(dirPath) {
60
- const { checkedPages, warnings } = await runHtmlValidation({
61
- dirPath,
62
- validatePage: async ({ html }) => {
63
- const pageWarnings = [];
64
- const urls = extractInternalUrls(html);
65
-
66
- for (const url of urls) {
67
- const exists = await internalUrlExists(dirPath, url);
68
- if (!exists) pageWarnings.push(`Internal link not found: ${url}`);
69
- }
70
-
71
- return pageWarnings;
72
- },
73
- });
74
-
75
- return {
76
- name: 'links',
77
- label: 'Internal links',
78
- checkedPages,
79
- warnings,
80
- };
81
- }
39
+ export default LinksValidator
@@ -1,130 +1,37 @@
1
- import { getAttr, runHtmlValidation } from '../utils.mjs';
2
-
3
- // Core metadata tags expected on every page.
4
- const REQUIRED_META_CHECKS = [
5
- { label: 'meta title', check: getTitleContent },
6
- { label: 'meta description', check: (html) => hasMeta(html, 'description') },
7
- { label: 'canonical', check: hasCanonical },
8
- { label: 'meta robots', check: (html) => hasMeta(html, 'robots') },
9
- { label: 'og:title', check: (html) => hasMeta(html, 'og:title', true) },
10
- { label: 'og:description', check: (html) => hasMeta(html, 'og:description', true) },
11
- { label: 'og:url', check: (html) => hasMeta(html, 'og:url', true) },
12
- { label: 'og:type', check: (html) => hasMeta(html, 'og:type', true) },
13
- ];
14
-
15
- function hasMeta(html, name, isProperty = false) {
16
- const tags = html.match(/<meta\b[^>]*>/gi) || [];
17
- return tags.some((tag) => {
18
- const key = isProperty ? getAttr(tag, 'property') : getAttr(tag, 'name');
19
- if (!key || key !== name) return false;
20
- const content = getAttr(tag, 'content');
21
- return !!content;
22
- });
23
- }
24
-
25
- /**
26
- * Returns the `content` value for a meta tag by name/property.
27
- */
28
- function getMetaContent(html, name, isProperty = false) {
29
- const tags = html.match(/<meta\b[^>]*>/gi) || [];
30
- for (const tag of tags) {
31
- const key = isProperty ? getAttr(tag, 'property') : getAttr(tag, 'name');
32
- if (!key || key !== name) continue;
33
- const content = getAttr(tag, 'content');
34
- if (content) return content;
35
- }
36
- return '';
37
- }
38
-
39
- /**
40
- * Checks that a canonical link tag exists with a non-empty href.
41
- */
42
- function hasCanonical(html) {
43
- const links = html.match(/<link\b[^>]*>/gi) || [];
44
- return links.some((tag) => {
45
- const rel = getAttr(tag, 'rel');
46
- if (!rel || rel.toLowerCase() !== 'canonical') return false;
47
- const href = getAttr(tag, 'href');
48
- return !!href;
49
- });
50
- }
51
-
52
- function getTitleContent(html) {
53
- const match = html.match(/<title>([^<]+)<\/title>/i);
54
- return match?.[1]?.trim() ?? '';
55
- }
56
-
57
- /**
58
- * Validates optional length ranges for title/description fields.
59
- */
60
- function validateLengthRange({ value, min = 1, max = Infinity, fieldLabel }) {
61
- if (!value) return;
62
- if (value.length >= min && value.length <= max) return;
63
- return `Recommended ${fieldLabel} length is ${min}-${max}. Current: ${value.length}.`;
64
- }
65
-
66
- function validateRequiredMeta(html) {
67
- const warnings = [];
68
-
69
- for (const item of REQUIRED_META_CHECKS) {
70
- if (!item.check(html)) {
71
- warnings.push(`Missing ${item.label}.`);
72
- }
73
- }
74
-
75
- return warnings;
76
- }
1
+ import { Validator } from '../validator.mjs';
2
+ import { validateHtmlMeta } from '../utils/meta.mjs';
77
3
 
78
4
  /**
79
- * Runs required and optional SEO metadata checks for one HTML string.
5
+ * Validates required SEO metadata and optional length recommendations.
80
6
  */
81
- function validateHtmlMeta(
82
- html,
83
- {
7
+ export class MetaValidator extends Validator {
8
+ /**
9
+ * Stores metadata validation thresholds for this validator instance.
10
+ */
11
+ constructor({
84
12
  metaTitleMinLength,
85
13
  metaTitleMaxLength,
86
14
  metaDescriptionMinLength,
87
15
  metaDescriptionMaxLength,
88
- } = {}
89
- ) {
90
- const warnings = [];
91
-
92
- warnings.push(...validateRequiredMeta(html));
93
-
94
- warnings.push(
95
- validateLengthRange({
96
- value: getTitleContent(html),
97
- min: metaTitleMinLength,
98
- max: metaTitleMaxLength,
99
- fieldLabel: 'meta title',
100
- })
101
- );
102
-
103
- warnings.push(
104
- validateLengthRange({
105
- value: getMetaContent(html, 'description'),
106
- min: metaDescriptionMinLength,
107
- max: metaDescriptionMaxLength,
108
- fieldLabel: 'meta description',
109
- })
110
- );
16
+ } = {}) {
17
+ super({
18
+ name: 'meta',
19
+ label: 'SEO metadata',
20
+ config: {
21
+ metaTitleMinLength,
22
+ metaTitleMaxLength,
23
+ metaDescriptionMinLength,
24
+ metaDescriptionMaxLength,
25
+ },
26
+ });
27
+ }
111
28
 
112
- return warnings.filter(Boolean);
29
+ /**
30
+ * Validates metadata rules for one HTML page.
31
+ */
32
+ validatePage({ html }) {
33
+ return validateHtmlMeta(html, this.config);
34
+ }
113
35
  }
114
36
 
115
- /**
116
- * Validates SEO metadata for every HTML page in dist.
117
- */
118
- export async function validateMeta(dirPath, options) {
119
- const { checkedPages, warnings } = await runHtmlValidation({
120
- dirPath,
121
- validatePage: ({ html }) => validateHtmlMeta(html, options),
122
- });
123
-
124
- return {
125
- name: 'meta',
126
- label: 'SEO metadata',
127
- checkedPages,
128
- warnings,
129
- };
130
- }
37
+ export default MetaValidator
File without changes