dogsbay 0.0.0-init → 0.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/bin/cli.js +2 -0
  2. package/dist/audit/dist/crawler.js +71 -0
  3. package/dist/audit/output/json.js +3 -0
  4. package/dist/audit/output/text.js +84 -0
  5. package/dist/audit/registry.js +62 -0
  6. package/dist/audit/rules/seo/description-length.js +26 -0
  7. package/dist/audit/rules/seo/description-missing.js +24 -0
  8. package/dist/audit/rules/seo/dist-meta.js +65 -0
  9. package/dist/audit/rules/seo/h1-brand-keyword.js +99 -0
  10. package/dist/audit/rules/seo/index.js +63 -0
  11. package/dist/audit/rules/seo/json-ld-parse.js +32 -0
  12. package/dist/audit/rules/seo/json-ld-required-fields.js +116 -0
  13. package/dist/audit/rules/seo/keyword-placement.js +122 -0
  14. package/dist/audit/rules/seo/sitemap.js +240 -0
  15. package/dist/audit/rules/seo/title-missing.js +33 -0
  16. package/dist/audit/rules/structure/index.js +37 -0
  17. package/dist/audit/rules/structure/locale-coherence.js +32 -0
  18. package/dist/audit/rules/structure/namespace-coherence.js +39 -0
  19. package/dist/audit/rules/structure/version-coherence.js +42 -0
  20. package/dist/audit/run.js +89 -0
  21. package/dist/audit/types.js +1 -0
  22. package/dist/commands/add.js +174 -0
  23. package/dist/commands/convert.js +89 -0
  24. package/dist/commands/export-techdocs.js +435 -0
  25. package/dist/commands/import-mkdocs.js +1388 -0
  26. package/dist/commands/init.js +234 -0
  27. package/dist/commands/lighthouse.js +236 -0
  28. package/dist/commands/preprocess-variants.js +204 -0
  29. package/dist/commands/pull.js +717 -0
  30. package/dist/commands/site-build.js +457 -0
  31. package/dist/commands/site-check.js +236 -0
  32. package/dist/commands/site-dev.js +99 -0
  33. package/dist/commands/site-init.js +376 -0
  34. package/dist/config/defaults.js +85 -0
  35. package/dist/config/find.js +38 -0
  36. package/dist/config/index.js +7 -0
  37. package/dist/config/load.js +748 -0
  38. package/dist/config/output.js +18 -0
  39. package/dist/config/schema.js +32 -0
  40. package/dist/config/serialize.js +40 -0
  41. package/dist/config/to-astro-options.js +97 -0
  42. package/dist/import-content.js +411 -0
  43. package/dist/index.js +218 -0
  44. package/dist/plugins/context.js +49 -0
  45. package/dist/plugins/index.js +9 -0
  46. package/dist/plugins/lifecycle.js +230 -0
  47. package/dist/plugins/loader.js +214 -0
  48. package/dist/registry.js +683 -0
  49. package/dist/source-resolver.js +130 -0
  50. package/dist/utils/project.js +23 -0
  51. package/package.json +50 -8
  52. package/README.md +0 -10
package/bin/cli.js ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ import "../dist/index.js";
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Dist crawler — walk a built site, parse every HTML file once
3
+ * with cheerio, and return them as `ParsedHtmlFile[]` ready for
4
+ * dist-stage rules.
5
+ *
6
+ * Parsing once and sharing the cheerio doc across rules avoids
7
+ * the N-rule × N-file re-parse that would otherwise dominate
8
+ * audit runtime on large sites.
9
+ */
10
+ import { readdirSync, readFileSync, statSync } from "node:fs";
11
+ import { join, relative } from "node:path";
12
+ import { load as loadHtml } from "cheerio";
13
+ const DEFAULT_EXTENSIONS = ["html", "htm"];
14
+ const DEFAULT_EXCLUDE_DIRS = ["pagefind", "_astro", "assets"];
15
/**
 * Walk `distRoot` recursively, parse every matching HTML file with
 * cheerio and return the parsed files sorted by their path relative
 * to `distRoot`.
 *
 * @param {string} distRoot Directory to crawl.
 * @param {{ extensions?: string[], excludeDirs?: string[] }} [options]
 *   `extensions` may be written with or without a leading dot and are
 *   lower-cased before matching; `excludeDirs` is handed to `walk` as a
 *   set. Both fall back to the module defaults when omitted.
 * @returns {Array<{ path: string, html: string, $: unknown }>} Parsed
 *   files as collected by `walk`, in sorted order.
 */
export function crawlDist(distRoot, options = {}) {
    const requested = options.extensions ?? DEFAULT_EXTENSIONS;
    const exts = new Set(requested.map((e) => (e.startsWith(".") ? e.slice(1) : e).toLowerCase()));
    const excludeDirs = new Set(options.excludeDirs ?? DEFAULT_EXCLUDE_DIRS);
    const files = [];
    walk(distRoot, distRoot, exts, excludeDirs, files);
    // Pin the collation locale: without it `localeCompare` uses the
    // host's default locale, so the "deterministic" order could differ
    // between a developer machine and CI.
    files.sort((a, b) => a.path.localeCompare(b.path, "en"));
    return files;
}
28
/**
 * Recursive worker for the dist crawl. Reads `current`, descends into
 * sub-directories that are not excluded, and appends one
 * `{ path, html, $ }` record to `out` for every regular file whose
 * extension is in `exts`.
 *
 * Directory-read and stat failures are skipped on purpose (best
 * effort); validating that the root exists is left to the caller.
 *
 * @param {string} root Crawl root; `path` values are relative to it.
 * @param {string} current Directory being read on this call.
 * @param {Set<string>} exts Lower-case extensions without the dot.
 * @param {Set<string>} excludeDirs Directory basenames or root-relative
 *   paths (written with `/`) to skip.
 * @param {Array<{ path: string, html: string, $: unknown }>} out
 *   Accumulator, mutated in place.
 */
function walk(root, current, exts, excludeDirs, out) {
    let entries;
    try {
        entries = readdirSync(current);
    }
    catch {
        return; // Directory missing — caller's problem to validate, not ours
    }
    for (const entry of entries) {
        const full = join(current, entry);
        let st;
        try {
            st = statSync(full);
        }
        catch {
            continue;
        }
        if (st.isDirectory()) {
            const rel = relative(root, full);
            // `relative` uses the platform separator, so on Windows a
            // configured `docs/internal` would never equal `docs\internal`.
            // Check the forward-slash form as well as the native one.
            const relPosix = rel.split("\\").join("/");
            const excluded = excludeDirs.has(entry) ||
                excludeDirs.has(rel) ||
                excludeDirs.has(relPosix);
            if (!excluded) {
                walk(root, full, exts, excludeDirs, out);
            }
            continue;
        }
        if (!st.isFile())
            continue;
        const dot = entry.lastIndexOf(".");
        if (dot < 0)
            continue; // no extension at all
        const ext = entry.slice(dot + 1).toLowerCase();
        if (!exts.has(ext))
            continue;
        const html = readFileSync(full, "utf-8");
        const $ = loadHtml(html);
        out.push({
            path: relative(root, full),
            html,
            $,
        });
    }
}
@@ -0,0 +1,3 @@
1
/**
 * Serialise an audit report as pretty-printed JSON (2-space indent)
 * terminated by a single newline.
 *
 * @param {unknown} report Value to serialise.
 * @returns {string} JSON text followed by "\n".
 */
export function formatJson(report) {
    const body = JSON.stringify(report, null, 2);
    return `${body}\n`;
}
@@ -0,0 +1,84 @@
1
+ /**
2
+ * ESLint-style text formatter for audit reports.
3
+ *
4
+ * Issues group by file, sorted by file path. Within a file,
5
+ * issues stay in registration order (the runner already
6
+ * produces a stable order). Severity gets coloured via
7
+ * picocolors when stdout is a TTY.
8
+ *
9
+ * Sample output:
10
+ *
11
+ * src/content/docs/intro.md
12
+ * warning Description too long (185 chars; max 160) seo/description-length
13
+ * warning H1 missing — neither frontmatter title nor body seo/title-missing
14
+ *
15
+ * dist/concepts/seo/index.html
16
+ * error Broken link: /old-name/ links/dist-internal-links
17
+ *
18
+ * ✖ 3 issues (1 error, 2 warnings) across 2 files
19
+ */
20
+ import pc from "picocolors";
21
/**
 * Render an audit report as ESLint-style text.
 *
 * Issues are grouped under their `file`, files are listed in sorted
 * order, and issues within a file keep the order they have in
 * `report.issues`. Severity and message columns are padded per file.
 * A summary line closes the output; a report without issues yields a
 * single "Clean" line instead.
 *
 * @param {{ issues: Array<{ file: string, severity: string, message: string, ruleId: string }>,
 *           summary: { errors: number, warnings: number, info: number, files: number, rulesRun: number } }} report
 * @param {{ color?: boolean }} [options] `color` forces colour on or
 *   off; when omitted, colour is used only if stdout is a TTY.
 * @returns {string} Formatted text ending in a newline.
 */
export function formatText(report, options = {}) {
    const colorEnabled = options.color ?? Boolean(process.stdout.isTTY);
    const tint = colorEnabled ? pc : noColorPc();
    const plural = (count) => (count === 1 ? "" : "s");

    if (report.issues.length === 0) {
        const { rulesRun } = report.summary;
        const cleanLine = `Clean — ran ${rulesRun} rule${plural(rulesRun)}, no issues found.`;
        return `${tint.green(cleanLine)}\n`;
    }

    // Bucket issues by file; the Map keeps first-seen order per bucket.
    const byFile = new Map();
    for (const issue of report.issues) {
        if (!byFile.has(issue.file)) {
            byFile.set(issue.file, []);
        }
        byFile.get(issue.file).push(issue);
    }

    const out = [];
    for (const file of [...byFile.keys()].sort()) {
        const fileIssues = byFile.get(file);
        out.push(tint.bold(file));
        const severityWidth = fileIssues.reduce((w, i) => Math.max(w, i.severity.length), 0);
        const messageWidth = fileIssues.reduce((w, i) => Math.max(w, i.message.length), 0);
        for (const { severity, message, ruleId } of fileIssues) {
            const severityCell = padRight(severity, severityWidth);
            const messageCell = padRight(message, messageWidth + 2);
            out.push(`  ${colorSeverity(severityCell, severity, tint)}  ${messageCell}${tint.gray(ruleId)}`);
        }
        out.push(""); // blank line between file groups
    }

    const { errors, warnings, info, files: fileCount } = report.summary;
    const total = errors + warnings + info;
    const breakdown = [
        errors > 0 && `${errors} error${plural(errors)}`,
        warnings > 0 && `${warnings} warning${plural(warnings)}`,
        info > 0 && `${info} info`,
    ].filter(Boolean);
    const summaryLine = `✖ ${total} issue${plural(total)} (${breakdown.join(", ")}) across ${fileCount} file${plural(fileCount)}`;
    // Red when anything is an error, yellow otherwise.
    out.push(errors > 0 ? tint.red(summaryLine) : tint.yellow(summaryLine));
    return `${out.join("\n")}\n`;
}
61
/**
 * Colour a (possibly padded) severity label according to its level.
 *
 * @param {string} text Label to colour, already padded by the caller.
 * @param {string} severity Severity that selects the colour.
 * @param {{ red: Function, yellow: Function, blue: Function }} tint
 *   picocolors-compatible colour functions.
 * @returns {string} The coloured label; `text` unchanged for any
 *   severity other than "error", "warning" or "info".
 */
function colorSeverity(text, severity, tint) {
    switch (severity) {
        case "error":
            return tint.red(text);
        case "warning":
            return tint.yellow(text);
        case "info":
            return tint.blue(text);
        default:
            // Without this branch an unexpected severity returned
            // `undefined`, which the caller interpolated into the
            // report as the literal text "undefined".
            return text;
    }
}
71
/**
 * Right-pad `s` with spaces up to `width` characters. Strings that
 * are already at least `width` long are returned unchanged.
 *
 * @param {string} s Text to pad.
 * @param {number} width Target length.
 * @returns {string} Padded text.
 */
function padRight(s, width) {
    return s.padEnd(width, " ");
}
74
/**
 * Stand-in for picocolors used when colour is disabled: every
 * property read yields the same pass-through function, so callers can
 * write `tint.red(x)` without branching on colour support.
 *
 * @returns {object} Proxy whose every property is an identity function.
 */
function noColorPc() {
    const passthrough = (value) => value;
    const handler = {
        // Whatever name is requested, hand back the identity function.
        get() {
            return passthrough;
        },
    };
    return new Proxy({}, handler);
}
@@ -0,0 +1,62 @@
1
/** Rule store keyed by rule id; a Map, so iteration follows insertion order. */
const REGISTRY = new Map();
/**
 * Add a rule to the registry.
 *
 * @param {{ id: string }} rule Rule object; `id` is the registry key.
 * @throws {Error} When a rule with the same id is already registered —
 *   rule ids must be unique, so a repeat signals a copy-paste bug.
 */
export function registerRule(rule) {
    const { id } = rule;
    if (REGISTRY.has(id)) {
        throw new Error(`Duplicate rule registration: ${id}. Rule ids must be unique.`);
    }
    REGISTRY.set(id, rule);
}
/**
 * Snapshot of every registered rule, in registration order.
 *
 * @returns {object[]} A new array on each call.
 */
export function listRules() {
    return [...REGISTRY.values()];
}
/**
 * Find a rule by its exact id.
 *
 * @param {string} id Rule id to look up.
 * @returns {object | undefined} The rule, or `undefined` when unknown.
 */
export function getRule(id) {
    return REGISTRY.get(id);
}
/** Test-only helper: empty the registry between tests. Internal use. */
export function _clearRegistry() {
    REGISTRY.clear();
}
25
/**
 * Narrow a rule list by explicit id patterns, category, stage and
 * skip patterns.
 *
 * - `options.rules` (glob patterns on the rule id) takes precedence:
 *   when it is non-empty, `categories` and `stages` are ignored.
 * - Otherwise `categories` and `stages` are applied together.
 * - `options.skip` (glob patterns) is always applied last.
 *
 * Patterns are compiled by `compileGlob`, where `*` matches any run
 * of characters.
 *
 * @param {{ rules?: string[], categories?: string[], stages?: string[], skip?: string[] }} [options]
 * @param {Array<{ id: string, category: string, stage: string }>} [rules]
 *   Rules to filter; defaults to every registered rule.
 * @returns {Array<object>} Matching rules in their original order. The
 *   input array itself is returned when no filter applies.
 */
export function filterRules(options = {}, rules = listRules()) {
    const { rules: only, categories, stages, skip } = options;
    const matchesAny = (patterns, id) => patterns.some((re) => re.test(id));
    let selected = rules;

    if (only?.length > 0) {
        const includePatterns = only.map(compileGlob);
        selected = selected.filter((rule) => matchesAny(includePatterns, rule.id));
    }
    else {
        if (categories?.length > 0) {
            const wantedCategories = new Set(categories);
            selected = selected.filter((rule) => wantedCategories.has(rule.category));
        }
        if (stages?.length > 0) {
            const wantedStages = new Set(stages);
            selected = selected.filter((rule) => wantedStages.has(rule.stage));
        }
    }

    if (skip?.length > 0) {
        const skipPatterns = skip.map(compileGlob);
        selected = selected.filter((rule) => !matchesAny(skipPatterns, rule.id));
    }
    return selected;
}
53
/**
 * Turn a glob pattern into a RegExp that must match the whole string.
 * Each `*` becomes `.*`; every other regex metacharacter in the
 * pattern is escaped so it is matched literally.
 *
 * @param {string} pattern Glob pattern, e.g. `seo/*`.
 * @returns {RegExp} Anchored regular expression.
 */
function compileGlob(pattern) {
    const source = pattern
        .split("*")
        // Escape the literal stretches between wildcards.
        .map((literal) => literal.replace(/[-/\\^$+?.()|[\]{}]/g, "\\$&"))
        .join(".*");
    return new RegExp(`^${source}$`);
}
@@ -0,0 +1,26 @@
1
/** Longest frontmatter description (after trimming) the rule accepts. */
export const DEFAULT_MAX_DESCRIPTION_LENGTH = 160;
/**
 * Source-stage rule: warn when the trimmed frontmatter `description`
 * is longer than `DEFAULT_MAX_DESCRIPTION_LENGTH`. Pages whose
 * description is not a string produce no issue here.
 */
export const descriptionLength = {
    id: "seo/description-length",
    category: "seo",
    stage: "source",
    severity: "warning",
    description: "Frontmatter description exceeds the SERP safe-zone length (160 chars).",
    /**
     * @param {{ page: { slug: string, frontmatter?: Record<string, unknown> } }} ctx
     * @returns {object[]} Zero or one issue.
     */
    run(ctx) {
        const raw = ctx.page.frontmatter?.["description"];
        const text = typeof raw === "string" ? raw.trim() : null;
        if (text === null || text.length <= DEFAULT_MAX_DESCRIPTION_LENGTH) {
            return [];
        }
        // Show only the first 80 characters as context, with an ellipsis.
        const preview = `${text.slice(0, 80)}${text.length > 80 ? "…" : ""}`;
        const issue = {
            ruleId: "seo/description-length",
            severity: "warning",
            file: ctx.page.slug,
            message: `Description is ${text.length} characters (max ${DEFAULT_MAX_DESCRIPTION_LENGTH}); SERP snippets truncate at ~160.`,
            context: preview,
        };
        return [issue];
    },
};
@@ -0,0 +1,24 @@
1
/**
 * Source-stage rule: warn when a page has no usable `description` in
 * its frontmatter. Redirect pages are exempt. A description counts as
 * present only if it is a string with non-whitespace content.
 */
export const descriptionMissing = {
    id: "seo/description-missing",
    category: "seo",
    stage: "source",
    severity: "warning",
    description: "Page has no `description` in frontmatter; SERP snippets fall back to extracted body text.",
    /**
     * @param {{ page: { slug: string, redirect?: unknown, frontmatter?: Record<string, unknown> } }} ctx
     * @returns {object[]} Zero or one issue.
     */
    run(ctx) {
        const { redirect, frontmatter, slug } = ctx.page;
        if (redirect) {
            return [];
        }
        const value = frontmatter?.["description"];
        const isBlank = typeof value !== "string" || value.trim().length === 0;
        if (!isBlank) {
            return [];
        }
        return [
            {
                ruleId: "seo/description-missing",
                severity: "warning",
                file: slug,
                message: "Page has no `description` in frontmatter.",
            },
        ];
    },
};
@@ -0,0 +1,65 @@
1
/**
 * Dist-stage rule: check that a rendered page carries the basic SEO
 * head tags. A missing or empty `<title>` is an error; a missing or
 * empty meta description or Open Graph type/title/description is a
 * warning. Every missing tag produces its own issue.
 */
export const distMeta = {
    id: "seo/dist-meta",
    category: "seo",
    stage: "dist",
    severity: "warning",
    description: "Rendered HTML has the basic SEO meta surface (title, description, OG tags).",
    /**
     * @param {{ file: { path: string, $: Function } }} ctx `file.$` is the
     *   parsed document (cheerio-style selector function).
     * @returns {object[]} Issues in the order: title, description,
     *   og:type, og:title, og:description.
     */
    run(ctx) {
        const { file } = ctx;
        const $ = file.$;
        const makeIssue = (severity, message) => ({
            ruleId: "seo/dist-meta",
            severity,
            file: file.path,
            message,
        });
        const issues = [];

        const titleText = $("head > title").first().text().trim();
        if (titleText.length === 0) {
            issues.push(makeIssue("error", "Page has no <title> element or its content is empty."));
        }

        // [selector, message] pairs; each <meta> needs a non-blank `content`.
        const metaChecks = [
            ['head > meta[name="description"]', 'Page has no <meta name="description"> or its content is empty.'],
            ['head > meta[property="og:type"]', 'Page has no <meta property="og:type">.'],
            ['head > meta[property="og:title"]', 'Page has no <meta property="og:title">.'],
            ['head > meta[property="og:description"]', 'Page has no <meta property="og:description">.'],
        ];
        for (const [selector, message] of metaChecks) {
            const content = $(selector).attr("content")?.trim();
            if (!content) {
                issues.push(makeIssue("warning", message));
            }
        }
        return issues;
    },
};
@@ -0,0 +1,99 @@
1
/**
 * Source-stage rule: on landing pages, warn when the H1 (body H1 if
 * present, otherwise the frontmatter title) contains none of the
 * brand keywords from `config.brandKeywords`. Matching is a
 * case-insensitive substring test.
 *
 * The rule is inactive when no usable brand keyword is configured,
 * for redirect pages, for non-landing pages, and when the page has no
 * title text at all (that case belongs to `seo/title-missing`).
 */
export const h1BrandKeyword = {
    id: "seo/h1-brand-keyword",
    category: "seo",
    stage: "source",
    severity: "warning",
    description: "Landing pages include at least one configured brand keyword in their H1.",
    /**
     * @param {{ page: object, config: { brandKeywords?: unknown } }} ctx
     * @returns {object[]} Zero or one issue.
     */
    run(ctx) {
        const { page, config } = ctx;
        if (page.redirect)
            return [];
        const brandKeywords = config.brandKeywords;
        if (!Array.isArray(brandKeywords))
            return [];
        // Keep only usable entries: a non-string would throw on
        // `.toLowerCase()`, and a blank keyword is a substring of every
        // title, which would silently disable the rule.
        const keywords = brandKeywords.filter((kw) => typeof kw === "string" && kw.trim().length > 0);
        if (keywords.length === 0)
            return [];
        if (!isLandingPage(page))
            return [];
        const fm = page.frontmatter ?? {};
        const fmTitle = typeof fm["title"] === "string" ? fm["title"] : "";
        const bodyH1 = extractH1Text(page.tree);
        const titleText = (bodyH1 || fmTitle).toLowerCase();
        if (titleText.length === 0) {
            // No H1 at all — `seo/title-missing` covers this case;
            // don't double-report.
            return [];
        }
        const matched = keywords.some((kw) => titleText.includes(kw.toLowerCase()));
        if (matched)
            return [];
        const list = keywords.map((k) => `"${k}"`).join(", ");
        return [
            {
                ruleId: "seo/h1-brand-keyword",
                severity: "warning",
                file: page.slug,
                message: `Landing page H1 includes none of the configured brand keywords (${list}).`,
                context: bodyH1 || fmTitle,
            },
        ];
    },
};
41
/**
 * Decide whether a page is a landing page: its slug is empty or
 * "index", or its `meta.type` is "landing".
 *
 * @param {{ slug: string, meta?: { type?: string } }} page
 * @returns {boolean}
 */
function isLandingPage(page) {
    const isRootSlug = page.slug === "" || page.slug === "index";
    return isRootSlug || page.meta?.type === "landing";
}
48
/**
 * Return the trimmed text of the first top-level H1 heading in `tree`.
 *
 * A node is an H1 when its `type` is "heading" and either `depth` or
 * `props.level` equals 1. Its text comes from `inline` (flattened via
 * `inlineToText`) when present, otherwise from `html`. Only the first
 * H1 is considered; "" is returned when there is none or it is empty.
 *
 * @param {Array<object> | null | undefined} tree Top-level content nodes.
 * @returns {string}
 */
function extractH1Text(tree) {
    if (!tree || tree.length === 0) {
        return "";
    }
    for (const node of tree) {
        const isH1 = node.type === "heading" &&
            (node.depth === 1 || node.props?.level === 1);
        if (!isH1) {
            continue;
        }
        // First H1 wins, even when it turns out to carry no text.
        if (node.inline) {
            return inlineToText(node.inline).trim();
        }
        return node.html ? node.html.trim() : "";
    }
    return "";
}
67
/**
 * Text extractors per inline node type. Node types without an entry
 * contribute nothing to the flattened text.
 */
const INLINE_TEXT_EXTRACTORS = new Map([
    ["text", (n) => n.text],
    ["code", (n) => n.text],
    ["link", (n) => inlineToText(n.children)],
    ["highlight", (n) => inlineToText(n.children)],
    ["kbd", (n) => n.keys.join("+")],
    ["math", (n) => n.latex],
    ["image", (n) => (n.alt ? n.alt : "")],
    ["break", () => " "],
]);
/**
 * Flatten a list of inline nodes into plain text: text and code
 * contribute their `text`, links and highlights their children, kbd
 * its keys joined by "+", math its `latex`, images their `alt` (if
 * any) and breaks a single space.
 *
 * @param {Iterable<{ type: string }>} nodes Inline nodes.
 * @returns {string} Concatenated text.
 */
function inlineToText(nodes) {
    let out = "";
    for (const n of nodes) {
        const extract = INLINE_TEXT_EXTRACTORS.get(n.type);
        if (extract) {
            out += extract(n);
        }
    }
    return out;
}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * SEO rule category — register all `seo/*` rules into the
3
+ * global registry. Imported once by the audit runner (or the
4
+ * site-check command) at startup.
5
+ *
6
+ * Rules landed in this category:
7
+ * - seo/title-missing (source)
8
+ * - seo/description-missing (source)
9
+ * - seo/description-length (source)
10
+ * - seo/keyword-placement (source, opt-in via frontmatter)
11
+ * - seo/h1-brand-keyword (source, opt-in via site config)
12
+ * - seo/dist-meta (dist)
13
+ * - seo/json-ld-parse (dist)
14
+ * - seo/json-ld-required-fields (dist)
15
+ * - seo/sitemap-present (dist, fires once per run)
16
+ * - seo/sitemap-complete (dist, fires once per run)
17
+ * - seo/sitemap-robots-coherence (dist, fires once per run)
18
+ *
19
+ * Future additions land here:
20
+ * - seo/json-ld-type-coherence (dist; checks @type matches
21
+ * meta.type)
22
+ * - seo/reciprocal-links (corpus stage; lands with the
23
+ * Corpus category PR)
24
+ */
25
+ import { registerRule } from "../../registry.js";
26
+ import { titleMissing } from "./title-missing.js";
27
+ import { descriptionMissing } from "./description-missing.js";
28
+ import { descriptionLength } from "./description-length.js";
29
+ import { keywordPlacement } from "./keyword-placement.js";
30
+ import { h1BrandKeyword } from "./h1-brand-keyword.js";
31
+ import { distMeta } from "./dist-meta.js";
32
+ import { jsonLdParse } from "./json-ld-parse.js";
33
+ import { jsonLdRequiredFields } from "./json-ld-required-fields.js";
34
+ import { sitemapPresent, sitemapComplete, sitemapRobotsCoherence, } from "./sitemap.js";
35
/** Guards `registerSeoRules` so the rules are registered only once. */
let registered = false;
/**
 * Register every `seo/*` rule with the global registry. Safe to call
 * repeatedly: calls after the first do nothing, so the CLI and tests
 * can each call it without coordinating.
 */
export function registerSeoRules() {
    if (registered) {
        return;
    }
    registered = true;
    // Registration order is the order rules are listed and run in.
    const seoRules = [
        titleMissing,
        descriptionMissing,
        descriptionLength,
        keywordPlacement,
        h1BrandKeyword,
        distMeta,
        jsonLdParse,
        jsonLdRequiredFields,
        sitemapPresent,
        sitemapComplete,
        sitemapRobotsCoherence,
    ];
    for (const rule of seoRules) {
        registerRule(rule);
    }
}
/**
 * Test-only: clear the "already registered" flag so a test can call
 * `registerSeoRules` again after `_clearRegistry()`. Production code
 * never calls this.
 */
export function _resetSeoRegistration() {
    registered = false;
}
@@ -0,0 +1,32 @@
1
/**
 * Dist-stage rule: every `<script type="application/ld+json">` block
 * on the page must contain valid JSON. Empty blocks are ignored; each
 * block that fails to parse yields one error carrying the parser's
 * message as context.
 */
export const jsonLdParse = {
    id: "seo/json-ld-parse",
    category: "seo",
    stage: "dist",
    severity: "error",
    description: "Every `<script type=\"application/ld+json\">` block parses as valid JSON.",
    /**
     * @param {{ file: { path: string, $: Function } }} ctx `file.$` is the
     *   parsed document (cheerio-style selector function).
     * @returns {object[]} One issue per unparsable block.
     */
    run(ctx) {
        const { file } = ctx;
        const $ = file.$;
        // Returns the thrown error, or null when `text` is valid JSON.
        const parseFailure = (text) => {
            try {
                JSON.parse(text);
                return null;
            }
            catch (err) {
                return err;
            }
        };
        const issues = [];
        $('script[type="application/ld+json"]').each((_idx, el) => {
            const raw = $(el).text().trim();
            if (raw.length === 0) {
                return; // empty block — odd but harmless
            }
            const failure = parseFailure(raw);
            if (failure) {
                issues.push({
                    ruleId: "seo/json-ld-parse",
                    severity: "error",
                    file: file.path,
                    message: "JSON-LD block does not parse as valid JSON.",
                    context: failure.message,
                });
            }
        });
        return issues;
    },
};
@@ -0,0 +1,116 @@
1
/**
 * Required-fields map per `@type`. Values are the field names
 * Google's Rich Results validator needs at a minimum. Recommended
 * fields (e.g. `Article.author`, `Article.image`) live in a
 * future `seo/json-ld-recommended-fields` rule, not here.
 *
 * The map has no prototype: it is indexed with `@type` strings taken
 * from page content, and a plain object would answer lookups such as
 * "toString" or "constructor" with inherited functions.
 */
const REQUIRED_FIELDS = Object.assign(Object.create(null), {
    Article: ["headline"],
    TechArticle: ["headline"],
    HowTo: ["name", "step"],
    Course: ["name", "description", "provider"],
    Person: ["name"],
    Organization: ["name"],
    BreadcrumbList: ["itemListElement"],
});
/**
 * Fields per `@type` that, when present, should be an array (not a
 * scalar). Prototype-free for the same reason as `REQUIRED_FIELDS`.
 */
const ARRAY_FIELDS = Object.assign(Object.create(null), {
    HowTo: ["step"],
    BreadcrumbList: ["itemListElement"],
});
21
/**
 * Dist-stage rule: each JSON-LD block on the page should declare an
 * `@type` and carry that type's required fields (checked by
 * `validateBlock`).
 *
 * Handles three document shapes: a single node, a top-level array of
 * nodes, and a wrapper object whose `@graph` array holds the nodes.
 * Blocks that are empty or do not parse are skipped here —
 * `seo/json-ld-parse` reports those.
 */
export const jsonLdRequiredFields = {
    id: "seo/json-ld-required-fields",
    category: "seo",
    stage: "dist",
    severity: "warning",
    description: "JSON-LD blocks declare a recognised @type and carry that type's required fields.",
    /**
     * @param {{ file: { path: string, $: Function } }} ctx `file.$` is the
     *   parsed document (cheerio-style selector function).
     * @returns {object[]} Issues collected from every block.
     */
    run(ctx) {
        const { file } = ctx;
        const $ = file.$;
        const issues = [];
        const isNode = (value) => typeof value === "object" && value !== null;
        $('script[type="application/ld+json"]').each((idx, el) => {
            const raw = $(el).text().trim();
            if (raw.length === 0)
                return;
            let parsed;
            try {
                parsed = JSON.parse(raw);
            }
            catch {
                // json-ld-parse already flags this; don't double-report.
                return;
            }
            const blocks = Array.isArray(parsed) ? parsed : [parsed];
            for (const block of blocks) {
                if (!isNode(block))
                    continue;
                const graph = block["@graph"];
                if (!Array.isArray(graph)) {
                    validateBlock(block, idx, file.path, issues);
                    continue;
                }
                // `{ "@context": …, "@graph": [...] }` wrapper: the nodes
                // live inside `@graph`. The wrapper itself usually has no
                // `@type`, so only validate it when it declares one —
                // otherwise it would be reported as "no @type".
                if (block["@type"] !== undefined) {
                    validateBlock(block, idx, file.path, issues);
                }
                for (const member of graph) {
                    if (isNode(member)) {
                        validateBlock(member, idx, file.path, issues);
                    }
                }
            }
        });
        return issues;
    },
};
54
/**
 * Validate one JSON-LD node and append any findings to `out`.
 *
 * - No `@type` at all: one warning naming the block number.
 * - `@type` that is not a string (e.g. a multi-type array): skipped.
 * - `@type` without an entry in `REQUIRED_FIELDS`: passes through.
 * - Otherwise: one warning per required field that is absent, or
 *   present but null / "" / an empty array; plus one warning per
 *   `ARRAY_FIELDS` field that is present but not an array.
 *
 * @param {Record<string, unknown>} block Parsed JSON-LD node.
 * @param {number} blockIndex Zero-based index of the source script block.
 * @param {string} file Path reported on each issue.
 * @param {object[]} out Issue accumulator, mutated in place.
 */
function validateBlock(block, blockIndex, file, out) {
    const warn = (message) => {
        out.push({
            ruleId: "seo/json-ld-required-fields",
            severity: "warning",
            file,
            message,
        });
    };
    // `@type` comes from page content, so look it up as an own property
    // only: a plain index with "toString" or "constructor" would return
    // an inherited function and crash the loops below.
    const ownList = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

    const atType = block["@type"];
    if (typeof atType !== "string") {
        // Either missing or an array (multi-type). Multi-type is
        // legal Schema.org but uncommon and validation gets tricky;
        // skip it for v1.
        if (atType === undefined) {
            warn(`JSON-LD block #${blockIndex + 1} has no \`@type\`.`);
        }
        return;
    }
    const required = ownList(REQUIRED_FIELDS, atType);
    if (!required) {
        // Unrecognised @type — pass through. Sites can layer in any
        // Schema.org type via customJsonLd; we won't fight them.
        return;
    }
    for (const field of required) {
        if (!(field in block)) {
            warn(`JSON-LD ${atType} block missing required field \`${field}\`.`);
            continue;
        }
        const value = block[field];
        // Empty string / empty array / null all count as missing.
        const isEmpty = value === null ||
            value === "" ||
            (Array.isArray(value) && value.length === 0);
        if (isEmpty) {
            warn(`JSON-LD ${atType} block has empty required field \`${field}\`.`);
        }
    }
    const arrayFields = ownList(ARRAY_FIELDS, atType);
    if (arrayFields) {
        for (const field of arrayFields) {
            if (!(field in block))
                continue;
            const value = block[field];
            if (!Array.isArray(value)) {
                warn(`JSON-LD ${atType} field \`${field}\` should be an array, got ${typeof value}.`);
            }
        }
    }
}