dogsbay 0.2.0-beta.7 → 0.2.0-beta.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/audit/lib/collect-assets.js +64 -0
  2. package/dist/audit/lib/validate-refs.js +132 -0
  3. package/dist/audit/rules/seo/sitemap.js +125 -8
  4. package/dist/audit/rules/structure/asset-refs.js +41 -0
  5. package/dist/audit/rules/structure/index.js +8 -0
  6. package/dist/audit/rules/structure/internal-links.js +28 -0
  7. package/dist/audit/rules/structure/nav-target-exists.js +107 -0
  8. package/dist/audit/rules/structure/unresolved-directives.js +108 -0
  9. package/dist/audit/run.js +4 -0
  10. package/dist/commands/import-mkdocs.js +97 -66
  11. package/dist/commands/migrate-asciidoc.js +724 -0
  12. package/dist/commands/migrate-mkdocs.js +1005 -0
  13. package/dist/commands/site-build.js +314 -16
  14. package/dist/commands/site-check.js +21 -0
  15. package/dist/commands/site-dev.js +182 -23
  16. package/dist/commands/site-init.js +45 -10
  17. package/dist/config/defaults.js +30 -23
  18. package/dist/config/load.js +111 -32
  19. package/dist/config/to-astro-options.js +5 -0
  20. package/dist/import-content.js +24 -33
  21. package/dist/index.js +64 -4
  22. package/dist/passthrough-astro.js +152 -0
  23. package/dist/registry.js +8 -0
  24. package/dist/resolve-autodoc.js +224 -0
  25. package/dist/site-build/preprocess.js +292 -0
  26. package/dist/utils/gitignore.js +54 -0
  27. package/package.json +16 -9
  28. package/skills/platform/cli-commands/SKILL.md +3 -2
  29. package/skills/platform/frontmatter-fields/SKILL.md +240 -48
  30. package/skills/platform/markdown-directives/SKILL.md +123 -10
  31. package/skills/platform/migration-shape/SKILL.md +314 -0
  32. package/skills/platform/multi-source/SKILL.md +21 -10
  33. package/skills/platform/taxonomy-config/SKILL.md +245 -5
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Walk one or more content directories collecting absolute-path
3
+ * asset references that the build's `copyAssets` would emit to
4
+ * `public/`. The audit rule `structure/asset-refs` cross-checks
5
+ * `<img src>` / `<a href>` references against this set so a
6
+ * misnamed file shows up as a structured Issue.
7
+ *
8
+ * Mirrors the extension set used by `copyAssets` in
9
+ * `@dogsbay/format-astro/src/project.ts`. Keep in sync — drift
10
+ * would either flag valid refs as broken or miss real misses.
11
+ */
12
+ import { existsSync, readdirSync, statSync } from "node:fs";
13
+ import { join, relative } from "node:path";
14
// Same sets as format-astro's copyAssets. Keep in sync.
// Entries are lowercase and include the leading dot; `walk` lowercases a
// file's extension before testing membership, and treats both sets the
// same way (a hit in either one makes the file a collected asset).
// Raster image extensions.
const OPTIMIZABLE_EXTS = new Set([".png", ".jpg", ".jpeg", ".gif", ".webp"]);
// SVG, icon and PDF extensions.
const PASSTHROUGH_EXTS = new Set([".svg", ".ico", ".pdf"]);
17
/**
 * Gather the public asset paths found under a list of content
 * directories. Each returned entry starts with "/" and is the file's
 * path relative to the content directory it was found in, written
 * with forward slashes.
 *
 * Directories that do not exist are skipped.
 *
 * @param contentDirs Absolute paths, one per source.
 * @returns A Set of "/"-prefixed asset paths.
 */
export function collectAssets(contentDirs) {
    const found = new Set();
    for (const root of contentDirs) {
        if (existsSync(root)) {
            // Each source is its own root: paths are made relative to it.
            walk(root, root, found);
        }
    }
    return found;
}
32
/**
 * Recursively scan `dir` and add every file whose (lowercased)
 * extension is in OPTIMIZABLE_EXTS or PASSTHROUGH_EXTS to `out`, as a
 * "/"-prefixed path relative to `rootDir` with forward slashes.
 *
 * Directories that cannot be listed and entries that cannot be
 * stat'ed are skipped without raising.
 *
 * @param rootDir Directory the emitted paths are made relative to.
 * @param dir     Directory currently being scanned.
 * @param out     Set that receives the asset paths.
 */
function walk(rootDir, dir, out) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        // Unreadable directory — contributes nothing.
        return;
    }
    for (const name of names) {
        const abs = join(dir, name);
        let stats;
        try {
            stats = statSync(abs);
        }
        catch {
            // Entry can't be stat'ed — skip it.
            continue;
        }
        if (stats.isDirectory()) {
            walk(rootDir, abs, out);
            continue;
        }
        const dotAt = name.lastIndexOf(".");
        if (dotAt === -1) {
            continue;
        }
        const ext = name.slice(dotAt).toLowerCase();
        const isAsset = OPTIMIZABLE_EXTS.has(ext) || PASSTHROUGH_EXTS.has(ext);
        if (isAsset) {
            // Normalise Windows separators so the entry reads as a URL path.
            const relPath = relative(rootDir, abs);
            out.add("/" + relPath.replace(/\\/g, "/"));
        }
    }
}
@@ -0,0 +1,132 @@
1
// Matches references that start with a URI scheme ("http:", "mailto:", …)
// or with a protocol-relative "//" prefix. Case-insensitive.
const EXTERNAL_RE = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
// Matches references that begin with "#".
const ANCHOR_ONLY_RE = /^#/;
// Matches references that begin with "data:" (case-insensitive).
const DATA_URI_RE = /^data:/i;
// Matches paths that end in one of the listed file extensions
// (case-insensitive). Used by `looksLikeAsset`.
const ASSET_EXT_RE = /\.(png|jpe?g|gif|webp|svg|avif|pdf|zip|csv|tsv|json|yml|yaml|toml|mp4|webm|mp3|ogg|woff2?|ttf|otf)$/i;
5
/**
 * Decide whether an href "looks like" an asset reference: true when
 * the part of the href before any `?` or `#` ends in an extension
 * matched by ASSET_EXT_RE.
 *
 * `check` uses this to choose between validating an `<a href>`
 * against the asset set or the page set (image sources are always
 * treated as assets there, regardless of this heuristic).
 */
function looksLikeAsset(href) {
    // Only the path portion matters — drop query string and fragment.
    const [pathOnly] = href.split(/[?#]/);
    return ASSET_EXT_RE.test(pathOnly);
}
17
/**
 * Validate every reference found in the given pages' trees and
 * report the ones that resolve to neither a known page nor a known
 * asset.
 *
 * @param pages   Pages to scan; each one's `slug` and `tree` are read.
 * @param options `basePath` (one trailing slash is ignored) and
 *                `knownAssets` (set of asset paths).
 * @returns `{ linkMisses, assetMisses }` — arrays filled in by `check`.
 */
export function validateRefs(pages, options) {
    const basePath = options.basePath.replace(/\/$/, "");
    const knownAssets = options.knownAssets;
    // Accept the common ways of addressing a page: its slug as-is,
    // and — for index pages — the parent directory ("" for the root
    // index, "guide" for "guide/index").
    const pageSlugs = new Set();
    for (const { slug } of pages) {
        pageSlugs.add(slug);
        if (slug === "index") {
            pageSlugs.add("");
        }
        if (slug.endsWith("/index")) {
            pageSlugs.add(slug.slice(0, slug.length - "/index".length));
        }
    }
    const result = { linkMisses: [], assetMisses: [] };
    for (const page of pages) {
        const onRef = (href, kind) => {
            check(href, kind, page.slug, basePath, pageSlugs, knownAssets, result);
        };
        walkTree(page.tree, onRef);
    }
    return result;
}
45
/**
 * Depth-first walk over block nodes. For each node, in order:
 * inline content and raw HTML are delegated to `walkInline` /
 * `walkHtml`, a non-empty string `props.href` is reported as a
 * "link", a non-empty string `props.src` as an "image", and then
 * `children` are walked recursively.
 */
function walkTree(nodes, visit) {
    for (const { inline, html, props, children } of nodes) {
        if (inline) {
            walkInline(inline, visit);
        }
        if (html) {
            walkHtml(html, visit);
        }
        const href = props?.href;
        if (href && typeof href === "string") {
            visit(href, "link");
        }
        const src = props?.src;
        if (src && typeof src === "string") {
            visit(src, "image");
        }
        if (children) {
            walkTree(children, visit);
        }
    }
}
62
/**
 * Walk a list of inline nodes and report references:
 *  - "link" nodes with a string `href` are reported, then their
 *    children are walked;
 *  - "image" nodes with a string `src` are reported;
 *  - "highlight" nodes are containers — only their children are walked.
 * Any other node (including a link without a string href) is ignored,
 * children and all.
 */
function walkInline(nodes, visit) {
    for (const node of nodes) {
        switch (node.type) {
            case "link":
                if (typeof node.href === "string") {
                    visit(node.href, "link");
                    if (node.children) {
                        walkInline(node.children, visit);
                    }
                }
                break;
            case "image":
                if (typeof node.src === "string") {
                    visit(node.src, "image");
                }
                break;
            case "highlight":
                if (node.children) {
                    walkInline(node.children, visit);
                }
                break;
            default:
                break;
        }
    }
}
77
/**
 * Scan a pre-rendered HTML string for `<a href>` and `<img src>`
 * references and report each one to `visit`.
 *
 * Regex-based on purpose: the audit doesn't need a full HTML parser
 * for this. Attribute values may be double- OR single-quoted, and
 * whitespace around `=` is tolerated — both are valid HTML, and
 * references written that way would otherwise escape validation.
 * Unquoted attribute values are not recognised.
 *
 * All anchors are reported first (in document order), then all images.
 *
 * @param html  HTML fragment to scan.
 * @param visit Callback invoked as `visit(value, "link" | "image")`.
 */
function walkHtml(html, visit) {
    // Group 1 = double-quoted value, group 2 = single-quoted value.
    const anchorHrefRe = /<a\b[^>]*\shref\s*=\s*(?:"([^"]+)"|'([^']+)')/gi;
    for (const m of html.matchAll(anchorHrefRe)) {
        visit(m[1] ?? m[2], "link");
    }
    const imgSrcRe = /<img\b[^>]*\ssrc\s*=\s*(?:"([^"]+)"|'([^']+)')/gi;
    for (const m of html.matchAll(imgSrcRe)) {
        visit(m[1] ?? m[2], "image");
    }
}
89
/**
 * Validate a single reference and record a miss in `result` when it
 * does not resolve.
 *
 * Ignored outright: empty values, external URLs, anchor-only and
 * data: references, and anything that does not start with "/"
 * (relative references are out of scope for v1).
 *
 * Image sources, and hrefs that `looksLikeAsset`, are looked up in
 * `knownAssets`; everything else is looked up in `pageSlugs`. In both
 * cases the query/fragment is dropped and a leading `basePath` is
 * stripped first, so `/docs/intro` and `/intro` both match slug
 * "intro" when basePath is "/docs".
 */
function check(href, kind, pageSlug, basePath, pageSlugs, knownAssets, result) {
    if (!href || EXTERNAL_RE.test(href)) {
        return;
    }
    if (ANCHOR_ONLY_RE.test(href) || DATA_URI_RE.test(href)) {
        return;
    }
    if (!href.startsWith("/")) {
        return; // relative — out of scope for v1
    }
    // Compare without query string and fragment.
    const [cleanHref] = href.split(/[?#]/);
    const underBasePath = Boolean(basePath) && cleanHref.startsWith(`${basePath}/`);
    if (kind === "image" || looksLikeAsset(cleanHref)) {
        // Asset paths are absolute under the public root.
        // knownAssets stores entries like "/_assets/foo.png".
        const lookup = underBasePath ? cleanHref.slice(basePath.length) : cleanHref;
        if (!knownAssets.has(lookup)) {
            result.assetMisses.push({ pageSlug, href, resolvedAgainst: "asset" });
        }
        return;
    }
    // Page lookup: reduce the href to a bare slug — no basePath, no
    // leading/trailing slash, no `.md` extension.
    const hasBasePath = basePath && (cleanHref === basePath || underBasePath);
    const path = hasBasePath ? cleanHref.slice(basePath.length) : cleanHref;
    const trimmed = path
        .replace(/^\//, "")
        .replace(/\/$/, "")
        .replace(/\.md$/i, "");
    if (!pageSlugs.has(trimmed)) {
        result.linkMisses.push({ pageSlug, href, resolvedAgainst: "page" });
    }
}
@@ -22,7 +22,7 @@
22
22
  * `_resetSitemapCache()`.
23
23
  */
24
24
  import { existsSync, readFileSync } from "node:fs";
25
- import { join } from "node:path";
25
+ import { basename, dirname, join } from "node:path";
26
26
  const STATE_CACHE = new Map();
27
27
  export function _resetSitemapCache() {
28
28
  STATE_CACHE.clear();
@@ -50,11 +50,26 @@ function getState(distRoot) {
50
50
  // sitemap integration which produces well-formed XML, so
51
51
  // the only failure mode is "missing entirely" or
52
52
  // "doesn't parse at all."
53
- if (/<(urlset|sitemapindex)[\s>]/.test(content)) {
53
+ const isIndex = /<sitemapindex[\s>]/.test(content);
54
+ const isUrlset = /<urlset[\s>]/.test(content);
55
+ if (isIndex || isUrlset) {
54
56
  sitemapValid = true;
55
- const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
56
- for (const m of matches) {
57
- sitemapLocs.add(m[1].trim());
57
+ if (isIndex) {
58
+ // sitemap-index.xml: each <loc> points at a child
59
+ // sitemap file (not a page). Read each child and union
60
+ // its <loc> entries (those ARE the page URLs). Without
61
+ // this recursion, sitemapLocs would carry sitemap file
62
+ // URLs only and every page would be flagged "missing
63
+ // from sitemap" — the entire audit class is unusable.
64
+ collectChildSitemapLocs(content, sitemapPath, sitemapLocs);
65
+ }
66
+ else {
67
+ // Plain <urlset> — extract <loc>s directly. Same as
68
+ // before.
69
+ const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
70
+ for (const m of matches) {
71
+ sitemapLocs.add(m[1].trim());
72
+ }
58
73
  }
59
74
  }
60
75
  }
@@ -123,7 +138,7 @@ export const sitemapComplete = {
123
138
  severity: "warning",
124
139
  description: "Every built HTML page is listed in the sitemap.",
125
140
  run(ctx) {
126
- const { file, distRoot, allFiles } = ctx;
141
+ const { file, distRoot, allFiles, config } = ctx;
127
142
  const state = getState(distRoot);
128
143
  // Emit once per audit run — we have access to allFiles here.
129
144
  if (state.emitted.has("seo/sitemap-complete"))
@@ -134,13 +149,33 @@ export const sitemapComplete = {
134
149
  // to add here.
135
150
  return [];
136
151
  }
152
+ // urlBase comes from the path component of site.url. When the
153
+ // site is mounted at a subpath (typical GH Pages project deploy:
154
+ // `site.url: https://user.github.io/repo`), sitemap entries
155
+ // carry the urlBase prefix (the emitter writes absolute URLs),
156
+ // but the filesystem-derived urlPath below doesn't — files
157
+ // live at `dist/<page>/index.html` regardless of urlBase. Pass
158
+ // urlBase into the comparison so the suffix-strip step
159
+ // accounts for it. See plans/sitemap-audit-urlbase.md.
160
+ const urlBase = urlBaseFromSiteUrl(config?.siteUrl);
137
161
  // Map each html file to its likely "page URL" form. Astro
138
162
  // emits `docs/intro/index.html` for the URL `/docs/intro/`,
139
163
  // so we strip `/index.html` and ensure a leading slash.
164
+ //
165
+ // Skip pages with `<meta name="robots" content="noindex">`.
166
+ // The sitemap emitter intentionally excludes those (per the
167
+ // platform contract documented in reference/frontmatter.md
168
+ // and `packages/format-astro/src/sitemap.ts` isExcluded), so
169
+ // flagging them as "missing" would tell users to fix something
170
+ // that's working as intended. The auto-emitted taxonomy index
171
+ // pages (/tags/, /by-audience/, …) all ship with noindex.
140
172
  const issues = [];
141
173
  for (const f of allFiles) {
174
+ const robots = f.$('meta[name="robots"]').attr("content") ?? "";
175
+ if (/\bnoindex\b/i.test(robots))
176
+ continue;
142
177
  const expectedPath = htmlPathToUrlPath(f.path);
143
- const found = pathListedInSitemap(expectedPath, state.sitemapLocs);
178
+ const found = pathListedInSitemap(expectedPath, state.sitemapLocs, urlBase);
144
179
  if (!found) {
145
180
  issues.push({
146
181
  ruleId: "seo/sitemap-complete",
@@ -194,6 +229,51 @@ export const sitemapRobotsCoherence = {
194
229
  return issues;
195
230
  },
196
231
  };
232
/**
 * Expand a `<sitemapindex>` document: every `<loc>` in it names a
 * child sitemap; each child found on disk is read and its own
 * `<loc>` values (trimmed) are added to `out`.
 *
 * A child is located by taking the basename of its URL (or of the raw
 * value when it doesn't parse as a URL) and joining it onto the
 * directory that holds the index file. That covers both the
 * host-root layout (`dist/sitemap-index.xml` + `dist/sitemap-0.xml`)
 * and the per-mount layout (`dist/<basePath>/sitemap-index.xml` +
 * `dist/<basePath>/sitemap-0.xml`).
 *
 * Children that are missing or unreadable are skipped silently —
 * the audit should not crash because a deploy shipped a partial
 * sitemap.
 *
 * @param indexContent XML text of the sitemap index.
 * @param indexPath    Filesystem path of the sitemap index file.
 * @param out          Set that receives the page-level `<loc>` values.
 */
function collectChildSitemapLocs(indexContent, indexPath, out) {
    const baseDir = dirname(indexPath);
    for (const [, rawLoc] of indexContent.matchAll(/<loc>([^<]+)<\/loc>/g)) {
        const childUrl = rawLoc.trim();
        let fileName;
        try {
            fileName = basename(new URL(childUrl).pathname);
        }
        catch {
            // Not an absolute URL — treat the value as a plain path.
            fileName = basename(childUrl);
        }
        if (!fileName || fileName === "/") {
            continue;
        }
        const localPath = join(baseDir, fileName);
        if (!existsSync(localPath)) {
            continue;
        }
        try {
            const xml = readFileSync(localPath, "utf-8");
            for (const [, pageLoc] of xml.matchAll(/<loc>([^<]+)<\/loc>/g)) {
                out.add(pageLoc.trim());
            }
        }
        catch {
            // Skip unreadable child — best-effort by design.
        }
    }
}
197
277
  /**
198
278
  * Convert an HTML file path within `dist/` into the public URL
199
279
  * path the sitemap is likely to list. Drops `/index.html` and
@@ -216,9 +296,18 @@ function htmlPathToUrlPath(htmlPath) {
216
296
  * Sitemap entries can be absolute (`https://example.com/foo/`)
217
297
  * or relative (`/foo/`); we match either by suffix.
218
298
  *
299
+ * `urlBase` is the path component of `site.url` — e.g. `/repo` for
300
+ * a GH Pages project deploy at `https://user.github.io/repo`. When
301
+ * non-empty, the platform's sitemap emitter writes absolute URLs
302
+ * including that segment (`https://user.github.io/repo/intro/`),
303
+ * but the filesystem-derived `urlPath` doesn't have it (files live
304
+ * at `dist/intro/index.html`). Strip the urlBase off the sitemap
305
+ * suffix before comparing so the audit doesn't flag every page as
306
+ * missing on every subpath-mounted deploy.
307
+ *
219
308
  * Trailing slash tolerant: `/foo/` matches `/foo` too.
220
309
  */
221
- function pathListedInSitemap(urlPath, locs) {
310
+ function pathListedInSitemap(urlPath, locs, urlBase) {
222
311
  for (const loc of locs) {
223
312
  if (loc === urlPath)
224
313
  return true;
@@ -230,6 +319,16 @@ function pathListedInSitemap(urlPath, locs) {
230
319
  if (afterHost >= 0)
231
320
  suffix = loc.slice(afterHost);
232
321
  }
322
+ // Strip the urlBase prefix when the sitemap entry carries one
323
+ // and the comparison target doesn't.
324
+ if (urlBase) {
325
+ if (suffix === urlBase) {
326
+ suffix = "/";
327
+ }
328
+ else if (suffix.startsWith(`${urlBase}/`)) {
329
+ suffix = suffix.slice(urlBase.length);
330
+ }
331
+ }
233
332
  if (suffix === urlPath)
234
333
  return true;
235
334
  // Trailing-slash flexibility
@@ -238,3 +337,21 @@ function pathListedInSitemap(urlPath, locs) {
238
337
  }
239
338
  return false;
240
339
  }
340
/**
 * Derive the urlBase (path component of `site.url`, minus one
 * trailing slash) used as the prefix to strip from sitemap entries.
 *
 * Yields `""` when `siteUrl` is missing/empty, cannot be parsed as a
 * URL, or has no path beyond the root.
 *
 * @param siteUrl The configured site URL, e.g. `https://user.github.io/repo`.
 * @returns The path prefix (e.g. `/repo`) or `""`.
 */
function urlBaseFromSiteUrl(siteUrl) {
    if (!siteUrl) {
        return "";
    }
    let pathname;
    try {
        ({ pathname } = new URL(siteUrl));
    }
    catch {
        // Unparseable site.url — behave as if there were no base.
        return "";
    }
    const withoutTrailingSlash = pathname.replace(/\/$/, "");
    if (withoutTrailingSlash === "/") {
        return "";
    }
    return withoutTrailingSlash;
}
@@ -0,0 +1,41 @@
1
+ import { validateRefs } from "../../lib/validate-refs.js";
2
/**
 * Audit rule `structure/asset-refs`: reports every absolute
 * `<img src>` / asset-style `<a href>` that `validateRefs` could not
 * find in `ctx.knownAssets`.
 *
 * The rule is a no-op (returns `[]`) when the context carries no
 * pages or no asset set.
 */
export const assetRefs = {
    id: "structure/asset-refs",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every absolute `<img src>` and asset-style `<a href>` resolves to a file under the content tree.",
    /**
     * @param rawCtx Audit context; reads `pages`, `knownAssets` and
     *               the optional `basePath`.
     * @returns One Issue per unresolved asset reference.
     */
    run(rawCtx) {
        const ctx = rawCtx;
        if (!ctx.pages || ctx.pages.length === 0)
            return [];
        if (!ctx.knownAssets) {
            // No asset set available — typically a unit-test ctx that
            // didn't bother. No-op rather than false-positive.
            return [];
        }
        const { assetMisses } = validateRefs(ctx.pages, {
            basePath: ctx.basePath ?? "",
            knownAssets: ctx.knownAssets,
        });
        // Same normalisation validateRefs applies before its lookup.
        const basePath = (ctx.basePath ?? "").replace(/\/$/, "");
        return assetMisses.map((m) => {
            // Where the user should drop the file to satisfy THIS ref —
            // strip query/fragment and the optional basePath, then
            // prepend `content`. The basePath must go: the validator
            // strips it before looking the asset up, so a file placed
            // under `content/<basePath>/…` would still be reported.
            // (We can't know the exact content dir layout for
            // multi-source sites without more plumbing; keep it
            // intuitive.)
            const cleanHref = m.href.split(/[?#]/)[0];
            const publicPath = basePath && cleanHref.startsWith(`${basePath}/`)
                ? cleanHref.slice(basePath.length)
                : cleanHref;
            const expected = `content${publicPath}`;
            return {
                ruleId: "structure/asset-refs",
                severity: "error",
                file: `${m.pageSlug}.md`,
                message: `Asset reference ${m.href} doesn't match any file in the ` +
                    `content tree. To fix: correct the path, rename the ` +
                    `existing file to match, or drop the asset at ` +
                    `${expected} to satisfy this exact reference.`,
                context: m.href,
            };
        });
    },
};
@@ -11,8 +11,12 @@
11
11
  * - structure/duplicate-slugs (corpus; two sources produced the same URL)
12
12
  */
13
13
  import { registerRule } from "../../registry.js";
14
+ import { assetRefs } from "./asset-refs.js";
15
+ import { internalLinks } from "./internal-links.js";
14
16
  import { localeCoherence } from "./locale-coherence.js";
15
17
  import { namespaceCoherence } from "./namespace-coherence.js";
18
+ import { navTargetExists } from "./nav-target-exists.js";
19
+ import { unresolvedDirectives } from "./unresolved-directives.js";
16
20
  import { versionCoherence } from "./version-coherence.js";
17
21
  let registered = false;
18
22
  /**
@@ -26,6 +30,10 @@ export function registerStructureRules() {
26
30
  registerRule(namespaceCoherence);
27
31
  registerRule(versionCoherence);
28
32
  registerRule(localeCoherence);
33
+ registerRule(navTargetExists);
34
+ registerRule(internalLinks);
35
+ registerRule(assetRefs);
36
+ registerRule(unresolvedDirectives);
29
37
  }
30
38
  /**
31
39
  * Test-only: reset the "registered" flag so unit tests can
@@ -0,0 +1,28 @@
1
+ import { validateRefs } from "../../lib/validate-refs.js";
2
/**
 * Audit rule `structure/internal-links`: reports every absolute
 * internal link that `validateRefs` could not match to a page slug.
 * Returns `[]` when the context carries no pages.
 */
export const internalLinks = {
    id: "structure/internal-links",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every absolute internal `<a href>` resolves to a built page in the corpus.",
    run(rawCtx) {
        const { pages, basePath, knownAssets } = rawCtx;
        if (!pages || pages.length === 0) {
            return [];
        }
        const { linkMisses } = validateRefs(pages, {
            basePath: basePath ?? "",
            // Asset misses are not reported by this rule, so an empty
            // set is an acceptable stand-in.
            knownAssets: knownAssets ?? new Set(),
        });
        const issues = [];
        for (const miss of linkMisses) {
            const message = [
                `Internal link to ${miss.href} doesn't resolve to any built `,
                `page. To fix: correct the path, rename the target page `,
                `to match, or remove the link. (Common cause: page renamed `,
                `without updating the body text. `,
                `Use \`dogsbay site check\` to surface every miss.)`,
            ].join("");
            issues.push({
                ruleId: "structure/internal-links",
                severity: "error",
                file: `${miss.pageSlug}.md`,
                message,
                context: miss.href,
            });
        }
        return issues;
    },
};
@@ -0,0 +1,107 @@
1
+ /**
2
+ * `structure/nav-target-exists` — every `file:` reference in a
3
+ * declared nav file resolves to a content file on disk.
4
+ *
5
+ * Re-runs the same resolution `loadNavFile` does at build time,
6
+ * but in `collect` mode: instead of `console.warn` + label-only
7
+ * fallback (the build-pipeline behaviour), the audit surfaces
8
+ * every miss as a structured Issue. This is what makes nav-file
9
+ * typos:
10
+ *
11
+ * - count in the `dogsbay site check` summary
12
+ * - bump the exit code (error severity)
13
+ * - get categorised under `structure` for selective opt-out
14
+ *
15
+ * Without this rule, a misnamed file in nav.yml emitted a
16
+ * `[dogsbay] Nav file references missing file: ...` line to
17
+ * stderr during build but never entered the audit catalog, so
18
+ * CI couldn't catch it. Reported by the docs team against
19
+ * dogsbay-docs-platform.
20
+ *
21
+ * Source content + nav heuristic: matches `buildNavFromDirectory`
22
+ * — an explicit `source.nav` path wins, otherwise we look for
23
+ * `nav.yml` / `nav.yaml` / `nav.json` in the resolved content
24
+ * dir. If no nav file is present, the rule is a no-op (the site
25
+ * uses directory-scan nav, which has its own validation in the
26
+ * importer).
27
+ */
28
+ import { existsSync } from "node:fs";
29
+ import { isAbsolute, join, resolve } from "node:path";
30
+ import { loadNavFile } from "@dogsbay/format-dogsbay-md";
31
// File names probed, in this order, inside a source's content dir
// when no explicit nav path was supplied (see `resolveNavFile`).
const HEURISTIC_NAV_FILES = ["nav.yml", "nav.yaml", "nav.json"];
32
/**
 * Audit rule `structure/nav-target-exists`: for every audited source
 * that has a nav file, load it in `collect` mode and turn each
 * missing `file:` target — or a nav file that fails to load — into a
 * structured error Issue.
 */
export const navTargetExists = {
    id: "structure/nav-target-exists",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every `file:` reference in nav files (nav.yml / nav.yaml / nav.json) resolves to a content file that exists on disk.",
    run(rawCtx) {
        const { auditSources, siteRoot } = rawCtx;
        if (!auditSources || auditSources.length === 0 || !siteRoot) {
            // Nothing to locate nav files with — typically a unit-test
            // run with pre-imported nav.
            return [];
        }
        const issues = [];
        for (const { contentDir, navOverride } of auditSources) {
            const navPath = resolveNavFile(navOverride, contentDir, siteRoot);
            if (!navPath) {
                // Directory-scan source: there is no nav file to audit.
                continue;
            }
            // `collect` mode: the loader reports misses through the
            // callback instead of console.warn'ing.
            const misses = [];
            try {
                loadNavFile(navPath, contentDir, {
                    missingFile: "collect",
                    onMissingTarget: (m) => misses.push(m),
                });
            }
            catch (err) {
                // The nav file itself is unreadable / malformed —
                // surface that as its own issue and move on to the
                // next source.
                issues.push({
                    ruleId: "structure/nav-target-exists",
                    severity: "error",
                    file: navPath,
                    message: `Nav file ${navPath} failed to load: ${err.message}`,
                });
                continue;
            }
            for (const { file, resolvedAbs } of misses) {
                const message = [
                    `Nav references missing file: ${file} `,
                    `(resolved to ${resolvedAbs}). To fix: rename the `,
                    `file to match, correct the path in the nav file, or `,
                    `remove the entry.`,
                ].join("");
                issues.push({
                    ruleId: "structure/nav-target-exists",
                    severity: "error",
                    file: navPath,
                    message,
                    context: file,
                });
            }
        }
        return issues;
    },
};
91
/**
 * Locate the nav file for one source:
 *   1. An explicit override wins — returned as-is when absolute,
 *      otherwise resolved against `siteRoot`. Its existence is not
 *      checked here.
 *   2. Otherwise the first of HEURISTIC_NAV_FILES that exists in
 *      `contentDir`.
 *   3. Otherwise `undefined` (no nav file to audit).
 */
function resolveNavFile(navOverride, contentDir, siteRoot) {
    if (navOverride) {
        if (isAbsolute(navOverride)) {
            return navOverride;
        }
        return resolve(siteRoot, navOverride);
    }
    return HEURISTIC_NAV_FILES
        .map((fileName) => join(contentDir, fileName))
        .find((candidate) => existsSync(candidate));
}