dogsbay 0.2.0-beta.7 → 0.2.0-beta.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/lib/collect-assets.js +64 -0
- package/dist/audit/lib/validate-refs.js +132 -0
- package/dist/audit/rules/seo/sitemap.js +125 -8
- package/dist/audit/rules/structure/asset-refs.js +41 -0
- package/dist/audit/rules/structure/index.js +8 -0
- package/dist/audit/rules/structure/internal-links.js +28 -0
- package/dist/audit/rules/structure/nav-target-exists.js +107 -0
- package/dist/audit/rules/structure/unresolved-directives.js +108 -0
- package/dist/audit/run.js +4 -0
- package/dist/commands/import-mkdocs.js +97 -66
- package/dist/commands/migrate-asciidoc.js +724 -0
- package/dist/commands/migrate-mkdocs.js +1005 -0
- package/dist/commands/site-build.js +314 -16
- package/dist/commands/site-check.js +21 -0
- package/dist/commands/site-dev.js +182 -23
- package/dist/commands/site-init.js +45 -10
- package/dist/config/defaults.js +30 -23
- package/dist/config/load.js +111 -32
- package/dist/config/to-astro-options.js +5 -0
- package/dist/import-content.js +24 -33
- package/dist/index.js +64 -4
- package/dist/passthrough-astro.js +152 -0
- package/dist/registry.js +8 -0
- package/dist/resolve-autodoc.js +224 -0
- package/dist/site-build/preprocess.js +292 -0
- package/dist/utils/gitignore.js +54 -0
- package/package.json +16 -9
- package/skills/platform/cli-commands/SKILL.md +3 -2
- package/skills/platform/frontmatter-fields/SKILL.md +240 -48
- package/skills/platform/markdown-directives/SKILL.md +123 -10
- package/skills/platform/migration-shape/SKILL.md +314 -0
- package/skills/platform/multi-source/SKILL.md +21 -10
- package/skills/platform/taxonomy-config/SKILL.md +245 -5
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Walk one or more content directories collecting absolute-path
|
|
3
|
+
* asset references that the build's `copyAssets` would emit to
|
|
4
|
+
* `public/`. The audit rule `structure/asset-refs` cross-checks
|
|
5
|
+
* `<img src>` / `<a href>` references against this set so a
|
|
6
|
+
* misnamed file shows up as a structured Issue.
|
|
7
|
+
*
|
|
8
|
+
* Mirrors the extension set used by `copyAssets` in
|
|
9
|
+
* `@dogsbay/format-astro/src/project.ts`. Keep in sync — drift
|
|
10
|
+
* would either flag valid refs as broken or miss real misses.
|
|
11
|
+
*/
|
|
12
|
+
import { existsSync, readdirSync, statSync } from "node:fs";
|
|
13
|
+
import { join, relative } from "node:path";
|
|
14
|
+
// Same sets as format-astro's copyAssets. Keep in sync.
|
|
15
|
+
// Lower-case, dot-prefixed image extensions. `walk` records a file when its
// extension is in this set or in PASSTHROUGH_EXTS.
// NOTE(review): the name suggests the build optimizes these formats — confirm
// against format-astro's copyAssets, which this set is meant to mirror.
const OPTIMIZABLE_EXTS = new Set([".png", ".jpg", ".jpeg", ".gif", ".webp"]);
|
|
16
|
+
// Lower-case, dot-prefixed extensions that `walk` also records as assets,
// alongside OPTIMIZABLE_EXTS.
// NOTE(review): presumably copied to public/ without processing — confirm
// against format-astro's copyAssets.
const PASSTHROUGH_EXTS = new Set([".svg", ".ico", ".pdf"]);
|
|
17
|
+
/**
 * Build the set of public asset URLs (each with a leading slash) that
 * `copyAssets` would emit for the given content directories.
 *
 * Directories that do not exist are skipped. Each existing directory is
 * scanned with itself as the root, so every collected path is relative to
 * the source it came from.
 *
 * @param contentDirs Absolute paths, one per source.
 * @returns {Set<string>} Collected asset paths.
 */
export function collectAssets(contentDirs) {
    const assets = new Set();
    for (const sourceRoot of contentDirs) {
        if (existsSync(sourceRoot)) {
            walk(sourceRoot, sourceRoot, assets);
        }
    }
    return assets;
}
|
|
32
|
+
/**
 * Recursively scan `dir`, adding every file with a recognised asset
 * extension to `out` as "/<path relative to rootDir>" using forward slashes.
 *
 * Unreadable directories and entries that cannot be stat'ed are skipped
 * silently; the scan is best-effort.
 *
 * @param rootDir Source root that collected paths are made relative to.
 * @param dir Directory currently being scanned.
 * @param out Set that receives the collected paths.
 */
function walk(rootDir, dir, out) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const name of names) {
        const absPath = join(dir, name);
        let stats;
        try {
            stats = statSync(absPath);
        }
        catch {
            continue;
        }
        if (stats.isDirectory()) {
            walk(rootDir, absPath, out);
            continue;
        }
        // Extension = everything from the last dot, compared lower-cased.
        const dotAt = name.lastIndexOf(".");
        if (dotAt < 0) {
            continue;
        }
        const ext = name.slice(dotAt).toLowerCase();
        const isAsset = OPTIMIZABLE_EXTS.has(ext) || PASSTHROUGH_EXTS.has(ext);
        if (!isAsset) {
            continue;
        }
        // Public URL is "/<rel>" with forward slashes — copyAssets
        // copies rel-to-source verbatim into public/.
        const relPath = relative(rootDir, absPath);
        out.add("/" + relPath.replace(/\\/g, "/"));
    }
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// Matches references that begin with a URI scheme (`scheme:`) or are
// protocol-relative (`//host/...`). `check` ignores anything this matches.
const EXTERNAL_RE = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
|
|
2
|
+
// Matches same-page fragment references (`#section`); `check` ignores these.
const ANCHOR_ONLY_RE = /^#/;
|
|
3
|
+
// Matches `data:` URIs; `check` ignores these. Any string this matches is
// also matched by the scheme branch of EXTERNAL_RE, which `check` tests first.
const DATA_URI_RE = /^data:/i;
|
|
4
|
+
// Case-insensitive test for a trailing "asset" file extension; used by
// `looksLikeAsset` to route an `<a href>` to the asset set instead of the
// page set.
// NOTE(review): `.ico` is not listed here although collect-assets.js collects
// it, and several extensions here (zip, csv, mp4, …) are not collected there —
// confirm whether that asymmetry is intentional.
const ASSET_EXT_RE = /\.(png|jpe?g|gif|webp|svg|avif|pdf|zip|csv|tsv|json|yml|yaml|toml|mp4|webm|mp3|ogg|woff2?|ttf|otf)$/i;
|
|
5
|
+
/**
 * Decide whether an `<a href>` points at an asset rather than a page.
 *
 * Image sources are always assets, but an href can be either. The
 * heuristic: drop any query string and fragment, then test what remains
 * for a known asset file extension.
 *
 * @param href Reference exactly as written by the author.
 * @returns {boolean} True when the path part ends in an asset extension.
 */
function looksLikeAsset(href) {
    const [pathOnly] = href.split(/[?#]/);
    return ASSET_EXT_RE.test(pathOnly);
}
|
|
17
|
+
/**
 * Validate every absolute link and image reference across a page corpus.
 *
 * Slugs are collected up front in the forms authors commonly write. For
 * slug "tutorials/quickstart", `check` later accepts:
 *   /tutorials/quickstart
 *   /tutorials/quickstart/
 *   /tutorials/quickstart.md
 *   /<basePath>/tutorials/quickstart (and the above three)
 * Index pages are also registered under their parent directory.
 *
 * @param pages Pages exposing `slug` and `tree`.
 * @param options `basePath` (a trailing slash is ignored) and `knownAssets`.
 * @returns `{ linkMisses, assetMisses }`, filled in by `check`.
 */
export function validateRefs(pages, options) {
    const basePath = options.basePath.replace(/\/$/, "");
    const { knownAssets } = options;
    const INDEX_TAIL = "/index";
    const pageSlugs = new Set();
    for (const { slug } of pages) {
        pageSlugs.add(slug);
        // An index page is reachable through its parent directory too.
        if (slug === "index") {
            pageSlugs.add("");
        }
        if (slug.endsWith(INDEX_TAIL)) {
            pageSlugs.add(slug.slice(0, slug.length - INDEX_TAIL.length));
        }
    }
    const result = { linkMisses: [], assetMisses: [] };
    for (const page of pages) {
        const onReference = (href, kind) => {
            check(href, kind, page.slug, basePath, pageSlugs, knownAssets, result);
        };
        walkTree(page.tree, onReference);
    }
    return result;
}
|
|
45
|
+
/**
 * Depth-first traversal of block-level nodes that reports every href/src.
 *
 * Per node, in order: inline content, pre-rendered HTML, `props.href`
 * (reported as "link"), `props.src` (reported as "image"), then children.
 *
 * @param nodes Iterable of block nodes.
 * @param visit Callback invoked as `visit(reference, kind)`.
 */
function walkTree(nodes, visit) {
    for (const node of nodes) {
        if (node.inline) {
            walkInline(node.inline, visit);
        }
        if (node.html) {
            walkHtml(node.html, visit);
        }
        // Only non-empty string props count as references.
        const linkTarget = node.props?.href;
        if (linkTarget && typeof linkTarget === "string") {
            visit(linkTarget, "link");
        }
        const imageSource = node.props?.src;
        if (imageSource && typeof imageSource === "string") {
            visit(imageSource, "image");
        }
        if (node.children) {
            walkTree(node.children, visit);
        }
    }
}
|
|
62
|
+
/**
 * Traverse inline nodes and report link and image references.
 *
 * - "link" with a string `href`: reported as "link", then its children
 *   are traversed.
 * - "image" with a string `src`: reported as "image".
 * - "highlight": only its children are traversed.
 * Every other node type is ignored, including any children it carries.
 * NOTE(review): confirm no other inline node type can wrap a link.
 *
 * @param nodes Iterable of inline nodes.
 * @param visit Callback invoked as `visit(reference, kind)`.
 */
function walkInline(nodes, visit) {
    for (const node of nodes) {
        switch (node.type) {
            case "link":
                if (typeof node.href === "string") {
                    visit(node.href, "link");
                    if (node.children) {
                        walkInline(node.children, visit);
                    }
                }
                break;
            case "image":
                if (typeof node.src === "string") {
                    visit(node.src, "image");
                }
                break;
            case "highlight":
                if (node.children) {
                    walkInline(node.children, visit);
                }
                break;
            default:
                break;
        }
    }
}
|
|
77
|
+
/**
 * Scan a pre-rendered HTML string (Starlight importer output, MkDocs raw
 * HTML, etc.) for `<a href>` and `<img src>` references.
 *
 * Regex-based on purpose: the audit doesn't need a full HTML parser.
 * Both double- and single-quoted attribute values are recognised, with
 * optional whitespace around `=`. Unquoted values are not matched. All
 * links are reported first, then all images.
 *
 * @param html HTML fragment; coerced to a string before scanning.
 * @param visit Callback invoked as `visit(reference, kind)`.
 */
function walkHtml(html, visit) {
    const text = String(html);
    const report = (pattern, kind) => {
        for (const match of text.matchAll(pattern)) {
            // Group 1 holds a double-quoted value, group 2 a single-quoted one.
            visit(match[1] ?? match[2], kind);
        }
    };
    report(/<a\b[^>]*\shref\s*=\s*(?:"([^"]+)"|'([^']+)')/gi, "link");
    report(/<img\b[^>]*\ssrc\s*=\s*(?:"([^"]+)"|'([^']+)')/gi, "image");
}
|
|
89
|
+
/**
 * Validate a single reference and record a miss in `result` if it does
 * not resolve.
 *
 * Out of scope (returns without recording): empty, external,
 * fragment-only, `data:` and relative references. Only absolute paths
 * are checked.
 *
 * Image sources, and hrefs that `looksLikeAsset` accepts, are looked up
 * in `knownAssets`; everything else is looked up in `pageSlugs`. Query
 * and fragment are ignored, and a leading `basePath` is stripped, so
 * `/docs/intro` and `/intro` both match slug "intro" (basePath="/docs").
 *
 * Percent-encoded references (`/img/my%20pic.png`) are accepted when
 * either the raw or the decoded form matches, because the asset and
 * slug sets hold decoded filesystem names.
 *
 * @param href Reference as written by the author.
 * @param kind "image" or "link".
 * @param pageSlug Slug of the page containing the reference.
 * @param basePath Site base path without a trailing slash, or "".
 * @param pageSlugs Set of known page slugs.
 * @param knownAssets Set of known asset paths, e.g. "/_assets/foo.png".
 * @param result `{ linkMisses, assetMisses }` accumulator, mutated in place.
 */
function check(href, kind, pageSlug, basePath, pageSlugs, knownAssets, result) {
    if (!href || EXTERNAL_RE.test(href)) {
        return;
    }
    if (ANCHOR_ONLY_RE.test(href)) {
        return;
    }
    if (DATA_URI_RE.test(href)) {
        return;
    }
    if (!href.startsWith("/")) {
        return; // relative — out of scope for v1
    }
    // Strip query and fragment for the comparison.
    const cleanHref = href.split(/[?#]/)[0];
    const hasBasePrefix = Boolean(basePath) && cleanHref.startsWith(`${basePath}/`);
    // Image src — always asset.
    // Anchor href — asset if it has an asset extension, else page.
    const wantsAsset = kind === "image" || looksLikeAsset(cleanHref);
    if (wantsAsset) {
        // Asset paths are absolute under the public root.
        const lookup = hasBasePrefix ? cleanHref.slice(basePath.length) : cleanHref;
        const found = knownAssets.has(lookup) || knownAssets.has(decodePathSafely(lookup));
        if (!found) {
            result.assetMisses.push({
                pageSlug,
                href,
                resolvedAgainst: "asset",
            });
        }
        return;
    }
    // Page lookup: strip basePath, the leading and trailing slash, and `.md`.
    let path = cleanHref;
    if (basePath && (path === basePath || hasBasePrefix)) {
        path = path.slice(basePath.length);
    }
    const trimmed = path
        .replace(/^\//, "")
        .replace(/\/$/, "")
        .replace(/\.md$/i, "");
    const found = pageSlugs.has(trimmed) || pageSlugs.has(decodePathSafely(trimmed));
    if (!found) {
        result.linkMisses.push({ pageSlug, href, resolvedAgainst: "page" });
    }
}
/** Percent-decode a path; return it unchanged when the encoding is malformed. */
function decodePathSafely(path) {
    try {
        return decodeURIComponent(path);
    }
    catch {
        return path;
    }
}
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
* `_resetSitemapCache()`.
|
|
23
23
|
*/
|
|
24
24
|
import { existsSync, readFileSync } from "node:fs";
|
|
25
|
-
import { join } from "node:path";
|
|
25
|
+
import { basename, dirname, join } from "node:path";
|
|
26
26
|
const STATE_CACHE = new Map();
|
|
27
27
|
export function _resetSitemapCache() {
|
|
28
28
|
STATE_CACHE.clear();
|
|
@@ -50,11 +50,26 @@ function getState(distRoot) {
|
|
|
50
50
|
// sitemap integration which produces well-formed XML, so
|
|
51
51
|
// the only failure mode is "missing entirely" or
|
|
52
52
|
// "doesn't parse at all."
|
|
53
|
-
|
|
53
|
+
const isIndex = /<sitemapindex[\s>]/.test(content);
|
|
54
|
+
const isUrlset = /<urlset[\s>]/.test(content);
|
|
55
|
+
if (isIndex || isUrlset) {
|
|
54
56
|
sitemapValid = true;
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
if (isIndex) {
|
|
58
|
+
// sitemap-index.xml — each <loc> points at a child
|
|
59
|
+
// sitemap file (not a page). Read each child and union
|
|
60
|
+
// its <loc> entries (those ARE the page URLs). Without
|
|
61
|
+
// this recursion, sitemapLocs would carry sitemap file
|
|
62
|
+
// URLs only and every page would be flagged "missing
|
|
63
|
+
// from sitemap" — the entire audit class is unusable.
|
|
64
|
+
collectChildSitemapLocs(content, sitemapPath, sitemapLocs);
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
// Plain <urlset> — extract <loc>s directly. Same as
|
|
68
|
+
// before.
|
|
69
|
+
const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
|
|
70
|
+
for (const m of matches) {
|
|
71
|
+
sitemapLocs.add(m[1].trim());
|
|
72
|
+
}
|
|
58
73
|
}
|
|
59
74
|
}
|
|
60
75
|
}
|
|
@@ -123,7 +138,7 @@ export const sitemapComplete = {
|
|
|
123
138
|
severity: "warning",
|
|
124
139
|
description: "Every built HTML page is listed in the sitemap.",
|
|
125
140
|
run(ctx) {
|
|
126
|
-
const { file, distRoot, allFiles } = ctx;
|
|
141
|
+
const { file, distRoot, allFiles, config } = ctx;
|
|
127
142
|
const state = getState(distRoot);
|
|
128
143
|
// Emit once per audit run — we have access to allFiles here.
|
|
129
144
|
if (state.emitted.has("seo/sitemap-complete"))
|
|
@@ -134,13 +149,33 @@ export const sitemapComplete = {
|
|
|
134
149
|
// to add here.
|
|
135
150
|
return [];
|
|
136
151
|
}
|
|
152
|
+
// urlBase comes from the path component of site.url. When the
|
|
153
|
+
// site is mounted at a subpath (typical GH Pages project deploy:
|
|
154
|
+
// `site.url: https://user.github.io/repo`), sitemap entries
|
|
155
|
+
// carry the urlBase prefix (the emitter writes absolute URLs),
|
|
156
|
+
// but the filesystem-derived urlPath below doesn't — files
|
|
157
|
+
// live at `dist/<page>/index.html` regardless of urlBase. Pass
|
|
158
|
+
// urlBase into the comparison so the suffix-strip step
|
|
159
|
+
// accounts for it. See plans/sitemap-audit-urlbase.md.
|
|
160
|
+
const urlBase = urlBaseFromSiteUrl(config?.siteUrl);
|
|
137
161
|
// Map each html file to its likely "page URL" form. Astro
|
|
138
162
|
// emits `docs/intro/index.html` for the URL `/docs/intro/`,
|
|
139
163
|
// so we strip `/index.html` and ensure a leading slash.
|
|
164
|
+
//
|
|
165
|
+
// Skip pages with `<meta name="robots" content="noindex">`.
|
|
166
|
+
// The sitemap emitter intentionally excludes those (per the
|
|
167
|
+
// platform contract documented in reference/frontmatter.md
|
|
168
|
+
// and `packages/format-astro/src/sitemap.ts` isExcluded), so
|
|
169
|
+
// flagging them as "missing" would tell users to fix something
|
|
170
|
+
// that's working as intended. The auto-emitted taxonomy index
|
|
171
|
+
// pages (/tags/, /by-audience/, …) all ship with noindex.
|
|
140
172
|
const issues = [];
|
|
141
173
|
for (const f of allFiles) {
|
|
174
|
+
const robots = f.$('meta[name="robots"]').attr("content") ?? "";
|
|
175
|
+
if (/\bnoindex\b/i.test(robots))
|
|
176
|
+
continue;
|
|
142
177
|
const expectedPath = htmlPathToUrlPath(f.path);
|
|
143
|
-
const found = pathListedInSitemap(expectedPath, state.sitemapLocs);
|
|
178
|
+
const found = pathListedInSitemap(expectedPath, state.sitemapLocs, urlBase);
|
|
144
179
|
if (!found) {
|
|
145
180
|
issues.push({
|
|
146
181
|
ruleId: "seo/sitemap-complete",
|
|
@@ -194,6 +229,51 @@ export const sitemapRobotsCoherence = {
|
|
|
194
229
|
return issues;
|
|
195
230
|
},
|
|
196
231
|
};
|
|
232
|
+
/**
 * Expand a `<sitemapindex>` document: for every child sitemap named by a
 * `<loc>`, read that file from disk and add its own `<loc>` values (the
 * page URLs) to `out`.
 *
 * Resolution strategy: only the basename of the child URL is used, and
 * it is resolved against the directory holding the index file. That
 * covers both layouts in use today — host-root
 * (`dist/sitemap-index.xml` + `dist/sitemap-0.xml`) and per-mount
 * (`dist/<basePath>/sitemap-index.xml` + `dist/<basePath>/sitemap-0.xml`).
 *
 * Children that are missing locally or unreadable are skipped silently;
 * a partial sitemap should not crash the audit. The page-level rule then
 * reports the affected pages as missing, which is no worse than the
 * behaviour before this recursion existed.
 *
 * @param indexContent XML text of the sitemap index.
 * @param indexPath Filesystem path of the index file.
 * @param out Set that receives the page URLs.
 */
function collectChildSitemapLocs(indexContent, indexPath, out) {
    const baseDir = dirname(indexPath);
    const childUrls = [...indexContent.matchAll(/<loc>([^<]+)<\/loc>/g)].map((hit) => hit[1].trim());
    for (const url of childUrls) {
        let fileName;
        try {
            fileName = basename(new URL(url).pathname);
        }
        catch {
            // Not an absolute URL — treat the value as a plain path.
            fileName = basename(url);
        }
        if (!fileName || fileName === "/") {
            continue;
        }
        const localPath = join(baseDir, fileName);
        if (!existsSync(localPath)) {
            continue;
        }
        try {
            const xml = readFileSync(localPath, "utf-8");
            for (const hit of xml.matchAll(/<loc>([^<]+)<\/loc>/g)) {
                out.add(hit[1].trim());
            }
        }
        catch {
            // Skip unreadable child — see function-level comment.
        }
    }
}
|
|
197
277
|
/**
|
|
198
278
|
* Convert an HTML file path within `dist/` into the public URL
|
|
199
279
|
* path the sitemap is likely to list. Drops `/index.html` and
|
|
@@ -216,9 +296,18 @@ function htmlPathToUrlPath(htmlPath) {
|
|
|
216
296
|
* Sitemap entries can be absolute (`https://example.com/foo/`)
|
|
217
297
|
* or relative (`/foo/`); we match either by suffix.
|
|
218
298
|
*
|
|
299
|
+
* `urlBase` is the path component of `site.url` — e.g. `/repo` for
|
|
300
|
+
* a GH Pages project deploy at `https://user.github.io/repo`. When
|
|
301
|
+
* non-empty, the platform's sitemap emitter writes absolute URLs
|
|
302
|
+
* including that segment (`https://user.github.io/repo/intro/`),
|
|
303
|
+
* but the filesystem-derived `urlPath` doesn't have it (files live
|
|
304
|
+
* at `dist/intro/index.html`). Strip the urlBase off the sitemap
|
|
305
|
+
* suffix before comparing so the audit doesn't flag every page as
|
|
306
|
+
* missing on every subpath-mounted deploy.
|
|
307
|
+
*
|
|
219
308
|
* Trailing slash tolerant: `/foo/` matches `/foo` too.
|
|
220
309
|
*/
|
|
221
|
-
function pathListedInSitemap(urlPath, locs) {
|
|
310
|
+
function pathListedInSitemap(urlPath, locs, urlBase) {
|
|
222
311
|
for (const loc of locs) {
|
|
223
312
|
if (loc === urlPath)
|
|
224
313
|
return true;
|
|
@@ -230,6 +319,16 @@ function pathListedInSitemap(urlPath, locs) {
|
|
|
230
319
|
if (afterHost >= 0)
|
|
231
320
|
suffix = loc.slice(afterHost);
|
|
232
321
|
}
|
|
322
|
+
// Strip the urlBase prefix when the sitemap entry carries one
|
|
323
|
+
// and the comparison target doesn't.
|
|
324
|
+
if (urlBase) {
|
|
325
|
+
if (suffix === urlBase) {
|
|
326
|
+
suffix = "/";
|
|
327
|
+
}
|
|
328
|
+
else if (suffix.startsWith(`${urlBase}/`)) {
|
|
329
|
+
suffix = suffix.slice(urlBase.length);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
233
332
|
if (suffix === urlPath)
|
|
234
333
|
return true;
|
|
235
334
|
// Trailing-slash flexibility
|
|
@@ -238,3 +337,21 @@ function pathListedInSitemap(urlPath, locs) {
|
|
|
238
337
|
}
|
|
239
338
|
return false;
|
|
240
339
|
}
|
|
340
|
+
/**
 * Derive the urlBase prefix (the path component of `site.url`) that is
 * stripped from sitemap entries before comparison.
 *
 * One trailing slash is removed. Returns `""` when site.url is missing,
 * has no path, or cannot be parsed, so the comparison falls back to the
 * prefix-less behaviour.
 *
 * @param siteUrl Configured site URL, possibly undefined.
 * @returns {string} e.g. "/repo" for "https://user.github.io/repo/".
 */
function urlBaseFromSiteUrl(siteUrl) {
    if (!siteUrl) {
        return "";
    }
    let pathname;
    try {
        ({ pathname } = new URL(siteUrl));
    }
    catch {
        return "";
    }
    const withoutTrailingSlash = pathname.replace(/\/$/, "");
    if (withoutTrailingSlash === "/") {
        return "";
    }
    return withoutTrailingSlash;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { validateRefs } from "../../lib/validate-refs.js";
|
|
2
|
+
/**
 * Audit rule `structure/asset-refs`: every absolute `<img src>` and
 * asset-style `<a href>` must resolve to a file under the content tree.
 *
 * `run` returns one error Issue per miss reported by `validateRefs`. It
 * is a no-op when the context has no pages or no `knownAssets` set.
 */
export const assetRefs = {
    id: "structure/asset-refs",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every absolute `<img src>` and asset-style `<a href>` resolves to a file under the content tree.",
    run(rawCtx) {
        const ctx = rawCtx;
        if (!ctx.pages || ctx.pages.length === 0) {
            return [];
        }
        if (!ctx.knownAssets) {
            // No asset set available — typically a unit-test ctx that
            // didn't bother. No-op rather than false-positive.
            return [];
        }
        const { assetMisses } = validateRefs(ctx.pages, {
            basePath: ctx.basePath ?? "",
            knownAssets: ctx.knownAssets,
        });
        // Same normalisation validateRefs applies: ignore one trailing slash.
        const basePath = (ctx.basePath ?? "").replace(/\/$/, "");
        return assetMisses.map((m) => {
            // Where the user should drop the file to satisfy THIS ref —
            // strip the optional basePath (the lookup strips it too, so a
            // file placed under it would not match), then prepend
            // `content`. (We can't know the exact content dir layout for
            // multi-source sites without more plumbing; keep it intuitive.)
            const cleanHref = m.href.split(/[?#]/)[0];
            const sourceRelative = basePath && cleanHref.startsWith(`${basePath}/`)
                ? cleanHref.slice(basePath.length)
                : cleanHref;
            const expected = `content${sourceRelative}`;
            return {
                ruleId: "structure/asset-refs",
                severity: "error",
                file: `${m.pageSlug}.md`,
                message: `Asset reference ${m.href} doesn't match any file in the ` +
                    `content tree. To fix: correct the path, rename the ` +
                    `existing file to match, or drop the asset at ` +
                    `${expected} to satisfy this exact reference.`,
                context: m.href,
            };
        });
    },
};
|
|
@@ -11,8 +11,12 @@
|
|
|
11
11
|
* - structure/duplicate-slugs (corpus; two sources produced the same URL)
|
|
12
12
|
*/
|
|
13
13
|
import { registerRule } from "../../registry.js";
|
|
14
|
+
import { assetRefs } from "./asset-refs.js";
|
|
15
|
+
import { internalLinks } from "./internal-links.js";
|
|
14
16
|
import { localeCoherence } from "./locale-coherence.js";
|
|
15
17
|
import { namespaceCoherence } from "./namespace-coherence.js";
|
|
18
|
+
import { navTargetExists } from "./nav-target-exists.js";
|
|
19
|
+
import { unresolvedDirectives } from "./unresolved-directives.js";
|
|
16
20
|
import { versionCoherence } from "./version-coherence.js";
|
|
17
21
|
let registered = false;
|
|
18
22
|
/**
|
|
@@ -26,6 +30,10 @@ export function registerStructureRules() {
|
|
|
26
30
|
registerRule(namespaceCoherence);
|
|
27
31
|
registerRule(versionCoherence);
|
|
28
32
|
registerRule(localeCoherence);
|
|
33
|
+
registerRule(navTargetExists);
|
|
34
|
+
registerRule(internalLinks);
|
|
35
|
+
registerRule(assetRefs);
|
|
36
|
+
registerRule(unresolvedDirectives);
|
|
29
37
|
}
|
|
30
38
|
/**
|
|
31
39
|
* Test-only: reset the "registered" flag so unit tests can
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { validateRefs } from "../../lib/validate-refs.js";
|
|
2
|
+
/**
 * Audit rule `structure/internal-links`: every absolute internal
 * `<a href>` must resolve to a page in the corpus.
 *
 * `run` returns one error Issue per link miss reported by
 * `validateRefs`, and nothing when the context carries no pages. An
 * empty asset set is substituted when the context has none.
 */
export const internalLinks = {
    id: "structure/internal-links",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every absolute internal `<a href>` resolves to a built page in the corpus.",
    run(rawCtx) {
        const { pages, basePath, knownAssets } = rawCtx;
        const hasPages = Boolean(pages) && pages.length !== 0;
        if (!hasPages) {
            return [];
        }
        const report = validateRefs(pages, {
            basePath: basePath ?? "",
            knownAssets: knownAssets ?? new Set(),
        });
        const toIssue = ({ pageSlug, href }) => {
            const message = [
                `Internal link to ${href} doesn't resolve to any built `,
                "page. To fix: correct the path, rename the target page ",
                "to match, or remove the link. (Common cause: page renamed ",
                "without updating the body text. ",
                "Use `dogsbay site check` to surface every miss.)",
            ].join("");
            return {
                ruleId: "structure/internal-links",
                severity: "error",
                file: `${pageSlug}.md`,
                message,
                context: href,
            };
        };
        return report.linkMisses.map((miss) => toIssue(miss));
    },
};
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `structure/nav-target-exists` — every `file:` reference in a
|
|
3
|
+
* declared nav file resolves to a content file on disk.
|
|
4
|
+
*
|
|
5
|
+
* Re-runs the same resolution `loadNavFile` does at build time,
|
|
6
|
+
* but in `collect` mode: instead of `console.warn` + label-only
|
|
7
|
+
* fallback (the build-pipeline behaviour), the audit surfaces
|
|
8
|
+
* every miss as a structured Issue. This is what makes nav-file
|
|
9
|
+
* typos:
|
|
10
|
+
*
|
|
11
|
+
* - count in the `dogsbay site check` summary
|
|
12
|
+
* - bump the exit code (error severity)
|
|
13
|
+
* - get categorised under `structure` for selective opt-out
|
|
14
|
+
*
|
|
15
|
+
* Without this rule, a misnamed file in nav.yml emitted a
|
|
16
|
+
* `[dogsbay] Nav file references missing file: ...` line to
|
|
17
|
+
* stderr during build but never entered the audit catalog, so
|
|
18
|
+
* CI couldn't catch it. Reported by the docs team against
|
|
19
|
+
* dogsbay-docs-platform.
|
|
20
|
+
*
|
|
21
|
+
* Source content + nav heuristic: matches `buildNavFromDirectory`
|
|
22
|
+
* — an explicit `source.nav` path wins, otherwise we look for
|
|
23
|
+
* `nav.yml` / `nav.yaml` / `nav.json` in the resolved content
|
|
24
|
+
* dir. If no nav file is present, the rule is a no-op (the site
|
|
25
|
+
* uses directory-scan nav, which has its own validation in the
|
|
26
|
+
* importer).
|
|
27
|
+
*/
|
|
28
|
+
import { existsSync } from "node:fs";
|
|
29
|
+
import { isAbsolute, join, resolve } from "node:path";
|
|
30
|
+
import { loadNavFile } from "@dogsbay/format-dogsbay-md";
|
|
31
|
+
// File names probed, in this order, inside a source's content dir when no
// explicit nav override is configured; `resolveNavFile` returns the first
// one that exists.
const HEURISTIC_NAV_FILES = ["nav.yml", "nav.yaml", "nav.json"];
|
|
32
|
+
/**
 * Audit rule `structure/nav-target-exists`: every `file:` reference in a
 * nav file must resolve to a content file on disk.
 *
 * For each audit source, `run` locates the nav file and loads it in
 * `collect` mode, so missing targets arrive through a callback rather
 * than being warned about. Each one becomes an error Issue. A nav file
 * that fails to load yields a single distinct Issue for that source.
 * Sources without a nav file are skipped.
 */
export const navTargetExists = {
    id: "structure/nav-target-exists",
    category: "structure",
    stage: "source-corpus",
    severity: "error",
    description: "Every `file:` reference in nav files (nav.yml / nav.yaml / nav.json) resolves to a content file that exists on disk.",
    run(rawCtx) {
        const { auditSources, siteRoot } = rawCtx;
        const canLocateNavFiles = Boolean(auditSources) && auditSources.length !== 0 && Boolean(siteRoot);
        if (!canLocateNavFiles) {
            // No way to find nav files — typically a unit-test run with
            // pre-imported nav.
            return [];
        }
        const issues = [];
        for (const { contentDir, navOverride } of auditSources) {
            const navPath = resolveNavFile(navOverride, contentDir, siteRoot);
            if (!navPath) {
                continue; // directory-scan source → skip
            }
            // `collect` mode: the loader hands each missing target to
            // onMissingTarget instead of console.warn'ing.
            const misses = [];
            try {
                loadNavFile(navPath, contentDir, {
                    missingFile: "collect",
                    onMissingTarget: (m) => misses.push(m),
                });
            }
            catch (err) {
                // The nav file itself is unreadable or malformed. The build
                // warns and falls back to a directory scan; the audit makes
                // the condition visible as its own issue.
                issues.push({
                    ruleId: "structure/nav-target-exists",
                    severity: "error",
                    file: navPath,
                    message: `Nav file ${navPath} failed to load: ${err.message}`,
                });
                continue;
            }
            issues.push(...misses.map((miss) => ({
                ruleId: "structure/nav-target-exists",
                severity: "error",
                file: navPath,
                message: `Nav references missing file: ${miss.file} ` +
                    `(resolved to ${miss.resolvedAbs}). To fix: rename the ` +
                    `file to match, correct the path in the nav file, or ` +
                    `remove the entry.`,
                context: miss.file,
            })));
        }
        return issues;
    },
};
|
|
91
|
+
/**
 * Locate the nav file for one source, mirroring the resolution
 * `buildNavFromDirectory` uses:
 *   1. An explicit `source.nav` wins. Absolute paths are returned as-is;
 *      relative ones are resolved against siteRoot.
 *   2. Otherwise the first of nav.{yml,yaml,json} that exists in the
 *      resolved content dir.
 *   3. Otherwise undefined (directory-scan nav — no nav file to audit).
 *
 * @param navOverride Explicit nav path from config, if any.
 * @param contentDir Resolved content directory of the source.
 * @param siteRoot Site root used to resolve a relative override.
 * @returns {string | undefined} Path of the nav file, if one applies.
 */
function resolveNavFile(navOverride, contentDir, siteRoot) {
    if (navOverride) {
        if (isAbsolute(navOverride)) {
            return navOverride;
        }
        return resolve(siteRoot, navOverride);
    }
    return HEURISTIC_NAV_FILES
        .map((fileName) => join(contentDir, fileName))
        .find((candidate) => existsSync(candidate));
}
|