dogsbay 0.2.0-beta.45 → 0.2.0-beta.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/rules/seo/sitemap.js +65 -5
- package/dist/audit/rules/structure/index.js +2 -0
- package/dist/audit/rules/structure/unresolved-directives.js +108 -0
- package/dist/commands/migrate-asciidoc.js +628 -0
- package/dist/commands/site-build.js +91 -2
- package/dist/commands/site-init.js +19 -1
- package/dist/config/load.js +31 -0
- package/dist/config/to-astro-options.js +2 -0
- package/dist/import-content.js +2 -0
- package/dist/index.js +28 -0
- package/dist/site-build/preprocess.js +266 -0
- package/dist/utils/gitignore.js +54 -0
- package/package.json +14 -10
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
* `_resetSitemapCache()`.
|
|
23
23
|
*/
|
|
24
24
|
import { existsSync, readFileSync } from "node:fs";
|
|
25
|
-
import { join } from "node:path";
|
|
25
|
+
import { basename, dirname, join } from "node:path";
|
|
26
26
|
const STATE_CACHE = new Map();
|
|
27
27
|
export function _resetSitemapCache() {
|
|
28
28
|
STATE_CACHE.clear();
|
|
@@ -50,11 +50,26 @@ function getState(distRoot) {
|
|
|
50
50
|
// sitemap integration which produces well-formed XML, so
|
|
51
51
|
// the only failure mode is "missing entirely" or
|
|
52
52
|
// "doesn't parse at all."
|
|
53
|
-
|
|
53
|
+
const isIndex = /<sitemapindex[\s>]/.test(content);
|
|
54
|
+
const isUrlset = /<urlset[\s>]/.test(content);
|
|
55
|
+
if (isIndex || isUrlset) {
|
|
54
56
|
sitemapValid = true;
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
if (isIndex) {
|
|
58
|
+
// sitemap-index.xml — each <loc> points at a child
|
|
59
|
+
// sitemap file (not a page). Read each child and union
|
|
60
|
+
// its <loc> entries (those ARE the page URLs). Without
|
|
61
|
+
// this recursion, sitemapLocs would carry sitemap file
|
|
62
|
+
// URLs only and every page would be flagged "missing
|
|
63
|
+
// from sitemap" — the entire audit class is unusable.
|
|
64
|
+
collectChildSitemapLocs(content, sitemapPath, sitemapLocs);
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
// Plain <urlset> — extract <loc>s directly. Same as
|
|
68
|
+
// before.
|
|
69
|
+
const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
|
|
70
|
+
for (const m of matches) {
|
|
71
|
+
sitemapLocs.add(m[1].trim());
|
|
72
|
+
}
|
|
58
73
|
}
|
|
59
74
|
}
|
|
60
75
|
}
|
|
@@ -214,6 +229,51 @@ export const sitemapRobotsCoherence = {
|
|
|
214
229
|
return issues;
|
|
215
230
|
},
|
|
216
231
|
};
|
|
232
|
+
/**
 * Expand a `<sitemapindex>` document into the page URLs listed by its
 * child sitemaps.
 *
 * Every `<loc>` in the index names a child sitemap file rather than a
 * page. Only the final path segment of that reference is used: it is
 * looked up next to the index file itself, which works for both the
 * host-root layout (`dist/sitemap-index.xml` + `dist/sitemap-0.xml`)
 * and the per-mount layout (`dist/<basePath>/sitemap-index.xml` +
 * `dist/<basePath>/sitemap-0.xml`).
 *
 * This is deliberately best-effort: a child that has no usable file
 * name, does not exist locally, or cannot be read is skipped without
 * reporting anything, so a partially shipped sitemap never aborts the
 * audit.
 *
 * @param {string} indexContent - Raw XML text of the sitemap index.
 * @param {string} indexPath - Filesystem path the index was read from.
 * @param {Set<string>} out - Receives every trimmed `<loc>` value found
 *   in the readable child sitemaps (mutated in place).
 * @returns {void}
 */
function collectChildSitemapLocs(indexContent, indexPath, out) {
    // Trimmed text of every <loc> element in an XML string.
    const readLocs = (xml) => Array.from(xml.matchAll(/<loc>([^<]+)<\/loc>/g), (hit) => hit[1].trim());
    // Last path segment of a child reference; references that are not
    // absolute URLs are treated as plain paths.
    const toFileName = (ref) => {
        try {
            return basename(new URL(ref).pathname);
        }
        catch {
            return basename(ref);
        }
    };
    const baseDir = dirname(indexPath);
    for (const ref of readLocs(indexContent)) {
        const fileName = toFileName(ref);
        const usable = Boolean(fileName) && fileName !== "/";
        if (!usable) {
            continue;
        }
        const localFile = join(baseDir, fileName);
        if (existsSync(localFile)) {
            try {
                const childXml = readFileSync(localFile, "utf-8");
                for (const pageUrl of readLocs(childXml)) {
                    out.add(pageUrl);
                }
            }
            catch {
                // Unreadable child: skip it, per the best-effort contract.
            }
        }
    }
}
|
|
217
277
|
/**
|
|
218
278
|
* Convert an HTML file path within `dist/` into the public URL
|
|
219
279
|
* path the sitemap is likely to list. Drops `/index.html` and
|
|
@@ -16,6 +16,7 @@ import { internalLinks } from "./internal-links.js";
|
|
|
16
16
|
import { localeCoherence } from "./locale-coherence.js";
|
|
17
17
|
import { namespaceCoherence } from "./namespace-coherence.js";
|
|
18
18
|
import { navTargetExists } from "./nav-target-exists.js";
|
|
19
|
+
import { unresolvedDirectives } from "./unresolved-directives.js";
|
|
19
20
|
import { versionCoherence } from "./version-coherence.js";
|
|
20
21
|
let registered = false;
|
|
21
22
|
/**
|
|
@@ -32,6 +33,7 @@ export function registerStructureRules() {
|
|
|
32
33
|
registerRule(navTargetExists);
|
|
33
34
|
registerRule(internalLinks);
|
|
34
35
|
registerRule(assetRefs);
|
|
36
|
+
registerRule(unresolvedDirectives);
|
|
35
37
|
}
|
|
36
38
|
/**
|
|
37
39
|
* Test-only: reset the "registered" flag so unit tests can
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * Strict variable form: `{{ name }}`, `{{ obj.field }}`,
 * `{{ name|filter }}`, `{{ name | filter }}`. Identifier-then-optional-filter
 * shape so a stray `{{` in legitimate prose (e.g. discussing
 * mathematical notation) doesn't false-positive.
 *
 * Shape, left to right: `{{`, optional whitespace, an identifier that
 * starts with a letter or underscore and continues with word
 * characters, dots or hyphens (capture group 1), optional whitespace,
 * an optional `|...` filter segment that runs up to the first `}`,
 * optional whitespace, `}}`.
 *
 * The pattern carries the `g` flag, so `exec` advances `lastIndex`
 * between calls; reset `lastIndex` to 0 before scanning a new string.
 */
const VAR_RE = /\{\{\s*([a-zA-Z_][\w.-]*)\s*(?:\|[^}]+)?\s*\}\}/g;
/**
 * Strict block form: `{% if name %}` / `{% endif %}` / `{% set ... %}` /
 * `{% include "..." %}` / `{%- ... -%}` (whitespace control variants).
 * Anchored to a known keyword so a generic `{% ... %}` we don't
 * recognise isn't flagged (Minja's grammar is a subset of Jinja —
 * surfacing only the patterns the engine actually emits keeps the
 * rule focused on what the preprocessor would have resolved).
 *
 * Shape, left to right: `{%`, an optional `-`, optional whitespace, one
 * of the listed keywords ending at a word boundary (capture group 1),
 * any run of characters other than `%`, then `%}`. Because the body
 * excludes `%`, a directive whose body contains a literal `%` is not
 * matched.
 *
 * Like `VAR_RE`, this pattern is global and therefore stateful across
 * `exec` calls.
 */
const BLOCK_RE = /\{%-?\s*(if|elif|else|endif|for|endfor|set|include|raw|endraw|leveloffset|switch|case|endswitch|default)\b[^%]*%\}/g;
|
|
17
|
+
/**
 * Depth-first scan of block-level tree nodes for leftover directives.
 *
 * Nodes whose `type` is `"code"` are skipped together with everything
 * beneath them, so pages that show Jinja syntax inside code samples do
 * not produce findings. For every other node the scan covers, in this
 * order: its `inline` list, its `html` string, its `props.title`
 * string, and finally its `children`.
 *
 * @param {Iterable<object>} nodes - Block-level nodes to inspect.
 * @param {Array<{match: string, kind: string}>} out - Accumulator the
 *   hits are appended to (mutated in place).
 * @returns {void}
 */
function findHits(nodes, out) {
    for (const current of nodes) {
        if (current.type !== "code") {
            if (current.inline) {
                findInlineHits(current.inline, out);
            }
            if (typeof current.html === "string") {
                scanText(current.html, out);
            }
            const heading = current.props?.title;
            if (typeof heading === "string") {
                scanText(heading, out);
            }
            if (current.children) {
                findHits(current.children, out);
            }
        }
    }
}
|
|
36
|
+
/**
 * Scan a list of inline nodes for leftover directives.
 *
 * Handling depends on the node's `type`:
 * - `"code"`: ignored.
 * - `"text"`: the `text` value is scanned.
 * - `"link"` / `"highlight"`: `children`, when present, are scanned
 *   recursively.
 * - `"html-inline"`: the `html` value is scanned when it is a string.
 * Any other type is ignored.
 *
 * @param {Iterable<object>} nodes - Inline nodes to inspect.
 * @param {Array<{match: string, kind: string}>} out - Accumulator the
 *   hits are appended to (mutated in place).
 * @returns {void}
 */
function findInlineHits(nodes, out) {
    for (const piece of nodes) {
        switch (piece.type) {
            case "text":
                scanText(piece.text, out);
                break;
            case "link":
            case "highlight":
                if (piece.children) {
                    findInlineHits(piece.children, out);
                }
                break;
            case "html-inline":
                if (typeof piece.html === "string") {
                    scanText(piece.html, out);
                }
                break;
            case "code":
            default:
                // Inline code and unrecognised node types are not scanned.
                break;
        }
    }
}
|
|
54
|
+
/**
 * Append one hit per directive found in `text`.
 *
 * All `VAR_RE` matches are recorded first (kind `"variable"`), followed
 * by all `BLOCK_RE` matches (kind `"block"`); within each group the
 * hits keep their order of appearance in the text.
 *
 * @param {string} text - Text to scan.
 * @param {Array<{match: string, kind: string}>} out - Accumulator the
 *   hits are appended to (mutated in place).
 * @returns {void}
 */
function scanText(text, out) {
    const passes = [
        [VAR_RE, "variable"],
        [BLOCK_RE, "block"],
    ];
    for (const [pattern, kind] of passes) {
        // Both patterns are global and shared module-wide, so rewind
        // them before each scan.
        pattern.lastIndex = 0;
        for (let found = pattern.exec(text); found !== null; found = pattern.exec(text)) {
            out.push({ match: found[0], kind });
        }
    }
}
|
|
65
|
+
/**
 * Compose the finding message for a single hit: a one-sentence
 * headline naming the leftover directive, followed by the three
 * possible remedies on the same line.
 *
 * @param {{match: string, kind: string}} hit - The directive text and
 *   its kind; any kind other than `"variable"` is described as a
 *   block directive.
 * @returns {string} The complete single-line message.
 */
function formatMessage(hit) {
    let kindLabel;
    if (hit.kind === "variable") {
        kindLabel = "variable reference";
    }
    else {
        kindLabel = "conditional / block directive";
    }
    const headline = `Unresolved Minja ${kindLabel} \`${hit.match}\` survived to the rendered page.`;
    const remedies = [
        "(1) add the missing value to the `attributes:` block in `dogsbay.config.yml`",
        "(2) pass `dogsbay site build --attribute name=value` for per-build overrides (CI secrets, deploy targets)",
        "(3) if the literal is intentional (e.g. the page documents Jinja syntax), add `preprocess: false` to the page's frontmatter.",
    ];
    return `${headline} Fix one of: ${remedies.join("; ")}`;
}
|
|
83
|
+
/**
 * Audit rule `structure/unresolved-directives`.
 *
 * Reports a warning for every Minja variable reference or block
 * directive still present in a page's content tree. Pages are skipped
 * when the context carries no page, when the page's frontmatter sets
 * `preprocess: false`, or when the page has no content tree.
 */
export const unresolvedDirectives = {
    id: "structure/unresolved-directives",
    category: "structure",
    stage: "source",
    severity: "warning",
    description: "Flags surviving Minja `{{ var }}` / `{% if %}` directives in page content " +
        "(the preprocessor couldn't resolve them and they're heading to the rendered HTML).",
    /**
     * Run the rule against one audit context.
     *
     * @param {object} rawCtx - Audit context; only `rawCtx.page` is read
     *   (`slug`, `frontmatter`, `tree`).
     * @returns {Array<object>} One finding per hit, in scan order; an
     *   empty array when there is nothing to report.
     */
    run(rawCtx) {
        const ctx = rawCtx;
        if (!ctx.page) {
            return [];
        }
        if (ctx.page.frontmatter?.preprocess === false) {
            return [];
        }
        // A page without a content tree has nothing to scan. Returning
        // early keeps a missing/null tree from throwing inside the
        // walker and taking the rest of the audit down with it.
        if (ctx.page.tree == null) {
            return [];
        }
        const hits = [];
        findHits(ctx.page.tree, hits);
        // Mapping an empty list already yields [], so no separate
        // "no hits" branch is needed.
        return hits.map((hit) => ({
            ruleId: "structure/unresolved-directives",
            severity: "warning",
            file: `${ctx.page.slug}.md`,
            message: formatMessage(hit),
            context: hit.match,
        }));
    },
};
|