dogsbay 0.2.0-beta.44 → 0.2.0-beta.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@
22
22
  * `_resetSitemapCache()`.
23
23
  */
24
24
  import { existsSync, readFileSync } from "node:fs";
25
- import { join } from "node:path";
25
+ import { basename, dirname, join } from "node:path";
26
26
  const STATE_CACHE = new Map();
27
27
  export function _resetSitemapCache() {
28
28
  STATE_CACHE.clear();
@@ -50,11 +50,26 @@ function getState(distRoot) {
50
50
  // sitemap integration which produces well-formed XML, so
51
51
  // the only failure mode is "missing entirely" or
52
52
  // "doesn't parse at all."
53
- if (/<(urlset|sitemapindex)[\s>]/.test(content)) {
53
+ const isIndex = /<sitemapindex[\s>]/.test(content);
54
+ const isUrlset = /<urlset[\s>]/.test(content);
55
+ if (isIndex || isUrlset) {
54
56
  sitemapValid = true;
55
- const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
56
- for (const m of matches) {
57
- sitemapLocs.add(m[1].trim());
57
+ if (isIndex) {
58
+ // sitemap-index.xml: each <loc> points at a child
59
+ // sitemap file (not a page). Read each child and union
60
+ // its <loc> entries (those ARE the page URLs). Without
61
+ // this recursion, sitemapLocs would carry sitemap file
62
+ // URLs only and every page would be flagged "missing
63
+ // from sitemap" — the entire audit class is unusable.
64
+ collectChildSitemapLocs(content, sitemapPath, sitemapLocs);
65
+ }
66
+ else {
67
+ // Plain <urlset> — extract <loc>s directly. Same as
68
+ // before.
69
+ const matches = content.matchAll(/<loc>([^<]+)<\/loc>/g);
70
+ for (const m of matches) {
71
+ sitemapLocs.add(m[1].trim());
72
+ }
58
73
  }
59
74
  }
60
75
  }
@@ -146,8 +161,19 @@ export const sitemapComplete = {
146
161
  // Map each html file to its likely "page URL" form. Astro
147
162
  // emits `docs/intro/index.html` for the URL `/docs/intro/`,
148
163
  // so we strip `/index.html` and ensure a leading slash.
164
+ //
165
+ // Skip pages with `<meta name="robots" content="noindex">`.
166
+ // The sitemap emitter intentionally excludes those (per the
167
+ // platform contract documented in reference/frontmatter.md
168
+ // and `packages/format-astro/src/sitemap.ts` isExcluded), so
169
+ // flagging them as "missing" would tell users to fix something
170
+ // that's working as intended. The auto-emitted taxonomy index
171
+ // pages (/tags/, /by-audience/, …) all ship with noindex.
149
172
  const issues = [];
150
173
  for (const f of allFiles) {
174
+ const robots = f.$('meta[name="robots"]').attr("content") ?? "";
175
+ if (/\bnoindex\b/i.test(robots))
176
+ continue;
151
177
  const expectedPath = htmlPathToUrlPath(f.path);
152
178
  const found = pathListedInSitemap(expectedPath, state.sitemapLocs, urlBase);
153
179
  if (!found) {
@@ -203,6 +229,51 @@ export const sitemapRobotsCoherence = {
203
229
  return issues;
204
230
  },
205
231
  };
232
+ /**
233
+ * Expand a `<sitemapindex>` document one level deep: for each
234
+ * child sitemap named by a `<loc>`, read that file from disk and
235
+ * add every `<loc>` it contains to `out` (mutated in place).
236
+ *
237
+ * Resolution strategy: take the basename of the child URL and
238
+ * resolve it against the directory that contains the index file.
239
+ * That covers both layouts in use today — host-root
240
+ * (`dist/sitemap-index.xml` + `dist/sitemap-0.xml`) and
241
+ * per-mount (`dist/<basePath>/sitemap-index.xml` +
242
+ * `dist/<basePath>/sitemap-0.xml`). Children that are missing or
243
+ * unreadable locally are silently skipped — better to miss a few
244
+ * URLs than to crash the audit when a deploy ships a partial
245
+ * sitemap. Pages listed only in a skipped child are then reported
246
+ * as missing from the sitemap, which is what happened to every
247
+ * page before index files were expanded — never worse.
248
+ */
249
+ function collectChildSitemapLocs(indexContent, indexPath, out) {
250
+ const indexDir = dirname(indexPath);
251
+ for (const m of indexContent.matchAll(/<loc>([^<]+)<\/loc>/g)) {
252
+ const childUrl = m[1].trim();
253
+ let childName;
254
+ try {
255
+ const u = new URL(childUrl); // throws when the <loc> is not an absolute URL
256
+ childName = basename(u.pathname);
257
+ }
258
+ catch {
259
+ childName = basename(childUrl); // fall back to treating the <loc> as a plain path
260
+ }
261
+ if (!childName || childName === "/") // no file name to resolve against indexDir
262
+ continue;
263
+ const childPath = join(indexDir, childName);
264
+ if (!existsSync(childPath))
265
+ continue;
266
+ try {
267
+ const childContent = readFileSync(childPath, "utf-8");
268
+ for (const cm of childContent.matchAll(/<loc>([^<]+)<\/loc>/g)) {
269
+ out.add(cm[1].trim());
270
+ }
271
+ }
272
+ catch {
273
+ // Skip unreadable child — see function-level comment.
274
+ }
275
+ }
276
+ }
206
277
  /**
207
278
  * Convert an HTML file path within `dist/` into the public URL
208
279
  * path the sitemap is likely to list. Drops `/index.html` and
@@ -32,6 +32,24 @@ export async function siteBuild(cwd, options) {
32
32
  const siteRoot = dirname(configPath);
33
33
  // 2. Load it
34
34
  let config = loadConfig(configPath);
35
+ // 2b. Migration warning: the default basePath changed from "/docs"
36
+ // to "" in v0.3 (see plans/default-basepath-root.md). Surface the
37
+ // change to users whose config doesn't pin a value — they get to
38
+ // accept the new default (basePath: "") or restore the previous
39
+ // shape (basePath: "/docs"), but can't be silently moved. Fires
40
+ // once per build until the user sets the field to anything.
41
+ if (config.site.basePath === undefined) {
42
+ console.warn(pc.yellow(`\nWarning: site.basePath is not set in dogsbay.config.yml.`));
43
+ console.warn(` The default changed in v0.3: it was "/docs", it is now "" (host root).`);
44
+ console.warn(` Your site will mount at the host root instead of under /docs/.`);
45
+ console.warn(``);
46
+ console.warn(` ${pc.cyan('To accept the new default and silence this warning, add:')}`);
47
+ console.warn(` ${pc.cyan('basePath: ""')} # under site:`);
48
+ console.warn(``);
49
+ console.warn(` ${pc.cyan('To restore the previous behaviour, add:')}`);
50
+ console.warn(` ${pc.cyan('basePath: "/docs"')} # under site:`);
51
+ console.warn(``);
52
+ }
35
53
  // 3. CLI overrides
36
54
  config = mergeOverrides(config, options);
37
55
  // 4. Resolve where the generated Astro project lives. Default
@@ -142,7 +142,11 @@ function seedStarterContent(absTarget, config) {
142
142
  mkdirSync(contentDir, { recursive: true });
143
143
  const written = [];
144
144
  const siteName = config.site.name?.trim() || "your documentation site";
145
- const basePath = config.site.basePath ?? "/docs";
145
+ // The `??` fallback used to be "/docs", matching the framework default
146
+ // pre-v0.3. The default flipped to "" (host root); align this
147
+ // fallback so the scaffold's intra-link text matches the URL
148
+ // shape a new site actually serves. See plans/default-basepath-root.md.
149
+ const basePath = config.site.basePath ?? "";
146
150
  const indexPath = join(contentDir, "index.md");
147
151
  writeFileSync(indexPath, `---
148
152
  title: Welcome
@@ -483,8 +487,15 @@ async function resolveInteractive(opts) {
483
487
  }
484
488
  // ─── SiteInitOptions → DogsbayConfig ─────────────────────────────────────
485
489
  function buildConfig(opts) {
490
+ // Always write basePath explicitly. The v0.3 default is "" (host
491
+ // root); scaffolding it visibly stops the migration warning from
492
+ // firing on the user's first build AND makes the URL shape
493
+ // obvious without forcing the reader to know the framework
494
+ // default. Authors who want /docs (or any other prefix) just edit
495
+ // this line. See plans/default-basepath-root.md.
486
496
  const site = {
487
497
  name: opts.siteName.trim(),
498
+ basePath: "",
488
499
  ...(opts.siteUrl?.trim() ? { url: opts.siteUrl.trim() } : {}),
489
500
  ...(opts.description?.trim()
490
501
  ? { description: opts.description.trim() }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dogsbay",
3
- "version": "0.2.0-beta.44",
3
+ "version": "0.2.0-beta.46",
4
4
  "description": "CLI for Dogsbay — scaffold, build, and serve documentation sites with markdown / MkDocs / Obsidian / OpenAPI sources",
5
5
  "type": "module",
6
6
  "bin": {
@@ -32,15 +32,15 @@
32
32
  "picocolors": "^1.1.0",
33
33
  "prompts": "^2.4.2",
34
34
  "yaml": "^2.8.3",
35
- "@dogsbay/autodoc-python": "0.2.0-beta.44",
36
- "@dogsbay/format-mkdocs": "0.2.0-beta.44",
37
- "@dogsbay/format-astro": "0.2.0-beta.44",
38
- "@dogsbay/format-obsidian": "0.2.0-beta.44",
39
- "@dogsbay/format-mdx": "0.2.0-beta.44",
40
- "@dogsbay/format-starlight": "0.2.0-beta.44",
41
- "@dogsbay/format-dogsbay-md": "0.2.0-beta.44",
42
- "@dogsbay/format-openapi": "0.2.0-beta.44",
43
- "@dogsbay/types": "0.2.0-beta.44"
35
+ "@dogsbay/autodoc-python": "0.2.0-beta.46",
36
+ "@dogsbay/format-mkdocs": "0.2.0-beta.46",
37
+ "@dogsbay/format-mdx": "0.2.0-beta.46",
38
+ "@dogsbay/format-astro": "0.2.0-beta.46",
39
+ "@dogsbay/format-starlight": "0.2.0-beta.46",
40
+ "@dogsbay/format-dogsbay-md": "0.2.0-beta.46",
41
+ "@dogsbay/format-obsidian": "0.2.0-beta.46",
42
+ "@dogsbay/format-openapi": "0.2.0-beta.46",
43
+ "@dogsbay/types": "0.2.0-beta.46"
44
44
  },
45
45
  "devDependencies": {
46
46
  "@types/markdown-it": "^14.1.0",