@dogsbay/format-astro 0.2.0-beta.4 → 0.2.0-beta.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/project.js CHANGED
@@ -4,13 +4,28 @@
4
4
  * Takes ExportPage[] + NavItem[] and generates a complete Astro project
5
5
  * with static .astro pages using real Dogsbay components.
6
6
  */
7
- import { existsSync, mkdirSync, writeFileSync, readFileSync, cpSync, readdirSync, statSync, } from "node:fs";
7
+ import { existsSync, mkdirSync, writeFileSync, readFileSync, copyFileSync, cpSync, readdirSync, statSync, } from "node:fs";
8
8
  import { join, dirname, relative, resolve } from "node:path";
9
9
  import { fileURLToPath } from "node:url";
10
10
  import { treeToDogsbayMd } from "@dogsbay/format-dogsbay-md";
11
- import { treeToAstro } from "./serialize.js";
11
+ import { treeToAstro, TONE_CLASSES } from "./serialize.js";
12
12
  import { buildLlmsTxt, buildSectionLlmsTxt, buildLlmsFullTxt } from "./llms-txt.js";
13
- import { normalizeBasePath, basePathSegments, buildCurrentPath } from "./base-path.js";
13
+ import { buildSitemap, buildSitemapIndex } from "./sitemap.js";
14
+ import { normalizeBasePath, basePathSegments, buildCurrentPath, withBasePath, parseSiteUrl, combinePrefix, } from "./base-path.js";
15
+ /**
16
+ * Combined URL prefix = urlBase (Astro `base` from site.url path) +
17
+ * basePath (filesystem layout prefix). Every URL emitter (nav,
18
+ * sitemap, llms.txt, .md mirror, _headers, taxonomy) uses this for
19
+ * href output. Filesystem-layout consumers (mkdir, page output
20
+ * paths) keep using basePath alone — Astro's `base` config adds the
21
+ * urlBase prefix at route time.
22
+ *
23
+ * See plans/astro-base-from-site-url.md.
24
+ */
25
+ function combinedPrefix(options) {
26
+ const { urlBase } = parseSiteUrl(options.siteUrl);
27
+ return combinePrefix(urlBase, normalizeBasePath(options.basePath));
28
+ }
14
29
  import { detectLeadingNodes } from "./lead.js";
15
30
  /**
16
31
  * Recursively prefix all hrefs in a nav tree.
@@ -97,6 +112,55 @@ function rewriteHref(href, prefix) {
97
112
  return href;
98
113
  return prefix + href;
99
114
  }
115
+ /**
116
+ * Rewrite image srcs in inline nodes + raw HTML to include the
117
+ * combined URL prefix.
118
+ *
119
+ * Astro auto-prefixes `<a href>` and image imports going through
120
+ * `<AstroImage>`, but raw `<img src="...">` HTML in template
121
+ * output is left untouched. The serializer emits raw `<img>` for
122
+ * inline images and falls back to it for non-optimized block
123
+ * images, so we have to prefix manually before serialization to
124
+ * make `/_assets/...` paths resolve under subpath-mounted deploys
125
+ * (GH Pages project pages, multi-mount Cloudflare).
126
+ *
127
+ * Symmetric with rewriteTreeHrefs — same skip-rules (external,
128
+ * anchors, already-prefixed). Block images keep their prefix
129
+ * stripped back off for the `imageMap[...]` lookup key (see
130
+ * paragraphToAstro in serialize.ts) so Astro's image optimization
131
+ * still finds the source.
132
+ */
133
+ function rewriteTreeImageSrcs(nodes, prefix) {
134
+ for (const node of nodes) {
135
+ if (node.inline) {
136
+ rewriteInlineImageSrcs(node.inline, prefix);
137
+ }
138
+ if (node.html) {
139
+ node.html = rewriteHtmlImageSrcs(node.html, prefix);
140
+ }
141
+ if (node.children) {
142
+ rewriteTreeImageSrcs(node.children, prefix);
143
+ }
144
+ }
145
+ }
146
+ function rewriteInlineImageSrcs(nodes, prefix) {
147
+ for (const node of nodes) {
148
+ if (node.type === "image" && typeof node.src === "string") {
149
+ node.src = rewriteHref(node.src, prefix);
150
+ }
151
+ else if (node.type === "link") {
152
+ // Links wrap inline children (which may include images) — same
153
+ // recursion shape as rewriteInlineHrefs.
154
+ rewriteInlineImageSrcs(node.children, prefix);
155
+ }
156
+ else if (node.type === "highlight" && node.children) {
157
+ rewriteInlineImageSrcs(node.children, prefix);
158
+ }
159
+ }
160
+ }
161
+ function rewriteHtmlImageSrcs(html, prefix) {
162
+ return html.replace(/(<img\b[^>]*\ssrc=")(\/[^"]+)"/g, (_match, before, src) => `${before}${rewriteHref(src, prefix)}"`);
163
+ }
100
164
  /**
101
165
  * Build a `wrangler.jsonc` for Cloudflare Workers Static Assets.
102
166
  *
@@ -147,6 +211,125 @@ function buildWranglerConfig(siteName, options) {
147
211
  lines.push(`}`);
148
212
  return lines.join("\n") + "\n";
149
213
  }
214
+ /**
215
+ * Build the GitHub Actions workflow YAML for `actions/deploy-pages`.
216
+ *
217
+ * The workflow:
218
+ * 1. Checks out the repo on every push to the default branch.
219
+ * 2. Installs node + pnpm at the Astro project directory, runs
220
+ * `dogsbay site build` (via `pnpm dlx` since Dogsbay is a
221
+ * global CLI, not a project dep), then `pnpm run build`
222
+ * (which runs `astro build && pagefind`).
223
+ * 3. Uploads `<astroDirRel>/dist` as a Pages artifact via
224
+ * `actions/upload-pages-artifact`.
225
+ * 4. Deploys via `actions/deploy-pages`.
226
+ *
227
+ * `astroDirRel` is the path of the Astro output relative to the
228
+ * repo root (typically "astro" — the default config has
229
+ * `output: ./astro`). Empty string is allowed when the project is
230
+ * flat (outputDir === projectDir); the workflow degrades naturally
231
+ * by omitting the `defaults: working-directory` block.
232
+ *
233
+ * Author edits — extra build steps, secrets, deploy gating — survive
234
+ * subsequent `dogsbay site build` runs because the file is written
235
+ * write-if-missing (see emitDeployArtifacts). To start over, delete
236
+ * the workflow file and rebuild.
237
+ *
238
+ * Note on basePath: GitHub Pages serves project sites at
239
+ * `https://<user>.github.io/<repo>/`. Authors who want their docs at
240
+ * the repo root should set `site.basePath: /<repo-name>` (or empty
241
+ * for user/org pages). The platform's basePath plumbing handles all
242
+ * URL rewriting; this workflow doesn't need to know about it.
243
+ */
244
+ function buildGitHubPagesWorkflow(astroDirRel) {
245
+ // When the Astro output IS the project root, drop the working-
246
+ // directory block and reference cache + artifact paths without a
247
+ // prefix. This is the flat-layout case (rare for site-init flows;
248
+ // common for `dogsbay convert` outputs that get manually wired up).
249
+ const isFlat = astroDirRel === "" || astroDirRel === ".";
250
+ const workingDirBlock = isFlat
251
+ ? ""
252
+ : `
253
+ defaults:
254
+ run:
255
+ working-directory: ${astroDirRel}`;
256
+ const cacheDep = isFlat
257
+ ? "pnpm-lock.yaml"
258
+ : `${astroDirRel}/pnpm-lock.yaml`;
259
+ const artifactPath = isFlat ? "dist" : `${astroDirRel}/dist`;
260
+ return `# Deploy to GitHub Pages.
261
+ # Generated by \`dogsbay site init --deploy=github-pages\` (or by
262
+ # adding \`deploy: { target: github-pages }\` to dogsbay.config.yml
263
+ # and running \`dogsbay site build\`). Author edits survive every
264
+ # subsequent build — the file is never overwritten. To regenerate
265
+ # from template, delete the file and rebuild.
266
+ #
267
+ # Repo settings: Settings → Pages → Source = "GitHub Actions".
268
+ name: Deploy to GitHub Pages
269
+
270
+ on:
271
+ push:
272
+ branches: [main]
273
+ workflow_dispatch:
274
+
275
+ permissions:
276
+ contents: read
277
+ pages: write
278
+ id-token: write
279
+
280
+ # Allow only one concurrent deployment, skipping queued runs.
281
+ concurrency:
282
+ group: pages
283
+ cancel-in-progress: false
284
+
285
+ jobs:
286
+ build:
287
+ runs-on: ubuntu-latest${workingDirBlock}
288
+ steps:
289
+ - uses: actions/checkout@v4
290
+
291
+ - uses: pnpm/action-setup@v4
292
+ with:
293
+ version: 10
294
+
295
+ - uses: actions/setup-node@v4
296
+ with:
297
+ # Astro 6 requires Node ^20.19.5 || >=22.12.0; pin 22 for
298
+ # forward-compat (Node 20 LTS is fine for Astro 5 sites
299
+ # but the Dogsbay scaffold targets Astro 6).
300
+ node-version: 22
301
+ cache: pnpm
302
+ cache-dependency-path: ${cacheDep}
303
+
304
+ - name: Install dependencies
305
+ run: pnpm install --frozen-lockfile
306
+
307
+ # \`dogsbay\` is a global CLI, not a project dep — pnpm dlx
308
+ # fetches it on demand. To pin a version, replace with e.g.
309
+ # \`pnpm dlx dogsbay@0.2.0-beta.18 site build\`.
310
+ - name: Build with Dogsbay
311
+ run: pnpm dlx dogsbay@beta site build
312
+
313
+ - name: Build Astro site
314
+ run: pnpm run build
315
+
316
+ - name: Upload Pages artifact
317
+ uses: actions/upload-pages-artifact@v3
318
+ with:
319
+ path: ${artifactPath}
320
+
321
+ deploy:
322
+ needs: build
323
+ runs-on: ubuntu-latest
324
+ environment:
325
+ name: github-pages
326
+ url: \${{ steps.deployment.outputs.page_url }}
327
+ steps:
328
+ - name: Deploy to GitHub Pages
329
+ id: deployment
330
+ uses: actions/deploy-pages@v4
331
+ `;
332
+ }
150
333
  /**
151
334
  * Construct the SiteConfig object that gets serialized to
152
335
  * `src/data/site.json`. Backward-compatible: existing fields keep their
@@ -156,8 +339,6 @@ function buildSiteConfig(siteName, options) {
156
339
  const cfg = {
157
340
  siteName,
158
341
  repoUrl: options.repoUrl || "",
159
- editUri: options.editUri || "blob/main/docs/",
160
- copyright: options.copyright || "",
161
342
  };
162
343
  if (options.siteUrl)
163
344
  cfg.siteUrl = options.siteUrl;
@@ -169,6 +350,15 @@ function buildSiteConfig(siteName, options) {
169
350
  cfg.twitterHandle = options.twitterHandle;
170
351
  if (options.themeColor)
171
352
  cfg.themeColor = options.themeColor;
353
+ // editUri + copyright follow the same omit-on-empty pattern as the
354
+ // optional fields above; previously they were always written
355
+ // (editUri defaulted to "blob/main/docs/", copyright to ""), which
356
+ // left zombie config in src/data/site.json. Downstream guards already
357
+ // treat empty / undefined as "don't render" so this is purely a tidy.
358
+ if (options.editUri)
359
+ cfg.editUri = options.editUri;
360
+ if (options.copyright)
361
+ cfg.copyright = options.copyright;
172
362
  if (options.brandKeywords && options.brandKeywords.length > 0) {
173
363
  cfg.brandKeywords = options.brandKeywords;
174
364
  }
@@ -187,11 +377,58 @@ function buildSiteConfig(siteName, options) {
187
377
  }
188
378
  if (options.taxonomyIndexPaths &&
189
379
  Object.keys(options.taxonomyIndexPaths).length > 0) {
190
- cfg.taxonomyIndexPaths = options.taxonomyIndexPaths;
380
+ // Bake basePath into every emitted indexPath so consumers
381
+ // (TypeBadge / StatusBadge / future components) compose hrefs
382
+ // like `${indexPath}/<value>/` and resolve under the configured
383
+ // site base. Without the prefix, `/by-type/tutorial/` 404s on
384
+ // any site with `site.basePath` set. Caller passes raw config
385
+ // values (`/by-type`, `/tags`, etc.) — basePath threading is
386
+ // this emitter's responsibility, matching how `page.url` is
387
+ // already prefixed in the taxonomy data file.
388
+ // Taxonomy index paths are baked into site.json so components
389
+ // (TagList, TaxonomyIndex, TypeBadge) emit correct hrefs at
390
+ // runtime. Use combined so these resolve under the host's
391
+ // served subpath.
392
+ const taxoPrefix = combinedPrefix(options);
393
+ cfg.taxonomyIndexPaths = Object.fromEntries(Object.entries(options.taxonomyIndexPaths).map(([name, raw]) => [
394
+ name,
395
+ withBasePath(taxoPrefix, raw),
396
+ ]));
191
397
  }
192
398
  if (options.taxonomyDisplay &&
193
399
  Object.keys(options.taxonomyDisplay).length > 0) {
194
- cfg.taxonomyDisplay = options.taxonomyDisplay;
400
+ // Flatten prefix labels into top-level entries so the
401
+ // search-facets resolver finds them after DocsLayout splits
402
+ // slash-nested tags into per-prefix Pagefind filter divs.
403
+ //
404
+ // Input:
405
+ // tags.prefixes = { difficulty: { label, color }, ... }
406
+ // tags.labels = { "difficulty/1": "Beginner", "difficulty/2": ... }
407
+ // Output additions (kept alongside the original `tags` entry):
408
+ // difficulty.labels = { "1": "Beginner", "2": ... }
409
+ //
410
+ // Resolver does `display[facetName].labels[value]` — facet name
411
+ // is now `difficulty`, value is `1`, → "Beginner". See
412
+ // plans/per-prefix-search-facets.md.
413
+ const flat = { ...options.taxonomyDisplay };
414
+ const tagsDisplay = options.taxonomyDisplay.tags;
415
+ if (tagsDisplay?.prefixes) {
416
+ for (const prefix of Object.keys(tagsDisplay.prefixes)) {
417
+ if (flat[prefix])
418
+ continue; // top-level entry wins
419
+ const leafLabels = {};
420
+ if (tagsDisplay.labels) {
421
+ const needle = `${prefix}/`;
422
+ for (const [slug, label] of Object.entries(tagsDisplay.labels)) {
423
+ if (slug.startsWith(needle)) {
424
+ leafLabels[slug.slice(needle.length)] = label;
425
+ }
426
+ }
427
+ }
428
+ flat[prefix] = { labels: leafLabels };
429
+ }
430
+ }
431
+ cfg.taxonomyDisplay = flat;
195
432
  }
196
433
  return cfg;
197
434
  }
@@ -257,6 +494,74 @@ function ensureDirectoryStructure(outputDir, basePath) {
257
494
  export function emitSiteConfig(outputDir, siteName, options) {
258
495
  mkdirSync(join(outputDir, "src", "data"), { recursive: true });
259
496
  writeFileSync(join(outputDir, "src", "data", "site.json"), JSON.stringify(buildSiteConfig(siteName, options), null, 2));
497
+ // Auto-generated companion to astro.config.mjs. Carries the
498
+ // site/base values derived from dogsbay.config.yml's site.url so
499
+ // changes propagate without --force-rescaffolding the main
500
+ // astro.config.mjs (which is scaffold-once and may have author
501
+ // edits — custom integrations, build hooks, etc.). The main
502
+ // config imports `dogsbaySite` + `dogsbayBase` from here.
503
+ // See plans/astro-base-from-site-url.md.
504
+ const { origin, urlBase: astroBase } = parseSiteUrl(options.siteUrl);
505
+ const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
506
+ const dogsbaySiteJson = hasSiteUrl
507
+ ? JSON.stringify(origin ?? options.siteUrl)
508
+ : "undefined";
509
+ const dogsbayBaseJson = astroBase ? JSON.stringify(astroBase) : "undefined";
510
+ // build.inlineStylesheets — defaults to "auto" (Astro's own
511
+ // default; matches our docs-first bias since theme.css is ~120KB
512
+ // and externalizing it lets the file cache cross-page). Authors
513
+ // wanting "always" / "never" set it via dogsbay.config.yml's
514
+ // build.inlineStylesheets. See docs/perf-tuning.md.
515
+ const dogsbayInline = options.inlineStylesheets ?? "auto";
516
+ writeFileSync(join(outputDir, "astro.config.dogsbay.mjs"), [
517
+ "// Auto-generated by `dogsbay site build` — DO NOT EDIT.",
518
+ "// Tracks site.url + derived Astro base + build behaviour from",
519
+ "// dogsbay.config.yml. Edit dogsbay.config.yml and rebuild;",
520
+ "// edits to this file will be overwritten on the next build.",
521
+ `export const dogsbaySite = ${dogsbaySiteJson};`,
522
+ `export const dogsbayBase = ${dogsbayBaseJson};`,
523
+ `export const dogsbayInlineStylesheets = ${JSON.stringify(dogsbayInline)};`,
524
+ "",
525
+ ].join("\n"));
526
+ // Migration check: pre-beta.20 sites have an astro.config.mjs that
527
+ // doesn't import the companion. Without the import, the values
528
+ // emitted above are unused and Astro's `base` stays unset — the
529
+ // exact bug this work was meant to close. Warn loudly, with the
530
+ // patch the user needs to apply, until astro.config.mjs is
531
+ // updated. We don't auto-patch because the file may have author
532
+ // edits (custom integrations, build hooks).
533
+ const astroConfigPath = join(outputDir, "astro.config.mjs");
534
+ if (existsSync(astroConfigPath)) {
535
+ const astroConfigSrc = readFileSync(astroConfigPath, "utf-8");
536
+ if (!astroConfigSrc.includes("astro.config.dogsbay.mjs")) {
537
+ console.warn([
538
+ "",
539
+ " ⚠ astro.config.mjs is missing the dogsbay companion import.",
540
+ " Without it, Astro's `base` config stays unset and assets",
541
+ " served from a host subpath (GH Pages project pages,",
542
+ " multi-mount Cloudflare) will 404.",
543
+ "",
544
+ " Add these two lines to astro.config.mjs:",
545
+ "",
546
+ ' import {',
547
+ ' dogsbaySite,',
548
+ ' dogsbayBase,',
549
+ ' dogsbayInlineStylesheets,',
550
+ ' } from "./astro.config.dogsbay.mjs";',
551
+ "",
552
+ " export default defineConfig({",
553
+ " ...(dogsbaySite ? { site: dogsbaySite } : {}),",
554
+ " ...(dogsbayBase ? { base: dogsbayBase } : {}),",
555
+ " build: { inlineStylesheets: dogsbayInlineStylesheets },",
556
+ " // ...your existing config...",
557
+ " });",
558
+ "",
559
+ " OR regenerate from template (overwrites your edits):",
560
+ " dogsbay site init . --scaffold-only --force",
561
+ "",
562
+ ].join("\n"));
563
+ }
564
+ }
260
565
  }
261
566
  export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
262
567
  let scaffoldFilesSkipped = 0;
@@ -300,6 +605,7 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
300
605
  };
301
606
  // Per-deploy-target additions to package.json
302
607
  const isCloudflare = options.deploy === "cloudflare-workers";
608
+ const isGitHubPages = options.deploy === "github-pages";
303
609
  const deployScripts = isCloudflare
304
610
  ? { deploy: "pnpm build && wrangler deploy" }
305
611
  : {};
@@ -325,14 +631,25 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
325
631
  },
326
632
  dependencies: {
327
633
  astro: "^6.0.0",
328
- "@astrojs/sitemap": "^3.0.0",
634
+ // Sitemap is emitted directly by Dogsbay into
635
+ // public/<basePath>/sitemap-{index,0}.xml so multi-mount
636
+ // deploys don't collide at the host root. We deliberately
637
+ // do NOT depend on @astrojs/sitemap (it hardcodes output to
638
+ // dist/ root, which is what we're moving away from).
329
639
  // Pagefind is invoked from the build script (see scripts.build above).
330
640
  // Lives in dependencies (not devDependencies) so production builds
331
641
  // include it; the produced search index is shipped statically and
332
642
  // doesn't load this dep at runtime.
333
643
  pagefind: "^1.4.0",
334
644
  tailwindcss: "^4.0.0",
335
- "@tailwindcss/vite": "^4.0.0",
645
+ // Pinned to 4.2.x — `@tailwindcss/vite` 4.3.x ships an
646
+ // oxcResolvePlugin shape that breaks Astro 6's
647
+ // rolldown-vite ("Missing field tsconfigPaths in
648
+ // oxcResolvePlugin"). Surfaced during the FastAPI import
649
+ // (~150-page MkDocs site) on a fresh `dogsbay site init`.
650
+ // Drop the ~ when Astro 6 picks up a compatible rolldown
651
+ // build OR @tailwindcss/vite restores the prior shape.
652
+ "@tailwindcss/vite": "~4.2.2",
336
653
  "tailwind-variants": "^0.3.0",
337
654
  shiki: "^4.0.0",
338
655
  "@shikijs/transformers": "^4.0.0",
@@ -348,6 +665,15 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
348
665
  "@dogsbay/primitives": dogsbayDep("primitives"),
349
666
  "@dogsbay/icons": dogsbayDep("icons"),
350
667
  "@dogsbay/elements": dogsbayDep("elements"),
668
+ // Transitive of `@dogsbay/primitives` (via
669
+ // `@floating-ui/dom`). Listed at the top level because
670
+ // npm doesn't hoist the second-level transitive when
671
+ // `@dogsbay/primitives` is linked via `file:` (the
672
+ // `--local` monorepo mode + the canary publish flow on
673
+ // GH Pages CI both hit this). Surfaced during the
674
+ // FastAPI import: Rollup failed with "Cannot resolve
675
+ // @floating-ui/core" at astro build time.
676
+ "@floating-ui/core": "^1.7.0",
351
677
  },
352
678
  // Pin transitive Vite to 7. Vite 8 just released; Astro 6
353
679
  // peer-deps Vite 7 and prints a warning when 8 is hoisted.
@@ -374,6 +700,18 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
374
700
  scaffoldFilesSkipped++;
375
701
  }
376
702
  }
703
+ // GitHub Pages deploy artifacts — workflow + .nojekyll. The actual
704
+ // emission lives in `emitDeployArtifacts` so site-build can also
705
+ // call it on existing sites without going through scaffold (a user
706
+ // adds `deploy: github-pages` to dogsbay.config.yml and reruns
707
+ // `site build` to get the workflow). At scaffold-time we pass
708
+ // forceOverwrite=writeScaffold so `--force` regenerates from
709
+ // template; on regular builds it stays write-if-missing.
710
+ if (isGitHubPages) {
711
+ emitDeployArtifacts(outputDir, options, {
712
+ forceOverwrite: writeScaffold,
713
+ });
714
+ }
377
715
  // Generate astro.config.mjs
378
716
  // `preserveSymlinks: true` is used with --local to pin local file: deps to
379
717
  // their on-disk paths. Inside a pnpm workspace this breaks Astro's internal
@@ -385,52 +723,50 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
385
723
  preserveSymlinks: true,
386
724
  },`
387
725
  : "";
388
- // Sitemap integration is conditional: requires an absolute site URL so
389
- // <loc> entries can be properly absolute. Without siteUrl, the sitemap
390
- // step is skipped (the import + integration call are simply omitted from
391
- // the generated config). Sitemap also filters out frontmatter-noindex pages.
392
- const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
393
- const sitemapImport = hasSiteUrl ? `import sitemap from "@astrojs/sitemap";\n` : "";
394
- // Strip any path component from site.url before emitting. The
395
- // config validator already rejects `site.url` containing a path
396
- // when `basePath` is non-empty (canonical URLs would double-count
397
- // the prefix); this is a defensive normalisation for the case
398
- // where the validator is bypassed or basePath is empty.
726
+ // siteUrl gates absolute-URL emission (sitemap <loc> entries,
727
+ // canonical tags). Without one, both are skipped — relative URLs
728
+ // are still correct, the sitemap is just not generated.
399
729
  //
400
- // Note: we deliberately do NOT emit Astro's `base:` field. With
401
- // the current file emission (pages live under
402
- // `src/pages/<basePath>/...`), adding `base` would cause Astro
403
- // to doubly-prefix every route. Switching to `base`-driven
404
- // routing is a separate refactor — see plans/configurable-base-path.md.
405
- let siteField = "";
406
- if (hasSiteUrl) {
407
- let originOnly;
408
- try {
409
- const u = new URL(options.siteUrl);
410
- originOnly = `${u.protocol}//${u.host}`;
411
- }
412
- catch {
413
- originOnly = options.siteUrl;
414
- }
415
- siteField = `\n site: ${JSON.stringify(originOnly)},`;
416
- }
417
- const integrationsField = hasSiteUrl ? `\n integrations: [sitemap()],` : "";
418
- // astro.config.mjs scaffold-once. Maintainer adds custom integrations.
419
- // The plugin-aliases import is for the Dogsbay plugin API: each
420
- // build emits `astro.config.plugins.mjs` exporting `pluginAliases`,
421
- // a Vite alias map for `virtual:dogsbay-plugin-config/<id>` modules.
422
- // When no plugins use defineClientConfig the map is empty and the
423
- // spread is a no-op. See plans/plugin-api.md.
730
+ // Sitemap is emitted directly by Dogsbay (see emitSitemapFiles)
731
+ // into public/<basePath>/sitemap-*.xml. We deliberately do NOT
732
+ // wire @astrojs/sitemap here; that integration hardcodes output
733
+ // to dist/ root, breaking multi-mount deploys.
734
+ const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
735
+ // site.url's path component (if any) becomes Astro's `base`. The
736
+ // origin alone goes into `site`. This split lets dogsbay model
737
+ // both axes independently:
738
+ // - Astro's `base` (= urlBase) controls the URL prefix Astro
739
+ // bakes into HTML asset references (`<basePath>/_astro/...`)
740
+ // and the routes Astro generates from src/pages.
741
+ // - dogsbay's basePath controls the filesystem layout
742
+ // (`src/pages/<basePath>/...`).
743
+ // The two compose at emit time — combining for nav hrefs,
744
+ // sitemap, llms.txt, etc. See plans/astro-base-from-site-url.md.
745
+ const { origin, urlBase: astroBase } = parseSiteUrl(options.siteUrl);
746
+ // astro.config.mjs — scaffold-once, but the site/base values flow
747
+ // through a separate auto-generated file (`astro.config.dogsbay.mjs`,
748
+ // emitted unconditionally below) so dogsbay-derived values stay in
749
+ // sync with `dogsbay.config.yml` even on existing sites where the
750
+ // main config is preserved. Same pattern as
751
+ // `astro.config.plugins.mjs` — the import line is the load-bearing
752
+ // bit; the auto-file is what changes.
424
753
  if (writeScaffold) {
425
754
  writeFileSync(join(outputDir, "astro.config.mjs"), `import { defineConfig } from "astro/config";
426
755
  import tailwindcss from "@tailwindcss/vite";
427
- ${sitemapImport}import { pluginAliases, pluginFsAllow } from "./astro.config.plugins.mjs";
756
+ import { pluginAliases, pluginFsAllow } from "./astro.config.plugins.mjs";
757
+ import {
758
+ dogsbaySite,
759
+ dogsbayBase,
760
+ dogsbayInlineStylesheets,
761
+ } from "./astro.config.dogsbay.mjs";
428
762
 
429
- export default defineConfig({${siteField}
763
+ export default defineConfig({
764
+ ...(dogsbaySite ? { site: dogsbaySite } : {}),
765
+ ...(dogsbayBase ? { base: dogsbayBase } : {}),
430
766
  output: "static",
431
767
  build: {
432
- inlineStylesheets: "always",
433
- },${integrationsField}
768
+ inlineStylesheets: dogsbayInlineStylesheets,
769
+ },
434
770
  vite: {
435
771
  plugins: [tailwindcss()],
436
772
  resolve: {
@@ -452,6 +788,9 @@ export default defineConfig({${siteField}
452
788
  else {
453
789
  scaffoldFilesSkipped++;
454
790
  }
791
+ // astro.config.dogsbay.mjs is emitted by emitSiteConfig (called
792
+ // above and on every site build) so site/base values stay in
793
+ // sync without a re-scaffold. See its definition for rationale.
455
794
  // Always seed an empty astro.config.plugins.mjs so the import in
456
795
  // astro.config.mjs resolves before the first plugin-emitting
457
796
  // build. Subsequent builds replace it via emitPluginRuntime.
@@ -522,7 +861,12 @@ export default defineConfig({${siteField}
522
861
  */
523
862
  export async function emitAstroPages(pages, nav, outputDir, options) {
524
863
  const siteName = options.siteName || "Documentation";
864
+ // basePath = filesystem layout prefix (where pages live under
865
+ // src/pages/...). combined = the URL prefix HTML hrefs need
866
+ // (urlBase + basePath). The two diverge whenever site.url has a
867
+ // path component (GH Pages project pages, multi-mount Cloudflare).
525
868
  const basePath = normalizeBasePath(options.basePath);
869
+ const combined = combinedPrefix(options);
526
870
  const baseSegments = basePathSegments(basePath);
527
871
  // Ensure dirs exist (callers may invoke us without going through the
528
872
  // full exportAstroProject orchestrator, e.g. dogsbay convert at Step 7).
@@ -543,7 +887,11 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
543
887
  // Remove existing entry for this section (full replace)
544
888
  existingNav = existingNav.filter((item) => item.label?.toLowerCase() !== siteName.toLowerCase()
545
889
  && item.label?.toLowerCase() !== section.toLowerCase());
546
- const prefixedNav = prefixNavHrefs(nav, section, basePath);
890
+ // Nav hrefs already carry the `combined` prefix (the importer
891
+ // emits them via fileToHref(file, hrefPrefix=combined)).
892
+ // prefixNavHrefs takes the existing prefix and weaves a section
893
+ // segment into it.
894
+ const prefixedNav = prefixNavHrefs(nav, section, combined);
547
895
  const sectionLabel = siteName
548
896
  || section.split("-").map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
549
897
  existingNav.push({ label: sectionLabel, children: prefixedNav });
@@ -558,20 +906,29 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
558
906
  copyAssets(options.sourceDir, outputDir, options.imageOptimization);
559
907
  }
560
908
  let generated = 0;
909
+ const generatedPaths = new Set();
561
910
  const pagesDir = join(outputDir, "src", "pages", ...baseSegments);
562
911
  const useImageOpt = options.imageOptimization ?? false;
563
- // hrefPrefix is the same string as basePath. rewriteHref handles the
564
- // empty-basePath case correctly: any link starting with "/" matches
565
- // the early-return guard, so root-relative links pass through
566
- // unrewritten when the site is served at host root.
567
- const hrefPrefix = basePath;
912
+ // hrefPrefix is the COMBINED prefix (urlBase + basePath) — what
913
+ // rendered HTML hrefs need so internal links resolve under the
914
+ // host's served subpath AND under the dogsbay basePath. For
915
+ // simple host-apex deploys with basePath, urlBase is empty so
916
+ // combined === basePath (back-compat). For GH Pages project pages
917
+ // and multi-mount Cloudflare, combined adds the urlBase layer.
918
+ const hrefPrefix = combined;
568
919
  for (const page of pages) {
569
920
  try {
570
921
  // Rewrite internal hrefs to match the output URL structure
571
922
  rewriteTreeHrefs(page.tree, hrefPrefix);
923
+ // Same for raw image srcs — Astro doesn't auto-prefix
924
+ // `<img src="/_assets/...">` so we do it here. Block images
925
+ // strip the prefix back off for the `imageMap[...]` lookup
926
+ // (see paragraphToAstro in serialize.ts).
927
+ rewriteTreeImageSrcs(page.tree, hrefPrefix);
572
928
  const result = treeToAstro(page.tree, {
573
929
  imageOptimization: useImageOpt,
574
930
  codeBlockTitle: options.codeBlockTitle ?? true,
931
+ combinedPrefix: hrefPrefix,
575
932
  });
576
933
  const imageSetup = useImageOpt ? [
577
934
  '',
@@ -589,7 +946,17 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
589
946
  const fm = (page.frontmatter ?? {});
590
947
  const pageDescription = fm.description ?? "";
591
948
  const pageOgImage = fm.ogImage ?? "";
592
- const pageNoindex = fm.noindex === true || fm.draft === true;
949
+ // Noindex / nofollow are independent meta directives. Site-level
950
+ // forces both bits site-wide (staging / compliance lockdown);
951
+ // page frontmatter can ESCALATE either bit independently but
952
+ // cannot opt out of a site-level lockdown. `draft: true` keeps
953
+ // its existing role as a noindex shorthand. See
954
+ // plans/site-level-robots-meta.md.
955
+ const pageNoindex = options.noindex === true ||
956
+ fm.noindex === true ||
957
+ fm.draft === true;
958
+ const pageNofollow = options.nofollow === true ||
959
+ fm.nofollow === true;
593
960
  // Independent of noindex: pages can be excluded from in-site
594
961
  // Pagefind search even when external SEs should index them
595
962
  // (or vice versa). See DocsLayout's prop docs for the
@@ -608,7 +975,28 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
608
975
  const pageCategory = Array.isArray(pageMeta?.category)
609
976
  ? pageMeta.category
610
977
  : undefined;
611
- const tagsIndexPath = options.tagsIndexPath ?? "/tags";
978
+ // Custom-taxonomy values lifted from frontmatter into
979
+ // `meta.taxonomies` by the importer (see `parseMeta` in
980
+ // `@dogsbay/types`). Surfaced to DocsLayout so it can emit one
981
+ // `<div data-pagefind-filter="<name>:<value>">` per entry — this
982
+ // is what makes user-declared taxonomies (`difficulty`, `team`,
983
+ // anything not in the five built-ins) appear as visible facet
984
+ // checkboxes in the search dialog. Without this passthrough
985
+ // they're silently dropped after the importer.
986
+ const pageTaxonomies = pageMeta?.taxonomies && Object.keys(pageMeta.taxonomies).length > 0
987
+ ? pageMeta.taxonomies
988
+ : undefined;
989
+ // `tagsIndexPath` flows to `<TagList>` for chip hrefs
990
+ // (`${indexPath}/${tag}/`). Caller passes the raw config value
991
+ // (e.g. `/tags`); we bake the COMBINED prefix (urlBase from
992
+ // site.url's path + basePath) here so chips resolve under both
993
+ // the host's served subpath AND the dogsbay basePath. With
994
+ // basePath alone, chips 404 on GH Pages project deploys
995
+ // (basePath="" + non-empty urlBase) — same shape as the
996
+ // typeBadgeHref / statusBadgeHref composition in DocsLayout,
997
+ // which already reads combined-prefixed values out of
998
+ // siteConfig.taxonomyIndexPaths (baked in buildSiteConfig).
999
+ const tagsIndexPath = withBasePath(combined, options.tagsIndexPath ?? "/tags");
612
1000
  // Auto-lede detection. If the markdown body doesn't already
613
1001
  // start with an H1 / leading paragraph, we ask DocsLayout to
614
1002
  // render the frontmatter title / description at the top of
@@ -640,12 +1028,20 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
640
1028
  // available via other means. The "Open in" deep links work
641
1029
  // regardless of mirror availability — agents that can't fetch
642
1030
  // the page just see the URL in their chat.
1031
+ // pageHrefBase uses combined (urlBase + basePath) so the URL
1032
+ // resolves correctly when the host serves dist/ at a subpath
1033
+ // (GH Pages project page, multi-mount Cloudflare).
643
1034
  const pageHrefBase = section
644
- ? (basePath ? `${basePath}/${section}/${page.slug}` : `/${section}/${page.slug}`)
645
- : (basePath ? `${basePath}/${page.slug}` : `/${page.slug}`);
1035
+ ? (combined ? `${combined}/${section}/${page.slug}` : `/${section}/${page.slug}`)
1036
+ : (combined ? `${combined}/${page.slug}` : `/${page.slug}`);
646
1037
  const pageMdHref = `${pageHrefBase}.md`;
647
- const pageMdAbsoluteUrl = options.siteUrl
648
- ? options.siteUrl.replace(/\/$/, "") + pageMdHref
1038
+ // For absolute URLs (the "Copy as MD" deep link), use the
1039
+ // origin (no path) + the full combined path; siteUrl alone
1040
+ // would double-include the urlBase since pageHrefBase already
1041
+ // contains it.
1042
+ const { origin } = parseSiteUrl(options.siteUrl);
1043
+ const pageMdAbsoluteUrl = origin
1044
+ ? origin + pageMdHref
649
1045
  : pageMdHref;
650
1046
  // Markdown body for the Copy button. Reuse the same serializer
651
1047
  // that produces the .md mirror so what the user copies matches
@@ -687,7 +1083,10 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
687
1083
  "",
688
1084
  `const headings = ${JSON.stringify(page.headings || [])};`,
689
1085
  `const nav = navData;`,
690
- `const currentPath = "${buildCurrentPath(basePath, section, page.slug)}";`,
1086
+ // currentPath uses combined so it matches nav.json hrefs
1087
+ // (which are also combined-prefixed). getPagination compares
1088
+ // them as strings; mismatched prefixes break prev/next.
1089
+ `const currentPath = "${buildCurrentPath(combined, section, page.slug)}";`,
691
1090
  // Filter nav to the current (locale, version) bucket
692
1091
  // before computing prev/next — without this, pagination
693
1092
  // walks the global nav and a "Next" link can leak from
@@ -705,12 +1104,14 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
705
1104
  `const description = ${JSON.stringify(pageDescription)} || undefined;`,
706
1105
  `const ogImage = ${JSON.stringify(pageOgImage)} || undefined;`,
707
1106
  `const noindex = ${JSON.stringify(pageNoindex)};`,
1107
+ `const nofollow = ${JSON.stringify(pageNofollow)};`,
708
1108
  `const excludeFromSearch = ${JSON.stringify(pageExcludeFromSearch)};`,
709
1109
  `const pageTags = ${JSON.stringify(pageTags ?? null)};`,
710
1110
  `const pageStatus = ${JSON.stringify(pageStatus ?? null)};`,
711
1111
  `const pageType = ${JSON.stringify(pageTypeStr ?? null)};`,
712
1112
  `const pageAudience = ${JSON.stringify(pageAudience ?? null)};`,
713
1113
  `const pageCategory = ${JSON.stringify(pageCategory ?? null)};`,
1114
+ `const pageTaxonomies = ${JSON.stringify(pageTaxonomies ?? null)};`,
714
1115
  `const tagsIndexPath = ${JSON.stringify(tagsIndexPath)};`,
715
1116
  `const llmActionsProps = ${JSON.stringify(llmActionsEnabled
716
1117
  ? {
@@ -741,6 +1142,7 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
741
1142
  ` twitterHandle={siteConfig.twitterHandle || undefined}`,
742
1143
  ` themeColor={siteConfig.themeColor || undefined}`,
743
1144
  ` noindex={noindex}`,
1145
+ ` nofollow={nofollow}`,
744
1146
  ` excludeFromSearch={excludeFromSearch}`,
745
1147
  ` plausibleDomain={siteConfig.plausible?.domain}`,
746
1148
  ` plausibleScriptUrl={siteConfig.plausible?.scriptUrl}`,
@@ -756,12 +1158,33 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
756
1158
  ` pageType={pageType ?? undefined}`,
757
1159
  ` audience={pageAudience ?? undefined}`,
758
1160
  ` category={pageCategory ?? undefined}`,
1161
+ ` taxonomies={pageTaxonomies ?? undefined}`,
759
1162
  ` autoH1={${autoH1}}`,
760
1163
  ` autoLede={${autoLede}}`,
761
1164
  ` llmActions={llmActionsProps}`,
762
1165
  ` multiSource={${JSON.stringify(page.multiSource ?? null)} ?? undefined}`,
763
1166
  ` switcherMap={switcherMapData}`,
764
- ` basePath={${JSON.stringify(basePath || "/docs")}}`,
1167
+ // basePath here is the COMBINED URL prefix (urlBase from
1168
+ // site.url's path + dogsbay basePath). DocsLayout uses it
1169
+ // for switcher links, the footer llms.txt link, and the
1170
+ // <head> alternate link — all three need the full URL
1171
+ // prefix the host actually serves under. Empty string is
1172
+ // valid (root-served sites with no urlBase or basePath);
1173
+ // don't fall back to "/docs" — that would 404 for those.
1174
+ ` basePath={${JSON.stringify(combined)}}`,
1175
+ // Pagefind index URL — must include the combined prefix or
1176
+ // the loader 404s on subpath-mounted deploys. The pagefind
1177
+ // CLI writes to <astroOutput>/dist/pagefind/ which Astro
1178
+ // serves under its `base` (= urlBase); dogsbay's basePath
1179
+ // adds the second prefix layer. Empty combined → `/pagefind/`.
1180
+ ` pagefindUrl={${JSON.stringify(combined ? `${combined}/pagefind/` : "/pagefind/")}}`,
1181
+ // Favicon — composed with combined prefix so the
1182
+ // <link rel="icon"> resolves on subpath-mounted deploys.
1183
+ // Authors who want a different favicon override via the
1184
+ // `favicon` slot on DocsLayout, or drop the file at
1185
+ // `public/favicon.ico` in their Astro project (which is
1186
+ // what the default points at).
1187
+ ` favicon={${JSON.stringify(combined ? `${combined}/favicon.ico` : "/favicon.ico")}}`,
765
1188
  ` wideLayout={${wideLayout}}`,
766
1189
  `>`,
767
1190
  ` <MarkdownContentStack>`,
@@ -798,6 +1221,7 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
798
1221
  mkdirSync(dirname(pagePath), { recursive: true });
799
1222
  writeFileSync(pagePath, pageLines.join("\n") + "\n");
800
1223
  generated++;
1224
+ generatedPaths.add(relative(outputDir, pagePath));
801
1225
  // Companion .md endpoint for content negotiation. Prerendered, so
802
1226
  // it's served as a static asset at runtime — no Worker overhead.
803
1227
  //
@@ -838,9 +1262,43 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
838
1262
  // redirect target), and writing a redirect would clobber it.
839
1263
  if (basePath !== "") {
840
1264
  const firstHref = findFirstNavHref(nav, basePath);
841
- writeFileSync(join(outputDir, "src", "pages", "index.astro"), `---\nreturn Astro.redirect("${firstHref}");\n---\n`);
1265
+ const indexPath = join(outputDir, "src", "pages", "index.astro");
1266
+ writeFileSync(indexPath, `---\nreturn Astro.redirect("${firstHref}");\n---\n`);
1267
+ generatedPaths.add(relative(outputDir, indexPath));
842
1268
  }
843
- return { generated, outputNav };
1269
+ return { generated, outputNav, generatedPaths };
1270
+ }
1271
+ /**
1272
+ * Copy each passthrough `.astro` source (`copy.sourceAbs`) to `join(outputDir, copy.outputRelPath)`.
1273
+ * All entries are checked first: if any `outputRelPath` is already in `generatedPaths`, an Error
1274
+ * listing every colliding path is thrown and nothing is copied (the copy would otherwise silently
1275
+ * overwrite a path recorded as generated). Returns `{ copied }`, the number of files copied (0 for empty input).
1276
+ */
1277
+ export function emitPassthroughAstroPages(copies, outputDir, generatedPaths) {
1278
+ if (copies.length === 0)
1279
+ return { copied: 0 };
1280
+ // Collision detection — a generated page and a passthrough page
1281
+ // would write to the same file. Refuse to overwrite; the error lists
1282
+ // every colliding output path. NOTE(review): exact string match — confirm outputRelPath uses the same separator form as the generatedPaths entries.
1283
+ const collisions = [];
1284
+ for (const copy of copies) {
1285
+ if (generatedPaths.has(copy.outputRelPath)) {
1286
+ collisions.push(copy.outputRelPath);
1287
+ }
1288
+ }
1289
+ if (collisions.length > 0) {
1290
+ throw new Error(`Passthrough Astro page collides with a generated page:\n` +
1291
+ collisions.map((c) => ` - ${c}`).join("\n") + "\n" +
1292
+ `Rename the .astro source or remove the colliding entry from nav.yml.`);
1293
+ }
1294
+ let copied = 0;
1295
+ for (const copy of copies) {
1296
+ const dest = join(outputDir, copy.outputRelPath);
1297
+ mkdirSync(dirname(dest), { recursive: true });
1298
+ copyFileSync(copy.sourceAbs, dest);
1299
+ copied++;
1300
+ }
1301
+ return { copied };
844
1302
  }
845
1303
  // ─── Tier 1: config-derived ─────────────────────────────────────────────
846
1304
  // Files driven entirely by config + flags. Always regenerated; site
@@ -856,6 +1314,54 @@ export function emitConfigDerivedFiles(outputDir, options) {
856
1314
  const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
857
1315
  writeFileSync(join(outputDir, "public", "robots.txt"), buildRobotsTxt(options, hasSiteUrl));
858
1316
  }
1317
+ /**
1318
+ * Per-deploy-target artifact emission for the Astro project at `outputDir`.
1319
+ *
1320
+ * Called from `emitSiteScaffold` (with `forceOverwrite=writeScaffold`
1321
+ * so `--force` regenerates from template) and from `dogsbay site
1322
+ * build` (with `forceOverwrite=false` so an existing site can adopt
1323
+ * a deploy target by editing config and rebuilding — the missing
1324
+ * artifact gets created on the next build).
1325
+ *
1326
+ * Emit policy is the union: write when `opts.forceOverwrite` is truthy OR
1327
+ * when the file is missing (`opts` defaults to `{ forceOverwrite: false }`).
1328
+ * Author edits to e.g. the workflow YAML survive every regular build.
1329
+ *
1330
+ * Currently handles `options.deploy === "github-pages"` only (workflow +
1331
+ * .nojekyll); any other value is a no-op, and nothing is returned. The existing
1332
+ * `cloudflare-workers` artifacts (wrangler.jsonc + package scripts) stay in
1333
+ * the scaffold-only path because they overlap with scaffold-only files
1334
+ * (package.json scripts, devDependencies). A future refactor could fold them in here too.
1335
+ */
1336
+ export function emitDeployArtifacts(outputDir, options, opts = { forceOverwrite: false }) {
1337
+ if (options.deploy === "github-pages") {
1338
+ // GitHub reads workflows from <repo-root>/.github/workflows/, NOT
1339
+ // from inside subdirectories. Use projectDir (the repo root) for
1340
+ // the workflow file; fall back to outputDir when unset (flat
1341
+ // `dogsbay convert` flows where the Astro project IS the repo).
1342
+ const projectDir = options.projectDir ?? outputDir;
1343
+ // Path of the Astro output relative to the project root. Used by
1344
+ // the workflow's working-directory + cache-dependency-path so
1345
+ // pnpm install / pnpm run build target the right place. Empty
1346
+ // string when outputDir === projectDir (flat layout); backslashes are normalized to `/`.
1347
+ const astroDirRel = relative(projectDir, outputDir).replace(/\\/g, "/");
1348
+ const workflowPath = join(projectDir, ".github", "workflows", "deploy.yml");
1349
+ if (opts.forceOverwrite || !existsSync(workflowPath)) {
1350
+ mkdirSync(dirname(workflowPath), { recursive: true });
1351
+ writeFileSync(workflowPath, buildGitHubPagesWorkflow(astroDirRel));
1352
+ }
1353
+ // .nojekyll — must exist in the deployed artifact root so GH
1354
+ // Pages skips Jekyll's `_underscored-paths` filter (Astro's
1355
+ // `_astro/` chunk dir gets eaten otherwise). Lives inside the
1356
+ // Astro project's `public/` so it's copied into `dist/` at
1357
+ // build time. The `public/` dir is created even when the file already exists.
1358
+ const nojekyllPath = join(outputDir, "public", ".nojekyll");
1359
+ mkdirSync(dirname(nojekyllPath), { recursive: true });
1360
+ if (opts.forceOverwrite || !existsSync(nojekyllPath)) {
1361
+ writeFileSync(nojekyllPath, "");
1362
+ }
1363
+ }
1364
+ }
859
1365
  /**
860
1366
  * Emit `src/data/switcherMap.json` describing per-page
861
1367
  * version + locale equivalents. Always writes the file —
@@ -872,7 +1378,10 @@ export function emitConfigDerivedFiles(outputDir, options) {
872
1378
  * baseline page in a multi-version site).
873
1379
  */
874
1380
  export function emitSwitcherMap(pages, outputDir, options) {
875
- const basePath = normalizeBasePath(options.basePath);
1381
+ // Switcher URLs use combined so the link the dropdown emits
1382
+ // resolves under the host's served subpath (GH Pages project
1383
+ // pages, multi-mount Cloudflare).
1384
+ const combined = combinedPrefix(options);
876
1385
  const dataDir = join(outputDir, "src", "data");
877
1386
  const outPath = join(dataDir, "switcherMap.json");
878
1387
  // Detect axis activation by inspecting the data the loader
@@ -911,7 +1420,7 @@ export function emitSwitcherMap(pages, outputDir, options) {
911
1420
  const variant = {
912
1421
  ...(ms.locale !== undefined ? { locale: ms.locale } : {}),
913
1422
  ...(ms.version !== undefined ? { version: ms.version } : {}),
914
- url: `${basePath}/${page.slug}`,
1423
+ url: `${combined}/${page.slug}`,
915
1424
  };
916
1425
  if (!byLogicalKey[key])
917
1426
  byLogicalKey[key] = [];
@@ -989,6 +1498,10 @@ export function emitMissingTranslationStubs(pages, outputDir, options) {
989
1498
  return;
990
1499
  const basePath = normalizeBasePath(options.basePath);
991
1500
  const baseSegments = basePathSegments(basePath);
1501
+ // combined drives the redirect URL (the user-facing path they
1502
+ // get bounced to); basePath stays the filesystem path under
1503
+ // src/pages/ where the stub lives.
1504
+ const combined = combinedPrefix(options);
992
1505
  // Index existing pages by (slug after locale segment) so we
993
1506
  // can detect missing translations cheaply. Key shape:
994
1507
  // `<other-axis-prefix>/<originalSlug>` where other-axis-prefix
@@ -1022,7 +1535,7 @@ export function emitMissingTranslationStubs(pages, outputDir, options) {
1022
1535
  const targetUrl = `${basePath}/${targetSlug}`;
1023
1536
  if (existingByUrl.has(targetUrl))
1024
1537
  continue; // already translated
1025
- const defaultUrl = `${basePath}/${defaultPage.slug}`;
1538
+ const defaultUrl = `${combined}/${defaultPage.slug}`;
1026
1539
  const filePath = join(outputDir, "src", "pages", ...baseSegments, ...targetSlug.split("/"));
1027
1540
  // Ensure parent dir exists; write a redirect-stub Astro
1028
1541
  // file. Adding `.astro` to the leaf turns it into a
@@ -1064,10 +1577,20 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1064
1577
  if (options.llmsTxt !== false) {
1065
1578
  emitLlmsTxtFiles(outputDir, siteName, options, outputNav, pages);
1066
1579
  // public/_headers — Cloudflare Workers / Pages convention. Adds an
1067
- // RFC 8288 Link header pointing agents at /llms.txt without parsing
1068
- // HTML. Emitted alongside llms.txt so the two files travel together.
1580
+ // RFC 8288 Link header pointing agents at this mount's llms.txt
1581
+ // (combined-prefix URL) without parsing HTML. Emitted alongside
1582
+ // llms.txt so the two files travel together.
1069
1583
  mkdirSync(join(outputDir, "public"), { recursive: true });
1070
- writeFileSync(join(outputDir, "public", "_headers"), buildHeadersFile());
1584
+ // _headers Link header points at the per-mount llms.txt at
1585
+ // <combined>/llms.txt — the URL agents would actually fetch.
1586
+ writeFileSync(join(outputDir, "public", "_headers"), buildHeadersFile(combinedPrefix(options)));
1587
+ }
1588
+ // Sitemap — emitted by Dogsbay (not @astrojs/sitemap) into
1589
+ // public/<basePath>/sitemap-{index,0}.xml so multi-mount deploys
1590
+ // don't collide at host root. Gated on a valid http(s) siteUrl
1591
+ // because <loc> entries must be absolute.
1592
+ if (options.siteUrl && /^https?:\/\//.test(options.siteUrl)) {
1593
+ emitSitemapFiles(outputDir, options, pages);
1071
1594
  }
1072
1595
  // src/middleware.ts — Tier 1 (always update). Drives both the
1073
1596
  // `Accept: text/markdown` content-negotiation rewrite (via
@@ -1086,12 +1609,29 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1086
1609
  const localeRedirectOn = options.defaultLocale !== undefined && knownLocales.length >= 2;
1087
1610
  const axisRedirectOn = versionRedirectOn || localeRedirectOn;
1088
1611
  if (mdMirrorOn || axisRedirectOn) {
1612
+ // Taxonomy index paths share a single global namespace across
1613
+ // locales / versions (one `/tags/` for the whole site, not one
1614
+ // per locale). The redirect helper has to know to skip them or
1615
+ // it will 302 chip hrefs to non-existent locale-prefixed routes.
1616
+ // Strip leading `/` and pull just the first segment so a config
1617
+ // like `/tags` becomes the global-prefix entry `tags`.
1618
+ const globalPrefixes = [];
1619
+ if (options.taxonomyIndexPaths) {
1620
+ for (const raw of Object.values(options.taxonomyIndexPaths)) {
1621
+ const first = raw.replace(/^\/+/, "").split("/")[0];
1622
+ if (first)
1623
+ globalPrefixes.push(first);
1624
+ }
1625
+ }
1089
1626
  mkdirSync(join(outputDir, "src"), { recursive: true });
1090
1627
  writeFileSync(join(outputDir, "src", "middleware.ts"), buildMiddlewareSource({
1091
1628
  mdMirror: mdMirrorOn,
1092
1629
  axisRedirect: axisRedirectOn
1093
1630
  ? {
1094
- basePath: normalizeBasePath(options.basePath),
1631
+ // Middleware compares paths against the request URL,
1632
+ // which carries the host's served subpath — so use the
1633
+ // combined prefix here.
1634
+ basePath: combinedPrefix(options),
1095
1635
  ...(versionRedirectOn
1096
1636
  ? {
1097
1637
  defaultVersion: options.defaultVersion,
@@ -1104,6 +1644,7 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1104
1644
  knownLocales,
1105
1645
  }
1106
1646
  : {}),
1647
+ ...(globalPrefixes.length > 0 ? { globalPrefixes } : {}),
1107
1648
  }
1108
1649
  : undefined,
1109
1650
  }));
@@ -1164,21 +1705,49 @@ function buildRobotsTxt(options, hasSiteUrl) {
1164
1705
  const aiInput = options.aiInput ?? "yes";
1165
1706
  const aiTrain = options.aiTrain ?? "no";
1166
1707
  const contentSignal = `Content-Signal: search=${search}, ai-input=${aiInput}, ai-train=${aiTrain}\n`;
1167
- const sitemap = hasSiteUrl
1168
- ? `Sitemap: ${options.siteUrl.replace(/\/$/, "")}/sitemap-index.xml\n`
1708
+ // Per-mount sitemap path: each Dogsbay site emits its sitemap
1709
+ // index under <basePath>/, so robots.txt must point there too.
1710
+ // (Multi-mount deploys end up with one robots.txt per site at
1711
+ // their respective hosts / paths; each correctly references its
1712
+ // own mount's sitemap-index.)
1713
+ // Sitemap URL = origin + combined + /sitemap-index.xml. Use the
1714
+ // origin (no path) from site.url and the combined prefix (urlBase
1715
+ // + basePath); siteUrl could itself include a path component when
1716
+ // hosting on a subpath (GH Pages project page), so we strip it
1717
+ // here to avoid double-counting.
1718
+ const { origin } = parseSiteUrl(options.siteUrl);
1719
+ const combined = combinedPrefix(options);
1720
+ const sitemap = hasSiteUrl && origin
1721
+ ? `Sitemap: ${origin}${withBasePath(combined, "/sitemap-index.xml")}\n`
1722
+ : "";
1723
+ // Llms-Txt: line — non-standard but follows the same shape as
1724
+ // `Sitemap:`. Crawlers and agents that scan robots.txt before
1725
+ // fetching pages get a direct pointer at the per-mount llms.txt.
1726
+ // RFC 9309 explicitly permits unknown directives ("intentionally
1727
+ // permissive of such future extensions") so this is harmless to
1728
+ // standards-compliant parsers. Emitted alongside Sitemap when
1729
+ // siteUrl is set; absolute URLs only (relative paths would be
1730
+ // ambiguous without a base).
1731
+ const llmsTxt = options.llmsTxt !== false && hasSiteUrl && origin
1732
+ ? `Llms-Txt: ${origin}${withBasePath(combined, "/llms.txt")}\n`
1169
1733
  : "";
1170
- return `User-agent: *\nAllow: /\n${contentSignal}${sitemap}`;
1734
+ return `User-agent: *\nAllow: /\n${contentSignal}${sitemap}${llmsTxt}`;
1171
1735
  }
1172
1736
  /**
1173
1737
  * Build the contents of `public/_headers` (Cloudflare Pages / Workers
1174
1738
  * Static Assets convention). Emits a global RFC 8288 Link header
1175
- * pointing at the site's llms.txt index, so agents don't need to
1739
+ * pointing at this mount's llms.txt index, so agents don't need to
1176
1740
  * parse HTML to discover the LLM-friendly content listing.
1741
+ *
1742
+ * The Link target is `withBasePath(basePath, "/llms.txt")` (`</docs/llms.txt>`
1743
+ * for `/docs`). `emitAgentReadinessFiles` passes `combinedPrefix(options)` as
1744
+ * `basePath`, so the header names the URL agents would fetch for the per-mount llms.txt.
1177
1745
  */
1178
- function buildHeadersFile() {
1746
+ function buildHeadersFile(basePath) {
1747
+ const llmsHref = withBasePath(basePath, "/llms.txt");
1179
1748
  return [
1180
1749
  "/*",
1181
- ' Link: </llms.txt>; rel="describedby"; type="text/plain"',
1750
+ ` Link: <${llmsHref}>; rel="describedby"; type="text/plain"`,
1182
1751
  "",
1183
1752
  ].join("\n");
1184
1753
  }
@@ -1187,20 +1756,28 @@ function buildMiddlewareSource(config) {
1187
1756
  "// AUTO-GENERATED by `dogsbay site build` — do not edit.",
1188
1757
  "// Composes the docs-layout middleware helpers.",
1189
1758
  "//",
1190
- "// Markdown content negotiation:",
1191
- "// This middleware fires on every request, but in Astro's static",
1192
- "// prerender mode (output: \"static\") request headers are NOT",
1193
- "// forwarded Astro warns about \"Astro.request.headers was used",
1194
- "// when rendering...\" and serves a prerendered HTML response.",
1195
- "// That means `Accept: text/markdown` negotiation only kicks in",
1196
- "// under SSR (output: \"server\") or via an edge function on the",
1197
- "// deployment layer (Cloudflare Worker, Netlify Edge, etc.).",
1198
- "// For pure-static deploys, agents should follow the page's",
1199
- "// <link rel=\"alternate\" type=\"text/markdown\"> href to fetch",
1200
- "// the .md mirror directly (e.g. /docs.md).",
1759
+ "// Static-prerender guard:",
1760
+ "// In Astro's static output mode, this middleware is invoked",
1761
+ "// for every prerendered route at build time. Reading",
1762
+ "// `context.request.headers` there triggers an Astro warning",
1763
+ "// per page (\"Astro.request.headers was used during static",
1764
+ "// render\"), which floods `dogsbay site build` / `site preview`",
1765
+ "// output. Worse, the negotiation can't actually happen at",
1766
+ "// build time there's no runtime client whose Accept header",
1767
+ "// we'd be honoring.",
1768
+ "//",
1769
+ "// We guard with `context.isPrerendered` so prerendered routes",
1770
+ "// short-circuit to `next()` immediately. At runtime in static",
1771
+ "// deploys, middleware doesn't fire at all (no server); at",
1772
+ "// runtime in SSR / hybrid deploys, only dynamic routes fire,",
1773
+ "// which is exactly when negotiation makes sense.",
1201
1774
  "//",
1202
- "// The Cloudflare-Worker-driven full fix is tracked in",
1203
- "// plans/cloudflare-deploy-content-negotiation.md.",
1775
+ "// Markdown content negotiation:",
1776
+ "// For pure-static deploys, `Accept: text/markdown` is honored",
1777
+ "// by the platform (Cloudflare _headers + Worker, Netlify Edge",
1778
+ "// functions). Agents that can't send Accept headers should",
1779
+ "// follow the page's <link rel=\"alternate\" type=\"text/markdown\">",
1780
+ "// to fetch the .md mirror directly (e.g. /docs.md).",
1204
1781
  'import { defineMiddleware } from "astro:middleware";',
1205
1782
  ];
1206
1783
  if (config.mdMirror) {
@@ -1214,6 +1791,11 @@ function buildMiddlewareSource(config) {
1214
1791
  lines.push(`const AXIS_REDIRECT_CONFIG = ${JSON.stringify(config.axisRedirect, null, 2)};`, "");
1215
1792
  }
1216
1793
  lines.push("export const onRequest = defineMiddleware((context, next) => {");
1794
+ // Skip prerendered routes — see file-top comment for the rationale.
1795
+ // Avoids per-page Astro.request.headers warnings during build, and
1796
+ // matches runtime semantics (middleware doesn't fire on prerendered
1797
+ // routes when deployed).
1798
+ lines.push(" if (context.isPrerendered) return next();");
1217
1799
  lines.push(" const url = new URL(context.request.url);");
1218
1800
  if (config.mdMirror) {
1219
1801
  lines.push(' const accept = context.request.headers.get("accept");', " const mdTarget = shouldRewriteToMarkdown(accept, url.pathname);", " if (mdTarget) return context.rewrite(mdTarget);");
@@ -1245,8 +1827,18 @@ function buildMdEndpoint(page, sourceRel) {
1245
1827
  ].join("\n");
1246
1828
  }
1247
1829
  /**
1248
- * Emit `public/llms.txt`, `public/llms-full.txt`, and per-section
1249
- * `public/<dir>/llms.txt` files for the site.
1830
+ * Emit per-mount llms.txt + llms-full.txt + per-section indexes.
1831
+ *
1832
+ * Files live under `public/<basePath>/...` so multiple Dogsbay sites
1833
+ * can mount on the same host (`/docs/llms.txt` + `/api/llms.txt` +
1834
+ * `/handbook/llms.txt`) without colliding at the root. When basePath
1835
+ * is empty, this collapses to `public/llms.txt` — the single-site
1836
+ * llmstxt.org-spec layout.
1837
+ *
1838
+ * The host root `/llms.txt` is intentionally NOT emitted by the
1839
+ * platform: it's the user's umbrella file, analogous to
1840
+ * `sitemap-index.xml`. Multi-mount deploys hand-write a top-level
1841
+ * `/llms.txt` that links to each per-mount index.
1250
1842
  *
1251
1843
  * Per-section files are written for every top-level nav group that
1252
1844
  * resolves to a site directory (either via `group.href` or via the
@@ -1258,26 +1850,92 @@ function emitLlmsTxtFiles(outputDir, siteName, options, nav, pages) {
1258
1850
  description: options.description,
1259
1851
  siteUrl: options.siteUrl,
1260
1852
  };
1261
- const publicDir = join(outputDir, "public");
1262
- mkdirSync(publicDir, { recursive: true });
1263
- const hrefPrefix = normalizeBasePath(options.basePath);
1264
- writeFileSync(join(publicDir, "llms.txt"), buildLlmsTxt(siteConfig, nav, pages, { hrefPrefix }));
1265
- writeFileSync(join(publicDir, "llms-full.txt"), buildLlmsFullTxt(siteConfig, nav, pages, {
1853
+ // hrefPrefix is the COMBINED prefix — used for the URL paths that
1854
+ // appear inside the llms.txt body (so agents fetch the correct
1855
+ // host-relative URLs). Filesystem layout uses basePath alone:
1856
+ // `public/<basePath>/llms.txt` matches the existing per-mount
1857
+ // delivery shape.
1858
+ const hrefPrefix = combinedPrefix(options);
1859
+ const basePath = normalizeBasePath(options.basePath);
1860
+ const baseSegments = basePathSegments(basePath);
1861
+ const mountDir = join(outputDir, "public", ...baseSegments);
1862
+ mkdirSync(mountDir, { recursive: true });
1863
+ writeFileSync(join(mountDir, "llms.txt"), buildLlmsTxt(siteConfig, nav, pages, { hrefPrefix }));
1864
+ writeFileSync(join(mountDir, "llms-full.txt"), buildLlmsFullTxt(siteConfig, nav, pages, {
1266
1865
  summary: "body",
1267
1866
  serializePage: serializePageMd,
1268
1867
  hrefPrefix,
1269
1868
  }));
1869
+ // Per-section files. `deriveSectionDir` returns a host-absolute
1870
+ // path derived from nav hrefs, which since the combined-prefix
1871
+ // refactor (commit 132891e) include urlBase + basePath — NOT just
1872
+ // basePath. So joining its return onto public/ directly would
1873
+ // double-prefix into `public/<urlBase>/<basePath>/<section>/llms.txt`,
1874
+ // which then serves at `<urlBase>/<urlBase>/<basePath>/<section>/...`
1875
+ // once Astro's base prefix is applied at request time.
1876
+ //
1877
+ // Strip the combined prefix off the section dir to get just the
1878
+ // section tail, then re-prepend basePath via mountDir. Result:
1879
+ // `public/<basePath>/<section>/llms.txt`, served under the deploy's
1880
+ // base mount as `<urlBase>/<basePath>/<section>/llms.txt`.
1881
+ const combinedSegs = hrefPrefix.replace(/^\//, "");
1270
1882
  for (const group of nav) {
1271
1883
  if (!group.children || group.children.length === 0)
1272
1884
  continue;
1273
1885
  const dir = deriveSectionDir(group);
1274
1886
  if (!dir)
1275
1887
  continue;
1276
- const sectionPath = join(publicDir, dir, "llms.txt");
1888
+ let relDir;
1889
+ if (combinedSegs && dir === combinedSegs) {
1890
+ relDir = "";
1891
+ }
1892
+ else if (combinedSegs && dir.startsWith(`${combinedSegs}/`)) {
1893
+ relDir = dir.slice(combinedSegs.length + 1);
1894
+ }
1895
+ else {
1896
+ // Defensive: if for some reason the dir doesn't carry the
1897
+ // combined prefix (older importer, manual nav.yml, etc.), fall
1898
+ // back to the raw value rather than rooting at /.
1899
+ relDir = dir;
1900
+ }
1901
+ const sectionPath = relDir
1902
+ ? join(mountDir, relDir, "llms.txt")
1903
+ : join(mountDir, "llms.txt");
1277
1904
  mkdirSync(dirname(sectionPath), { recursive: true });
1278
1905
  writeFileSync(sectionPath, buildSectionLlmsTxt(siteConfig, group, pages, { hrefPrefix }));
1279
1906
  }
1280
1907
  }
1908
+ /**
1909
+ * Emit per-mount sitemap files.
1910
+ *
1911
+ * Writes `public/<basePath>/sitemap-index.xml` + `sitemap-0.xml`.
1912
+ * The index lists the single sub-sitemap today; future splits add
1913
+ * more sub-sitemap entries as the page count grows past the
1914
+ * sitemaps.org protocol's 50,000-URL per-file limit.
1915
+ *
1916
+ * Caller has already guarded on a valid http(s) `siteUrl` — without
1917
+ * one, `<loc>` entries can't be absolute and crawlers reject the
1918
+ * file. The caller skips emission in that case; this function does no `siteUrl` validation of its own.
1919
+ */
1920
+ function emitSitemapFiles(outputDir, options, pages) {
1921
+ // Filesystem path uses basePath (sitemap files live in
1922
+ // public/<basePath>/sitemap-*.xml). The URL prefix encoded into
1923
+ // each <loc> uses combined so the absolute URLs resolve under the
1924
+ // host's served subpath. buildSitemap strips path off siteUrl
1925
+ // internally, so passing siteUrl + combined as basePath gives
1926
+ // origin + combined as the final URL. `siteNoindex` is true only when `options.noindex === true`.
1927
+ const basePath = normalizeBasePath(options.basePath);
1928
+ const combined = combinedPrefix(options);
1929
+ const baseSegments = basePathSegments(basePath);
1930
+ const mountDir = join(outputDir, "public", ...baseSegments);
1931
+ mkdirSync(mountDir, { recursive: true });
1932
+ writeFileSync(join(mountDir, "sitemap-0.xml"), buildSitemap(pages, {
1933
+ siteUrl: options.siteUrl,
1934
+ basePath: combined,
1935
+ siteNoindex: options.noindex === true,
1936
+ }));
1937
+ writeFileSync(join(mountDir, "sitemap-index.xml"), buildSitemapIndex({ siteUrl: options.siteUrl, basePath: combined }));
1938
+ }
1281
1939
  /**
1282
1940
  * Pick a directory under `public/` for a top-level nav group. Prefers
1283
1941
  * the group's own href (already a `/docs/x/y` path); otherwise falls
@@ -1355,6 +2013,11 @@ function copyComponents(outputDir) {
1355
2013
  "response-tabs", "schema-viewer", "code-samples", "copy-button",
1356
2014
  "markdown-example",
1357
2015
  "accordion", "link-card", "avatar", "math",
2016
+ // Icon resolves @ui/icon/Icon.astro → built-time SVG inlining
2017
+ // via @dogsbay/icons. Used by `:::cards` `{icon=...}` and the
2018
+ // inline `:icon[name]` directive. Without this entry every page
2019
+ // emitting the icon import 500s with "module not found".
2020
+ "icon",
1358
2021
  ];
1359
2022
  for (const name of needed) {
1360
2023
  const src = join(componentsSource, name);
@@ -1409,6 +2072,32 @@ function copyAssets(sourceDir, outputDir, imageOptimization) {
1409
2072
  catch { /* source may not exist */ }
1410
2073
  }
1411
2074
  // ── CSS generation (ported from import-mkdocs.ts) ───────
2075
/**
 * Produce the class list that goes inside the `@source inline("...")`
 * directive of the generated stylesheet, pinning the grid-tone
 * palette.
 *
 * Background: tone classes such as `bg-primary/10` only show up in
 * `.astro` pages emitted by Dogsbay's grid-item serializer. If
 * Tailwind's content scanner misses them (page outside the default
 * scan globs, or a class composed at an interpolation boundary) the
 * utilities are purged and grid cells render without a background.
 * Listing them inline forces generation regardless of scanner reach.
 *
 * The list is derived from TONE_CLASSES, so a tone added to the
 * palette is safelisted without touching this function.
 *
 * @returns {string} Every distinct class found in the TONE_CLASSES
 *   values, sorted with the default string ordering and joined by
 *   single spaces.
 */
function buildToneSafelist() {
    // Each palette value is a whitespace-separated class string; split
    // them all, drop empty fragments, and de-duplicate through a Set.
    const uniqueClasses = new Set(Object.values(TONE_CLASSES).flatMap((classList) => classList.split(/\s+/).filter(Boolean)));
    return Array.from(uniqueClasses).sort().join(" ");
}
1412
2101
  function generateGlobalCss() {
1413
2102
  return `@import "tailwindcss";
1414
2103
  @import "./theme.css";
@@ -1417,6 +2106,14 @@ function generateGlobalCss() {
1417
2106
  @source "../../node_modules/@dogsbay/ui/src";
1418
2107
  @source "../../node_modules/@dogsbay/docs-layout/src";
1419
2108
 
2109
+ /* Pin the grid-tone palette. These classes are emitted into
2110
+ markdown-generated .astro pages by the grid-item serializer
2111
+ (TONE_CLASSES in @dogsbay/format-astro). Without inlining,
2112
+ opacity-modified utilities like bg-primary/10 get purged when
2113
+ Tailwind doesn't see them in the scanned globs, leaving grid
2114
+ demo cells with no visible background. */
2115
+ @source inline("${buildToneSafelist()}");
2116
+
1420
2117
  /* Prose typography for rendered content */
1421
2118
  .docs-prose {
1422
2119
  line-height: 1.7;