@dogsbay/format-astro 0.2.0-beta.3 → 0.2.0-beta.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/project.js CHANGED
@@ -4,13 +4,28 @@
4
4
  * Takes ExportPage[] + NavItem[] and generates a complete Astro project
5
5
  * with static .astro pages using real Dogsbay components.
6
6
  */
7
- import { existsSync, mkdirSync, writeFileSync, readFileSync, cpSync, readdirSync, statSync, } from "node:fs";
7
+ import { existsSync, mkdirSync, writeFileSync, readFileSync, copyFileSync, cpSync, readdirSync, statSync, } from "node:fs";
8
8
  import { join, dirname, relative, resolve } from "node:path";
9
9
  import { fileURLToPath } from "node:url";
10
10
  import { treeToDogsbayMd } from "@dogsbay/format-dogsbay-md";
11
- import { treeToAstro } from "./serialize.js";
11
+ import { treeToAstro, TONE_CLASSES } from "./serialize.js";
12
12
  import { buildLlmsTxt, buildSectionLlmsTxt, buildLlmsFullTxt } from "./llms-txt.js";
13
- import { normalizeBasePath, basePathSegments, buildCurrentPath } from "./base-path.js";
13
+ import { buildSitemap, buildSitemapIndex } from "./sitemap.js";
14
+ import { normalizeBasePath, basePathSegments, buildCurrentPath, withBasePath, parseSiteUrl, combinePrefix, } from "./base-path.js";
15
+ /**
16
+ * Combined URL prefix = urlBase (Astro `base` from site.url path) +
17
+ * basePath (filesystem layout prefix). Every URL emitter (nav,
18
+ * sitemap, llms.txt, .md mirror, _headers, taxonomy) uses this for
19
+ * href output. Filesystem-layout consumers (mkdir, page output
20
+ * paths) keep using basePath alone — Astro's `base` config adds the
21
+ * urlBase prefix at route time.
22
+ *
23
+ * See plans/astro-base-from-site-url.md.
24
+ */
25
+ function combinedPrefix(options) {
26
+ const { urlBase } = parseSiteUrl(options.siteUrl);
27
+ return combinePrefix(urlBase, normalizeBasePath(options.basePath));
28
+ }
14
29
  import { detectLeadingNodes } from "./lead.js";
15
30
  /**
16
31
  * Recursively prefix all hrefs in a nav tree.
@@ -97,6 +112,55 @@ function rewriteHref(href, prefix) {
97
112
  return href;
98
113
  return prefix + href;
99
114
  }
115
+ /**
116
+ * Rewrite image srcs in inline nodes + raw HTML to include the
117
+ * combined URL prefix.
118
+ *
119
+ * Astro auto-prefixes `<a href>` and image imports going through
120
+ * `<AstroImage>`, but raw `<img src="...">` HTML in template
121
+ * output is left untouched. The serializer emits raw `<img>` for
122
+ * inline images and falls back to it for non-optimized block
123
+ * images, so we have to prefix manually before serialization to
124
+ * make `/_assets/...` paths resolve under subpath-mounted deploys
125
+ * (GH Pages project pages, multi-mount Cloudflare).
126
+ *
127
+ * Symmetric with rewriteTreeHrefs — same skip-rules (external,
128
+ * anchors, already-prefixed). Block images keep their prefix
129
+ * stripped back off for the `imageMap[...]` lookup key (see
130
+ * paragraphToAstro in serialize.ts) so Astro's image optimization
131
+ * still finds the source.
132
+ */
133
+ function rewriteTreeImageSrcs(nodes, prefix) {
134
+ for (const node of nodes) {
135
+ if (node.inline) {
136
+ rewriteInlineImageSrcs(node.inline, prefix);
137
+ }
138
+ if (node.html) {
139
+ node.html = rewriteHtmlImageSrcs(node.html, prefix);
140
+ }
141
+ if (node.children) {
142
+ rewriteTreeImageSrcs(node.children, prefix);
143
+ }
144
+ }
145
+ }
146
+ function rewriteInlineImageSrcs(nodes, prefix) {
147
+ for (const node of nodes) {
148
+ if (node.type === "image" && typeof node.src === "string") {
149
+ node.src = rewriteHref(node.src, prefix);
150
+ }
151
+ else if (node.type === "link") {
152
+ // Links wrap inline children (which may include images) — same
153
+ // recursion shape as rewriteInlineHrefs.
154
+ rewriteInlineImageSrcs(node.children, prefix);
155
+ }
156
+ else if (node.type === "highlight" && node.children) {
157
+ rewriteInlineImageSrcs(node.children, prefix);
158
+ }
159
+ }
160
+ }
161
+ function rewriteHtmlImageSrcs(html, prefix) {
162
+ return html.replace(/(<img\b[^>]*\ssrc=")(\/[^"]+)"/g, (_match, before, src) => `${before}${rewriteHref(src, prefix)}"`);
163
+ }
100
164
  /**
101
165
  * Build a `wrangler.jsonc` for Cloudflare Workers Static Assets.
102
166
  *
@@ -147,6 +211,125 @@ function buildWranglerConfig(siteName, options) {
147
211
  lines.push(`}`);
148
212
  return lines.join("\n") + "\n";
149
213
  }
214
+ /**
215
+ * Build the GitHub Actions workflow YAML for `actions/deploy-pages`.
216
+ *
217
+ * The workflow:
218
+ * 1. Checks out the repo on every push to the default branch.
219
+ * 2. Installs node + pnpm at the Astro project directory, runs
220
+ * `dogsbay site build` (via `pnpm dlx` since Dogsbay is a
221
+ * global CLI, not a project dep), then `pnpm run build`
222
+ * (which runs `astro build && pagefind`).
223
+ * 3. Uploads `<astroDirRel>/dist` as a Pages artifact via
224
+ * `actions/upload-pages-artifact`.
225
+ * 4. Deploys via `actions/deploy-pages`.
226
+ *
227
+ * `astroDirRel` is the path of the Astro output relative to the
228
+ * repo root (typically "astro" — the default config has
229
+ * `output: ./astro`). Empty string is allowed when the project is
230
+ * flat (outputDir === projectDir); the workflow degrades naturally
231
+ * by omitting the `defaults: working-directory` block.
232
+ *
233
+ * Author edits — extra build steps, secrets, deploy gating — survive
234
+ * subsequent `dogsbay site build` runs because the file is written
235
+ * write-if-missing (see emitDeployArtifacts). To start over, delete
236
+ * the workflow file and rebuild.
237
+ *
238
+ * Note on basePath: GitHub Pages serves project sites at
239
+ * `https://<user>.github.io/<repo>/`. Authors who want their docs at
240
+ * the repo root should set `site.basePath: /<repo-name>` (or empty
241
+ * for user/org pages). The platform's basePath plumbing handles all
242
+ * URL rewriting; this workflow doesn't need to know about it.
243
+ */
244
+ function buildGitHubPagesWorkflow(astroDirRel) {
245
+ // When the Astro output IS the project root, drop the working-
246
+ // directory block and reference cache + artifact paths without a
247
+ // prefix. This is the flat-layout case (rare for site-init flows;
248
+ // common for `dogsbay convert` outputs that get manually wired up).
249
+ const isFlat = astroDirRel === "" || astroDirRel === ".";
250
+ const workingDirBlock = isFlat
251
+ ? ""
252
+ : `
253
+ defaults:
254
+ run:
255
+ working-directory: ${astroDirRel}`;
256
+ const cacheDep = isFlat
257
+ ? "pnpm-lock.yaml"
258
+ : `${astroDirRel}/pnpm-lock.yaml`;
259
+ const artifactPath = isFlat ? "dist" : `${astroDirRel}/dist`;
260
+ return `# Deploy to GitHub Pages.
261
+ # Generated by \`dogsbay site init --deploy=github-pages\` (or by
262
+ # adding \`deploy: { target: github-pages }\` to dogsbay.config.yml
263
+ # and running \`dogsbay site build\`). Author edits survive every
264
+ # subsequent build — the file is never overwritten. To regenerate
265
+ # from template, delete the file and rebuild.
266
+ #
267
+ # Repo settings: Settings → Pages → Source = "GitHub Actions".
268
+ name: Deploy to GitHub Pages
269
+
270
+ on:
271
+ push:
272
+ branches: [main]
273
+ workflow_dispatch:
274
+
275
+ permissions:
276
+ contents: read
277
+ pages: write
278
+ id-token: write
279
+
280
+ # Allow only one concurrent deployment, skipping queued runs.
281
+ concurrency:
282
+ group: pages
283
+ cancel-in-progress: false
284
+
285
+ jobs:
286
+ build:
287
+ runs-on: ubuntu-latest${workingDirBlock}
288
+ steps:
289
+ - uses: actions/checkout@v4
290
+
291
+ - uses: pnpm/action-setup@v4
292
+ with:
293
+ version: 10
294
+
295
+ - uses: actions/setup-node@v4
296
+ with:
297
+ # Astro 6 requires Node ^20.19.5 || >=22.12.0; pin 22 for
298
+ # forward-compat (Node 20 LTS is fine for Astro 5 sites
299
+ # but the Dogsbay scaffold targets Astro 6).
300
+ node-version: 22
301
+ cache: pnpm
302
+ cache-dependency-path: ${cacheDep}
303
+
304
+ - name: Install dependencies
305
+ run: pnpm install --frozen-lockfile
306
+
307
+ # \`dogsbay\` is a global CLI, not a project dep — pnpm dlx
308
+ # fetches it on demand. To pin a version, replace with e.g.
309
+ # \`pnpm dlx dogsbay@0.2.0-beta.18 site build\`.
310
+ - name: Build with Dogsbay
311
+ run: pnpm dlx dogsbay@beta site build
312
+
313
+ - name: Build Astro site
314
+ run: pnpm run build
315
+
316
+ - name: Upload Pages artifact
317
+ uses: actions/upload-pages-artifact@v3
318
+ with:
319
+ path: ${artifactPath}
320
+
321
+ deploy:
322
+ needs: build
323
+ runs-on: ubuntu-latest
324
+ environment:
325
+ name: github-pages
326
+ url: \${{ steps.deployment.outputs.page_url }}
327
+ steps:
328
+ - name: Deploy to GitHub Pages
329
+ id: deployment
330
+ uses: actions/deploy-pages@v4
331
+ `;
332
+ }
150
333
  /**
151
334
  * Construct the SiteConfig object that gets serialized to
152
335
  * `src/data/site.json`. Backward-compatible: existing fields keep their
@@ -156,8 +339,6 @@ function buildSiteConfig(siteName, options) {
156
339
  const cfg = {
157
340
  siteName,
158
341
  repoUrl: options.repoUrl || "",
159
- editUri: options.editUri || "blob/main/docs/",
160
- copyright: options.copyright || "",
161
342
  };
162
343
  if (options.siteUrl)
163
344
  cfg.siteUrl = options.siteUrl;
@@ -169,6 +350,15 @@ function buildSiteConfig(siteName, options) {
169
350
  cfg.twitterHandle = options.twitterHandle;
170
351
  if (options.themeColor)
171
352
  cfg.themeColor = options.themeColor;
353
+ // editUri + copyright follow the same omit-on-empty pattern as the
354
+ // optional fields above; previously they were always written
355
+ // (editUri defaulted to "blob/main/docs/", copyright to ""), which
356
+ // left zombie config in src/data/site.json. Downstream guards already
357
+ // treat empty / undefined as "don't render" so this is purely a tidy.
358
+ if (options.editUri)
359
+ cfg.editUri = options.editUri;
360
+ if (options.copyright)
361
+ cfg.copyright = options.copyright;
172
362
  if (options.brandKeywords && options.brandKeywords.length > 0) {
173
363
  cfg.brandKeywords = options.brandKeywords;
174
364
  }
@@ -187,7 +377,23 @@ function buildSiteConfig(siteName, options) {
187
377
  }
188
378
  if (options.taxonomyIndexPaths &&
189
379
  Object.keys(options.taxonomyIndexPaths).length > 0) {
190
- cfg.taxonomyIndexPaths = options.taxonomyIndexPaths;
380
+ // Bake the combined prefix (urlBase + basePath) into every emitted indexPath so consumers
381
+ // (TypeBadge / StatusBadge / future components) compose hrefs
382
+ // like `${indexPath}/<value>/` and resolve under the configured
383
+ // site base. Without the prefix, `/by-type/tutorial/` 404s on
384
+ // any site with `site.basePath` set. Caller passes raw config
385
+ // values (`/by-type`, `/tags`, etc.) — basePath threading is
386
+ // this emitter's responsibility, matching how `page.url` is
387
+ // already prefixed in the taxonomy data file.
388
+ // Taxonomy index paths are baked into site.json so components
389
+ // (TagList, TaxonomyIndex, TypeBadge) emit correct hrefs at
390
+ // runtime. Use combined so these resolve under the host's
391
+ // served subpath.
392
+ const taxoPrefix = combinedPrefix(options);
393
+ cfg.taxonomyIndexPaths = Object.fromEntries(Object.entries(options.taxonomyIndexPaths).map(([name, raw]) => [
394
+ name,
395
+ withBasePath(taxoPrefix, raw),
396
+ ]));
191
397
  }
192
398
  if (options.taxonomyDisplay &&
193
399
  Object.keys(options.taxonomyDisplay).length > 0) {
@@ -257,6 +463,74 @@ function ensureDirectoryStructure(outputDir, basePath) {
257
463
  export function emitSiteConfig(outputDir, siteName, options) {
258
464
  mkdirSync(join(outputDir, "src", "data"), { recursive: true });
259
465
  writeFileSync(join(outputDir, "src", "data", "site.json"), JSON.stringify(buildSiteConfig(siteName, options), null, 2));
466
+ // Auto-generated companion to astro.config.mjs. Carries the
467
+ // site/base values derived from dogsbay.config.yml's site.url so
468
+ // changes propagate without --force-rescaffolding the main
469
+ // astro.config.mjs (which is scaffold-once and may have author
470
+ // edits — custom integrations, build hooks, etc.). The main
471
+ // config imports `dogsbaySite` + `dogsbayBase` from here.
472
+ // See plans/astro-base-from-site-url.md.
473
+ const { origin, urlBase: astroBase } = parseSiteUrl(options.siteUrl);
474
+ const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
475
+ const dogsbaySiteJson = hasSiteUrl
476
+ ? JSON.stringify(origin ?? options.siteUrl)
477
+ : "undefined";
478
+ const dogsbayBaseJson = astroBase ? JSON.stringify(astroBase) : "undefined";
479
+ // build.inlineStylesheets — defaults to "auto" (Astro's own
480
+ // default; matches our docs-first bias since theme.css is ~120KB
481
+ // and externalizing it lets the file cache cross-page). Authors
482
+ // wanting "always" / "never" set it via dogsbay.config.yml's
483
+ // build.inlineStylesheets. See docs/perf-tuning.md.
484
+ const dogsbayInline = options.inlineStylesheets ?? "auto";
485
+ writeFileSync(join(outputDir, "astro.config.dogsbay.mjs"), [
486
+ "// Auto-generated by `dogsbay site build` — DO NOT EDIT.",
487
+ "// Tracks site.url + derived Astro base + build behaviour from",
488
+ "// dogsbay.config.yml. Edit dogsbay.config.yml and rebuild;",
489
+ "// edits to this file will be overwritten on the next build.",
490
+ `export const dogsbaySite = ${dogsbaySiteJson};`,
491
+ `export const dogsbayBase = ${dogsbayBaseJson};`,
492
+ `export const dogsbayInlineStylesheets = ${JSON.stringify(dogsbayInline)};`,
493
+ "",
494
+ ].join("\n"));
495
+ // Migration check: pre-beta.20 sites have an astro.config.mjs that
496
+ // doesn't import the companion. Without the import, the values
497
+ // emitted above are unused and Astro's `base` stays unset — the
498
+ // exact bug this work was meant to close. Warn loudly, with the
499
+ // patch the user needs to apply, until astro.config.mjs is
500
+ // updated. We don't auto-patch because the file may have author
501
+ // edits (custom integrations, build hooks).
502
+ const astroConfigPath = join(outputDir, "astro.config.mjs");
503
+ if (existsSync(astroConfigPath)) {
504
+ const astroConfigSrc = readFileSync(astroConfigPath, "utf-8");
505
+ if (!astroConfigSrc.includes("astro.config.dogsbay.mjs")) {
506
+ console.warn([
507
+ "",
508
+ " ⚠ astro.config.mjs is missing the dogsbay companion import.",
509
+ " Without it, Astro's `base` config stays unset and assets",
510
+ " served from a host subpath (GH Pages project pages,",
511
+ " multi-mount Cloudflare) will 404.",
512
+ "",
513
+ " Add these two lines to astro.config.mjs:",
514
+ "",
515
+ ' import {',
516
+ ' dogsbaySite,',
517
+ ' dogsbayBase,',
518
+ ' dogsbayInlineStylesheets,',
519
+ ' } from "./astro.config.dogsbay.mjs";',
520
+ "",
521
+ " export default defineConfig({",
522
+ " ...(dogsbaySite ? { site: dogsbaySite } : {}),",
523
+ " ...(dogsbayBase ? { base: dogsbayBase } : {}),",
524
+ " build: { inlineStylesheets: dogsbayInlineStylesheets },",
525
+ " // ...your existing config...",
526
+ " });",
527
+ "",
528
+ " OR regenerate from template (overwrites your edits):",
529
+ " dogsbay site init . --scaffold-only --force",
530
+ "",
531
+ ].join("\n"));
532
+ }
533
+ }
260
534
  }
261
535
  export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
262
536
  let scaffoldFilesSkipped = 0;
@@ -300,6 +574,7 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
300
574
  };
301
575
  // Per-deploy-target additions to package.json
302
576
  const isCloudflare = options.deploy === "cloudflare-workers";
577
+ const isGitHubPages = options.deploy === "github-pages";
303
578
  const deployScripts = isCloudflare
304
579
  ? { deploy: "pnpm build && wrangler deploy" }
305
580
  : {};
@@ -325,7 +600,11 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
325
600
  },
326
601
  dependencies: {
327
602
  astro: "^6.0.0",
328
- "@astrojs/sitemap": "^3.0.0",
603
+ // Sitemap is emitted directly by Dogsbay into
604
+ // public/<basePath>/sitemap-{index,0}.xml so multi-mount
605
+ // deploys don't collide at the host root. We deliberately
606
+ // do NOT depend on @astrojs/sitemap (it hardcodes output to
607
+ // dist/ root, which is what we're moving away from).
329
608
  // Pagefind is invoked from the build script (see scripts.build above).
330
609
  // Lives in dependencies (not devDependencies) so production builds
331
610
  // include it; the produced search index is shipped statically and
@@ -349,6 +628,13 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
349
628
  "@dogsbay/icons": dogsbayDep("icons"),
350
629
  "@dogsbay/elements": dogsbayDep("elements"),
351
630
  },
631
+ // Pin transitive Vite to 7. Vite 8 just released; Astro 6
632
+ // peer-deps Vite 7 and prints a warning when 8 is hoisted.
633
+ // Without this override npm picks up Vite 8 by default.
634
+ // Drop this when Astro 7 ships and bumps its peer.
635
+ overrides: {
636
+ vite: "^7",
637
+ },
352
638
  ...(Object.keys(deployDevDeps).length > 0
353
639
  ? { devDependencies: deployDevDeps }
354
640
  : {}),
@@ -367,6 +653,18 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
367
653
  scaffoldFilesSkipped++;
368
654
  }
369
655
  }
656
+ // GitHub Pages deploy artifacts — workflow + .nojekyll. The actual
657
+ // emission lives in `emitDeployArtifacts` so site-build can also
658
+ // call it on existing sites without going through scaffold (a user
659
+ // adds `deploy: github-pages` to dogsbay.config.yml and reruns
660
+ // `site build` to get the workflow). At scaffold-time we pass
661
+ // forceOverwrite=writeScaffold so `--force` regenerates from
662
+ // template; on regular builds it stays write-if-missing.
663
+ if (isGitHubPages) {
664
+ emitDeployArtifacts(outputDir, options, {
665
+ forceOverwrite: writeScaffold,
666
+ });
667
+ }
370
668
  // Generate astro.config.mjs
371
669
  // `preserveSymlinks: true` is used with --local to pin local file: deps to
372
670
  // their on-disk paths. Inside a pnpm workspace this breaks Astro's internal
@@ -378,52 +676,50 @@ export function emitSiteScaffold(outputDir, siteName, options, writeScaffold) {
378
676
  preserveSymlinks: true,
379
677
  },`
380
678
  : "";
381
- // Sitemap integration is conditional: requires an absolute site URL so
382
- // <loc> entries can be properly absolute. Without siteUrl, the sitemap
383
- // step is skipped (the import + integration call are simply omitted from
384
- // the generated config). Sitemap also filters out frontmatter-noindex pages.
385
- const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
386
- const sitemapImport = hasSiteUrl ? `import sitemap from "@astrojs/sitemap";\n` : "";
387
- // Strip any path component from site.url before emitting. The
388
- // config validator already rejects `site.url` containing a path
389
- // when `basePath` is non-empty (canonical URLs would double-count
390
- // the prefix); this is a defensive normalisation for the case
391
- // where the validator is bypassed or basePath is empty.
679
+ // siteUrl gates absolute-URL emission (sitemap <loc> entries,
680
+ // canonical tags). Without one, both are skipped — relative URLs
681
+ // are still correct, the sitemap is just not generated.
392
682
  //
393
- // Note: we deliberately do NOT emit Astro's `base:` field. With
394
- // the current file emission (pages live under
395
- // `src/pages/<basePath>/...`), adding `base` would cause Astro
396
- // to doubly-prefix every route. Switching to `base`-driven
397
- // routing is a separate refactor — see plans/configurable-base-path.md.
398
- let siteField = "";
399
- if (hasSiteUrl) {
400
- let originOnly;
401
- try {
402
- const u = new URL(options.siteUrl);
403
- originOnly = `${u.protocol}//${u.host}`;
404
- }
405
- catch {
406
- originOnly = options.siteUrl;
407
- }
408
- siteField = `\n site: ${JSON.stringify(originOnly)},`;
409
- }
410
- const integrationsField = hasSiteUrl ? `\n integrations: [sitemap()],` : "";
411
- // astro.config.mjs scaffold-once. Maintainer adds custom integrations.
412
- // The plugin-aliases import is for the Dogsbay plugin API: each
413
- // build emits `astro.config.plugins.mjs` exporting `pluginAliases`,
414
- // a Vite alias map for `virtual:dogsbay-plugin-config/<id>` modules.
415
- // When no plugins use defineClientConfig the map is empty and the
416
- // spread is a no-op. See plans/plugin-api.md.
683
+ // Sitemap is emitted directly by Dogsbay (see emitSitemapFiles)
684
+ // into public/<basePath>/sitemap-*.xml. We deliberately do NOT
685
+ // wire @astrojs/sitemap here; that integration hardcodes output
686
+ // to dist/ root, breaking multi-mount deploys.
687
+ const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
688
+ // site.url's path component (if any) becomes Astro's `base`. The
689
+ // origin alone goes into `site`. This split lets dogsbay model
690
+ // both axes independently:
691
+ // - Astro's `base` (= urlBase) controls the URL prefix Astro
692
+ // bakes into HTML asset references (`<basePath>/_astro/...`)
693
+ // and the routes Astro generates from src/pages.
694
+ // - dogsbay's basePath controls the filesystem layout
695
+ // (`src/pages/<basePath>/...`).
696
+ // The two compose at emit time — combining for nav hrefs,
697
+ // sitemap, llms.txt, etc. See plans/astro-base-from-site-url.md.
698
+ const { origin, urlBase: astroBase } = parseSiteUrl(options.siteUrl);
699
+ // astro.config.mjs — scaffold-once, but the site/base values flow
700
+ // through a separate auto-generated file (`astro.config.dogsbay.mjs`,
701
+ // emitted unconditionally below) so dogsbay-derived values stay in
702
+ // sync with `dogsbay.config.yml` even on existing sites where the
703
+ // main config is preserved. Same pattern as
704
+ // `astro.config.plugins.mjs` — the import line is the load-bearing
705
+ // bit; the auto-file is what changes.
417
706
  if (writeScaffold) {
418
707
  writeFileSync(join(outputDir, "astro.config.mjs"), `import { defineConfig } from "astro/config";
419
708
  import tailwindcss from "@tailwindcss/vite";
420
- ${sitemapImport}import { pluginAliases, pluginFsAllow } from "./astro.config.plugins.mjs";
709
+ import { pluginAliases, pluginFsAllow } from "./astro.config.plugins.mjs";
710
+ import {
711
+ dogsbaySite,
712
+ dogsbayBase,
713
+ dogsbayInlineStylesheets,
714
+ } from "./astro.config.dogsbay.mjs";
421
715
 
422
- export default defineConfig({${siteField}
716
+ export default defineConfig({
717
+ ...(dogsbaySite ? { site: dogsbaySite } : {}),
718
+ ...(dogsbayBase ? { base: dogsbayBase } : {}),
423
719
  output: "static",
424
720
  build: {
425
- inlineStylesheets: "always",
426
- },${integrationsField}
721
+ inlineStylesheets: dogsbayInlineStylesheets,
722
+ },
427
723
  vite: {
428
724
  plugins: [tailwindcss()],
429
725
  resolve: {
@@ -445,6 +741,9 @@ export default defineConfig({${siteField}
445
741
  else {
446
742
  scaffoldFilesSkipped++;
447
743
  }
744
+ // astro.config.dogsbay.mjs is emitted by emitSiteConfig (called
745
+ // above and on every site build) so site/base values stay in
746
+ // sync without a re-scaffold. See its definition for rationale.
448
747
  // Always seed an empty astro.config.plugins.mjs so the import in
449
748
  // astro.config.mjs resolves before the first plugin-emitting
450
749
  // build. Subsequent builds replace it via emitPluginRuntime.
@@ -515,7 +814,12 @@ export default defineConfig({${siteField}
515
814
  */
516
815
  export async function emitAstroPages(pages, nav, outputDir, options) {
517
816
  const siteName = options.siteName || "Documentation";
817
+ // basePath = filesystem layout prefix (where pages live under
818
+ // src/pages/...). combined = the URL prefix HTML hrefs need
819
+ // (urlBase + basePath). The two diverge whenever site.url has a
820
+ // path component (GH Pages project pages, multi-mount Cloudflare).
518
821
  const basePath = normalizeBasePath(options.basePath);
822
+ const combined = combinedPrefix(options);
519
823
  const baseSegments = basePathSegments(basePath);
520
824
  // Ensure dirs exist (callers may invoke us without going through the
521
825
  // full exportAstroProject orchestrator, e.g. dogsbay convert at Step 7).
@@ -536,7 +840,11 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
536
840
  // Remove existing entry for this section (full replace)
537
841
  existingNav = existingNav.filter((item) => item.label?.toLowerCase() !== siteName.toLowerCase()
538
842
  && item.label?.toLowerCase() !== section.toLowerCase());
539
- const prefixedNav = prefixNavHrefs(nav, section, basePath);
843
+ // Nav hrefs already carry the `combined` prefix (the importer
844
+ // emits them via fileToHref(file, hrefPrefix=combined)).
845
+ // prefixNavHrefs takes the existing prefix and weaves a section
846
+ // segment into it.
847
+ const prefixedNav = prefixNavHrefs(nav, section, combined);
540
848
  const sectionLabel = siteName
541
849
  || section.split("-").map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
542
850
  existingNav.push({ label: sectionLabel, children: prefixedNav });
@@ -551,20 +859,29 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
551
859
  copyAssets(options.sourceDir, outputDir, options.imageOptimization);
552
860
  }
553
861
  let generated = 0;
862
+ const generatedPaths = new Set();
554
863
  const pagesDir = join(outputDir, "src", "pages", ...baseSegments);
555
864
  const useImageOpt = options.imageOptimization ?? false;
556
- // hrefPrefix is the same string as basePath. rewriteHref handles the
557
- // empty-basePath case correctly: any link starting with "/" matches
558
- // the early-return guard, so root-relative links pass through
559
- // unrewritten when the site is served at host root.
560
- const hrefPrefix = basePath;
865
+ // hrefPrefix is the COMBINED prefix (urlBase + basePath) — what
866
+ // rendered HTML hrefs need so internal links resolve under the
867
+ // host's served subpath AND under the dogsbay basePath. For
868
+ // simple host-apex deploys with basePath, urlBase is empty so
869
+ // combined === basePath (back-compat). For GH Pages project pages
870
+ // and multi-mount Cloudflare, combined adds the urlBase layer.
871
+ const hrefPrefix = combined;
561
872
  for (const page of pages) {
562
873
  try {
563
874
  // Rewrite internal hrefs to match the output URL structure
564
875
  rewriteTreeHrefs(page.tree, hrefPrefix);
876
+ // Same for raw image srcs — Astro doesn't auto-prefix
877
+ // `<img src="/_assets/...">` so we do it here. Block images
878
+ // strip the prefix back off for the `imageMap[...]` lookup
879
+ // (see paragraphToAstro in serialize.ts).
880
+ rewriteTreeImageSrcs(page.tree, hrefPrefix);
565
881
  const result = treeToAstro(page.tree, {
566
882
  imageOptimization: useImageOpt,
567
883
  codeBlockTitle: options.codeBlockTitle ?? true,
884
+ combinedPrefix: hrefPrefix,
568
885
  });
569
886
  const imageSetup = useImageOpt ? [
570
887
  '',
@@ -582,7 +899,17 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
582
899
  const fm = (page.frontmatter ?? {});
583
900
  const pageDescription = fm.description ?? "";
584
901
  const pageOgImage = fm.ogImage ?? "";
585
- const pageNoindex = fm.noindex === true || fm.draft === true;
902
+ // Noindex / nofollow are independent meta directives. Site-level
903
+ // forces both bits site-wide (staging / compliance lockdown);
904
+ // page frontmatter can ESCALATE either bit independently but
905
+ // cannot opt out of a site-level lockdown. `draft: true` keeps
906
+ // its existing role as a noindex shorthand. See
907
+ // plans/site-level-robots-meta.md.
908
+ const pageNoindex = options.noindex === true ||
909
+ fm.noindex === true ||
910
+ fm.draft === true;
911
+ const pageNofollow = options.nofollow === true ||
912
+ fm.nofollow === true;
586
913
  // Independent of noindex: pages can be excluded from in-site
587
914
  // Pagefind search even when external SEs should index them
588
915
  // (or vice versa). See DocsLayout's prop docs for the
@@ -601,7 +928,23 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
601
928
  const pageCategory = Array.isArray(pageMeta?.category)
602
929
  ? pageMeta.category
603
930
  : undefined;
604
- const tagsIndexPath = options.tagsIndexPath ?? "/tags";
931
+ // Custom-taxonomy values lifted from frontmatter into
932
+ // `meta.taxonomies` by the importer (see `parseMeta` in
933
+ // `@dogsbay/types`). Surfaced to DocsLayout so it can emit one
934
+ // `<div data-pagefind-filter="<name>:<value>">` per entry — this
935
+ // is what makes user-declared taxonomies (`difficulty`, `team`,
936
+ // anything not in the five built-ins) appear as visible facet
937
+ // checkboxes in the search dialog. Without this passthrough
938
+ // they're silently dropped after the importer.
939
+ const pageTaxonomies = pageMeta?.taxonomies && Object.keys(pageMeta.taxonomies).length > 0
940
+ ? pageMeta.taxonomies
941
+ : undefined;
942
+ // `tagsIndexPath` flows to `<TagList>` for chip hrefs
943
+ // (`${indexPath}/${tag}/`). Caller passes the raw config value
944
+ // (e.g. `/tags`); we bake basePath here so chips resolve under
945
+ // the configured site base. Without the prefix, every tag chip
946
+ // 404s on any site with `site.basePath` set.
947
+ const tagsIndexPath = withBasePath(basePath, options.tagsIndexPath ?? "/tags");
605
948
  // Auto-lede detection. If the markdown body doesn't already
606
949
  // start with an H1 / leading paragraph, we ask DocsLayout to
607
950
  // render the frontmatter title / description at the top of
@@ -633,12 +976,20 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
633
976
  // available via other means. The "Open in" deep links work
634
977
  // regardless of mirror availability — agents that can't fetch
635
978
  // the page just see the URL in their chat.
979
+ // pageHrefBase uses combined (urlBase + basePath) so the URL
980
+ // resolves correctly when the host serves dist/ at a subpath
981
+ // (GH Pages project page, multi-mount Cloudflare).
636
982
  const pageHrefBase = section
637
- ? (basePath ? `${basePath}/${section}/${page.slug}` : `/${section}/${page.slug}`)
638
- : (basePath ? `${basePath}/${page.slug}` : `/${page.slug}`);
983
+ ? (combined ? `${combined}/${section}/${page.slug}` : `/${section}/${page.slug}`)
984
+ : (combined ? `${combined}/${page.slug}` : `/${page.slug}`);
639
985
  const pageMdHref = `${pageHrefBase}.md`;
640
- const pageMdAbsoluteUrl = options.siteUrl
641
- ? options.siteUrl.replace(/\/$/, "") + pageMdHref
986
+ // For absolute URLs (the "Copy as MD" deep link), use the
987
+ // origin (no path) + the full combined path; siteUrl alone
988
+ // would double-include the urlBase since pageHrefBase already
989
+ // contains it.
990
+ const { origin } = parseSiteUrl(options.siteUrl);
991
+ const pageMdAbsoluteUrl = origin
992
+ ? origin + pageMdHref
642
993
  : pageMdHref;
643
994
  // Markdown body for the Copy button. Reuse the same serializer
644
995
  // that produces the .md mirror so what the user copies matches
@@ -680,7 +1031,10 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
680
1031
  "",
681
1032
  `const headings = ${JSON.stringify(page.headings || [])};`,
682
1033
  `const nav = navData;`,
683
- `const currentPath = "${buildCurrentPath(basePath, section, page.slug)}";`,
1034
+ // currentPath uses combined so it matches nav.json hrefs
1035
+ // (which are also combined-prefixed). getPagination compares
1036
+ // them as strings; mismatched prefixes break prev/next.
1037
+ `const currentPath = "${buildCurrentPath(combined, section, page.slug)}";`,
684
1038
  // Filter nav to the current (locale, version) bucket
685
1039
  // before computing prev/next — without this, pagination
686
1040
  // walks the global nav and a "Next" link can leak from
@@ -698,12 +1052,14 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
698
1052
  `const description = ${JSON.stringify(pageDescription)} || undefined;`,
699
1053
  `const ogImage = ${JSON.stringify(pageOgImage)} || undefined;`,
700
1054
  `const noindex = ${JSON.stringify(pageNoindex)};`,
1055
+ `const nofollow = ${JSON.stringify(pageNofollow)};`,
701
1056
  `const excludeFromSearch = ${JSON.stringify(pageExcludeFromSearch)};`,
702
1057
  `const pageTags = ${JSON.stringify(pageTags ?? null)};`,
703
1058
  `const pageStatus = ${JSON.stringify(pageStatus ?? null)};`,
704
1059
  `const pageType = ${JSON.stringify(pageTypeStr ?? null)};`,
705
1060
  `const pageAudience = ${JSON.stringify(pageAudience ?? null)};`,
706
1061
  `const pageCategory = ${JSON.stringify(pageCategory ?? null)};`,
1062
+ `const pageTaxonomies = ${JSON.stringify(pageTaxonomies ?? null)};`,
707
1063
  `const tagsIndexPath = ${JSON.stringify(tagsIndexPath)};`,
708
1064
  `const llmActionsProps = ${JSON.stringify(llmActionsEnabled
709
1065
  ? {
@@ -734,6 +1090,7 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
734
1090
  ` twitterHandle={siteConfig.twitterHandle || undefined}`,
735
1091
  ` themeColor={siteConfig.themeColor || undefined}`,
736
1092
  ` noindex={noindex}`,
1093
+ ` nofollow={nofollow}`,
737
1094
  ` excludeFromSearch={excludeFromSearch}`,
738
1095
  ` plausibleDomain={siteConfig.plausible?.domain}`,
739
1096
  ` plausibleScriptUrl={siteConfig.plausible?.scriptUrl}`,
@@ -749,12 +1106,33 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
749
1106
  ` pageType={pageType ?? undefined}`,
750
1107
  ` audience={pageAudience ?? undefined}`,
751
1108
  ` category={pageCategory ?? undefined}`,
1109
+ ` taxonomies={pageTaxonomies ?? undefined}`,
752
1110
  ` autoH1={${autoH1}}`,
753
1111
  ` autoLede={${autoLede}}`,
754
1112
  ` llmActions={llmActionsProps}`,
755
1113
  ` multiSource={${JSON.stringify(page.multiSource ?? null)} ?? undefined}`,
756
1114
  ` switcherMap={switcherMapData}`,
757
- ` basePath={${JSON.stringify(basePath || "/docs")}}`,
1115
+ // basePath here is the COMBINED URL prefix (urlBase from
1116
+ // site.url's path + dogsbay basePath). DocsLayout uses it
1117
+ // for switcher links, the footer llms.txt link, and the
1118
+ // <head> alternate link — all three need the full URL
1119
+ // prefix the host actually serves under. Empty string is
1120
+ // valid (root-served sites with no urlBase or basePath);
1121
+ // don't fall back to "/docs" — that would 404 for those.
1122
+ ` basePath={${JSON.stringify(combined)}}`,
1123
+ // Pagefind index URL — must include the combined prefix or
1124
+ // the loader 404s on subpath-mounted deploys. The pagefind
1125
+ // CLI writes to <astroOutput>/dist/pagefind/ which Astro
1126
+ // serves under its `base` (= urlBase); dogsbay's basePath
1127
+ // adds the second prefix layer. Empty combined → `/pagefind/`.
1128
+ ` pagefindUrl={${JSON.stringify(combined ? `${combined}/pagefind/` : "/pagefind/")}}`,
1129
+ // Favicon — composed with combined prefix so the
1130
+ // <link rel="icon"> resolves on subpath-mounted deploys.
1131
+ // Authors who want a different favicon override via the
1132
+ // `favicon` slot on DocsLayout, or drop the file at
1133
+ // `public/favicon.ico` in their Astro project (which is
1134
+ // what the default points at).
1135
+ ` favicon={${JSON.stringify(combined ? `${combined}/favicon.ico` : "/favicon.ico")}}`,
758
1136
  ` wideLayout={${wideLayout}}`,
759
1137
  `>`,
760
1138
  ` <MarkdownContentStack>`,
@@ -791,6 +1169,7 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
791
1169
  mkdirSync(dirname(pagePath), { recursive: true });
792
1170
  writeFileSync(pagePath, pageLines.join("\n") + "\n");
793
1171
  generated++;
1172
+ generatedPaths.add(relative(outputDir, pagePath));
794
1173
  // Companion .md endpoint for content negotiation. Prerendered, so
795
1174
  // it's served as a static asset at runtime — no Worker overhead.
796
1175
  //
@@ -831,9 +1210,43 @@ export async function emitAstroPages(pages, nav, outputDir, options) {
831
1210
  // redirect target), and writing a redirect would clobber it.
832
1211
  if (basePath !== "") {
833
1212
  const firstHref = findFirstNavHref(nav, basePath);
834
- writeFileSync(join(outputDir, "src", "pages", "index.astro"), `---\nreturn Astro.redirect("${firstHref}");\n---\n`);
1213
+ const indexPath = join(outputDir, "src", "pages", "index.astro");
1214
+ writeFileSync(indexPath, `---\nreturn Astro.redirect("${firstHref}");\n---\n`);
1215
+ generatedPaths.add(relative(outputDir, indexPath));
835
1216
  }
836
- return { generated, outputNav };
1217
+ return { generated, outputNav, generatedPaths };
1218
+ }
1219
/**
 * Copy each passthrough `.astro` source to its computed output path.
 *
 * Before copying anything, every destination is checked against
 * `generatedPaths` (the set of files `emitAstroPages` wrote, recorded as
 * `relative(outputDir, pagePath)`). If any destination matches, the whole
 * operation aborts — otherwise a generated page and a hand-authored page
 * would silently overwrite one another.
 *
 * Destinations are compared both verbatim and after being normalised into
 * the same `relative(outputDir, join(outputDir, …))` form the generated
 * paths use, so a redundant `./` segment or a separator difference in
 * `outputRelPath` cannot hide a real collision.
 *
 * @param {Array<{sourceAbs: string, outputRelPath: string}>} copies
 *   Passthrough files: absolute source path + destination relative to
 *   `outputDir`.
 * @param {string} outputDir Root of the generated Astro project.
 * @param {Set<string>} generatedPaths Output-relative paths already
 *   written by the page generator.
 * @returns {{copied: number}} Number of files copied.
 * @throws {Error} When one or more destinations collide with a
 *   generated page; nothing is copied in that case.
 */
export function emitPassthroughAstroPages(copies, outputDir, generatedPaths) {
    if (copies.length === 0)
        return { copied: 0 };
    // Bring an author-supplied relative path into the exact shape used
    // when generated paths were recorded, so set membership is reliable.
    const toRecordedForm = (relPath) => relative(outputDir, join(outputDir, relPath));
    // Collision detection — a generated page and a passthrough page
    // would write to the same file. Refuse to overwrite; report every
    // conflicting destination in one error.
    const collisions = [];
    for (const copy of copies) {
        const raw = copy.outputRelPath;
        if (generatedPaths.has(raw) || generatedPaths.has(toRecordedForm(raw))) {
            collisions.push(raw);
        }
    }
    if (collisions.length > 0) {
        throw new Error(`Passthrough Astro page collides with a generated page:\n` +
            collisions.map((c) => `  - ${c}`).join("\n") + "\n" +
            `Rename the .astro source or remove the colliding entry from nav.yml.`);
    }
    let copied = 0;
    for (const copy of copies) {
        const dest = join(outputDir, copy.outputRelPath);
        // Destination directories may not exist yet for hand-authored routes.
        mkdirSync(dirname(dest), { recursive: true });
        copyFileSync(copy.sourceAbs, dest);
        copied++;
    }
    return { copied };
}
838
1251
  // ─── Tier 1: config-derived ─────────────────────────────────────────────
839
1252
  // Files driven entirely by config + flags. Always regenerated; site
@@ -849,6 +1262,54 @@ export function emitConfigDerivedFiles(outputDir, options) {
849
1262
  const hasSiteUrl = Boolean(options.siteUrl && /^https?:\/\//.test(options.siteUrl));
850
1263
  writeFileSync(join(outputDir, "public", "robots.txt"), buildRobotsTxt(options, hasSiteUrl));
851
1264
  }
1265
/**
 * Write the files a specific deploy target needs.
 *
 * Two callers share this: `emitSiteScaffold` (passing
 * `forceOverwrite=writeScaffold`, so `--force` regenerates from the
 * template) and `dogsbay site build` (passing `forceOverwrite=false`, so
 * a site that adopts a deploy target via config gets the missing files
 * on its next build).
 *
 * A file is written when overwrite is forced OR when it does not exist
 * yet; an existing file is otherwise left alone, so author edits (e.g.
 * to the workflow YAML) survive regular builds.
 *
 * Only `github-pages` is handled here (workflow + `.nojekyll`). Any other
 * `options.deploy` value is a no-op.
 *
 * @param {string} outputDir Root of the generated Astro project.
 * @param {object} options Site options; reads `deploy` and `projectDir`.
 * @param {{forceOverwrite: boolean}} [opts] Overwrite policy.
 */
export function emitDeployArtifacts(outputDir, options, opts = { forceOverwrite: false }) {
    if (options.deploy !== "github-pages")
        return;
    // Shared emit policy: forced, or the target is not on disk yet.
    const shouldWrite = (target) => opts.forceOverwrite || !existsSync(target);
    // GitHub only picks up workflows from <repo-root>/.github/workflows/.
    // projectDir is the repo root; when it is unset the Astro project
    // itself is treated as the repo root.
    const repoRoot = options.projectDir ?? outputDir;
    // Astro project location relative to the repo root, with forward
    // slashes; empty string when both are the same directory. Handed to
    // the workflow template.
    const astroSubdir = relative(repoRoot, outputDir).replace(/\\/g, "/");
    const workflowFile = join(repoRoot, ".github", "workflows", "deploy.yml");
    if (shouldWrite(workflowFile)) {
        mkdirSync(dirname(workflowFile), { recursive: true });
        writeFileSync(workflowFile, buildGitHubPagesWorkflow(astroSubdir));
    }
    // .nojekyll lives in the Astro project's public/ so the build copies
    // it into dist/; it stops GH Pages' Jekyll step from dropping
    // underscore-prefixed paths such as `_astro/`.
    const nojekyllFile = join(outputDir, "public", ".nojekyll");
    // The directory is ensured unconditionally, even when the marker
    // file itself is left untouched.
    mkdirSync(dirname(nojekyllFile), { recursive: true });
    if (shouldWrite(nojekyllFile)) {
        writeFileSync(nojekyllFile, "");
    }
}
852
1313
  /**
853
1314
  * Emit `src/data/switcherMap.json` describing per-page
854
1315
  * version + locale equivalents. Always writes the file —
@@ -865,7 +1326,10 @@ export function emitConfigDerivedFiles(outputDir, options) {
865
1326
  * baseline page in a multi-version site).
866
1327
  */
867
1328
  export function emitSwitcherMap(pages, outputDir, options) {
868
- const basePath = normalizeBasePath(options.basePath);
1329
+ // Switcher URLs use combined so the link the dropdown emits
1330
+ // resolves under the host's served subpath (GH Pages project
1331
+ // pages, multi-mount Cloudflare).
1332
+ const combined = combinedPrefix(options);
869
1333
  const dataDir = join(outputDir, "src", "data");
870
1334
  const outPath = join(dataDir, "switcherMap.json");
871
1335
  // Detect axis activation by inspecting the data the loader
@@ -904,7 +1368,7 @@ export function emitSwitcherMap(pages, outputDir, options) {
904
1368
  const variant = {
905
1369
  ...(ms.locale !== undefined ? { locale: ms.locale } : {}),
906
1370
  ...(ms.version !== undefined ? { version: ms.version } : {}),
907
- url: `${basePath}/${page.slug}`,
1371
+ url: `${combined}/${page.slug}`,
908
1372
  };
909
1373
  if (!byLogicalKey[key])
910
1374
  byLogicalKey[key] = [];
@@ -982,6 +1446,10 @@ export function emitMissingTranslationStubs(pages, outputDir, options) {
982
1446
  return;
983
1447
  const basePath = normalizeBasePath(options.basePath);
984
1448
  const baseSegments = basePathSegments(basePath);
1449
+ // combined drives the redirect URL (the user-facing path they
1450
+ // get bounced to); basePath stays the filesystem path under
1451
+ // src/pages/ where the stub lives.
1452
+ const combined = combinedPrefix(options);
985
1453
  // Index existing pages by (slug after locale segment) so we
986
1454
  // can detect missing translations cheaply. Key shape:
987
1455
  // `<other-axis-prefix>/<originalSlug>` where other-axis-prefix
@@ -1015,7 +1483,7 @@ export function emitMissingTranslationStubs(pages, outputDir, options) {
1015
1483
  const targetUrl = `${basePath}/${targetSlug}`;
1016
1484
  if (existingByUrl.has(targetUrl))
1017
1485
  continue; // already translated
1018
- const defaultUrl = `${basePath}/${defaultPage.slug}`;
1486
+ const defaultUrl = `${combined}/${defaultPage.slug}`;
1019
1487
  const filePath = join(outputDir, "src", "pages", ...baseSegments, ...targetSlug.split("/"));
1020
1488
  // Ensure parent dir exists; write a redirect-stub Astro
1021
1489
  // file. Adding `.astro` to the leaf turns it into a
@@ -1057,10 +1525,20 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1057
1525
  if (options.llmsTxt !== false) {
1058
1526
  emitLlmsTxtFiles(outputDir, siteName, options, outputNav, pages);
1059
1527
  // public/_headers — Cloudflare Workers / Pages convention. Adds an
1060
- // RFC 8288 Link header pointing agents at /llms.txt without parsing
1061
- // HTML. Emitted alongside llms.txt so the two files travel together.
1528
+ // RFC 8288 Link header pointing agents at this mount's llms.txt
1529
+ // (basePath-prefixed) without parsing HTML. Emitted alongside
1530
+ // llms.txt so the two files travel together.
1062
1531
  mkdirSync(join(outputDir, "public"), { recursive: true });
1063
- writeFileSync(join(outputDir, "public", "_headers"), buildHeadersFile());
1532
+ // _headers Link header points at the per-mount llms.txt at
1533
+ // <combined>/llms.txt — the URL agents would actually fetch.
1534
+ writeFileSync(join(outputDir, "public", "_headers"), buildHeadersFile(combinedPrefix(options)));
1535
+ }
1536
+ // Sitemap — emitted by Dogsbay (not @astrojs/sitemap) into
1537
+ // public/<basePath>/sitemap-{index,0}.xml so multi-mount deploys
1538
+ // don't collide at host root. Gated on a valid http(s) siteUrl
1539
+ // because <loc> entries must be absolute.
1540
+ if (options.siteUrl && /^https?:\/\//.test(options.siteUrl)) {
1541
+ emitSitemapFiles(outputDir, options, pages);
1064
1542
  }
1065
1543
  // src/middleware.ts — Tier 1 (always update). Drives both the
1066
1544
  // `Accept: text/markdown` content-negotiation rewrite (via
@@ -1079,12 +1557,29 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1079
1557
  const localeRedirectOn = options.defaultLocale !== undefined && knownLocales.length >= 2;
1080
1558
  const axisRedirectOn = versionRedirectOn || localeRedirectOn;
1081
1559
  if (mdMirrorOn || axisRedirectOn) {
1560
+ // Taxonomy index paths share a single global namespace across
1561
+ // locales / versions (one `/tags/` for the whole site, not one
1562
+ // per locale). The redirect helper has to know to skip them or
1563
+ // it will 302 chip hrefs to non-existent locale-prefixed routes.
1564
+ // Strip leading `/` and pull just the first segment so a config
1565
+ // like `/tags` becomes the global-prefix entry `tags`.
1566
+ const globalPrefixes = [];
1567
+ if (options.taxonomyIndexPaths) {
1568
+ for (const raw of Object.values(options.taxonomyIndexPaths)) {
1569
+ const first = raw.replace(/^\/+/, "").split("/")[0];
1570
+ if (first)
1571
+ globalPrefixes.push(first);
1572
+ }
1573
+ }
1082
1574
  mkdirSync(join(outputDir, "src"), { recursive: true });
1083
1575
  writeFileSync(join(outputDir, "src", "middleware.ts"), buildMiddlewareSource({
1084
1576
  mdMirror: mdMirrorOn,
1085
1577
  axisRedirect: axisRedirectOn
1086
1578
  ? {
1087
- basePath: normalizeBasePath(options.basePath),
1579
+ // Middleware compares paths against the request URL,
1580
+ // which carries the host's served subpath — so use the
1581
+ // combined prefix here.
1582
+ basePath: combinedPrefix(options),
1088
1583
  ...(versionRedirectOn
1089
1584
  ? {
1090
1585
  defaultVersion: options.defaultVersion,
@@ -1097,6 +1592,7 @@ export function emitAgentReadinessFiles(pages, outputNav, outputDir, siteName, o
1097
1592
  knownLocales,
1098
1593
  }
1099
1594
  : {}),
1595
+ ...(globalPrefixes.length > 0 ? { globalPrefixes } : {}),
1100
1596
  }
1101
1597
  : undefined,
1102
1598
  }));
@@ -1157,21 +1653,49 @@ function buildRobotsTxt(options, hasSiteUrl) {
1157
1653
  const aiInput = options.aiInput ?? "yes";
1158
1654
  const aiTrain = options.aiTrain ?? "no";
1159
1655
  const contentSignal = `Content-Signal: search=${search}, ai-input=${aiInput}, ai-train=${aiTrain}\n`;
1160
- const sitemap = hasSiteUrl
1161
- ? `Sitemap: ${options.siteUrl.replace(/\/$/, "")}/sitemap-index.xml\n`
1656
+ // Per-mount sitemap path: each Dogsbay site emits its sitemap
1657
+ // index under <basePath>/, so robots.txt must point there too.
1658
+ // (Multi-mount deploys end up with one robots.txt per site at
1659
+ // their respective hosts / paths; each correctly references its
1660
+ // own mount's sitemap-index.)
1661
+ // Sitemap URL = origin + combined + /sitemap-index.xml. Use the
1662
+ // origin (no path) from site.url and the combined prefix (urlBase
1663
+ // + basePath); siteUrl could itself include a path component when
1664
+ // hosting on a subpath (GH Pages project page), so we strip it
1665
+ // here to avoid double-counting.
1666
+ const { origin } = parseSiteUrl(options.siteUrl);
1667
+ const combined = combinedPrefix(options);
1668
+ const sitemap = hasSiteUrl && origin
1669
+ ? `Sitemap: ${origin}${withBasePath(combined, "/sitemap-index.xml")}\n`
1162
1670
  : "";
1163
- return `User-agent: *\nAllow: /\n${contentSignal}${sitemap}`;
1671
+ // Llms-Txt: line — non-standard but follows the same shape as
1672
+ // `Sitemap:`. Crawlers and agents that scan robots.txt before
1673
+ // fetching pages get a direct pointer at the per-mount llms.txt.
1674
+ // RFC 9309 explicitly permits unknown directives ("intentionally
1675
+ // permissive of such future extensions") so this is harmless to
1676
+ // standards-compliant parsers. Emitted alongside Sitemap when
1677
+ // siteUrl is set; absolute URLs only (relative paths would be
1678
+ // ambiguous without a base).
1679
+ const llmsTxt = options.llmsTxt !== false && hasSiteUrl && origin
1680
+ ? `Llms-Txt: ${origin}${withBasePath(combined, "/llms.txt")}\n`
1681
+ : "";
1682
+ return `User-agent: *\nAllow: /\n${contentSignal}${sitemap}${llmsTxt}`;
1164
1683
  }
1165
1684
/**
 * Produce the text of `public/_headers` (Cloudflare Pages / Workers
 * Static Assets convention).
 *
 * The file applies one RFC 8288 `Link` header to every path (`/*`),
 * pointing at this mount's llms.txt index so agents can discover the
 * LLM-friendly listing without parsing HTML.
 *
 * @param {string} basePath URL prefix to put in front of `/llms.txt`
 *   (e.g. `/docs` yields `</docs/llms.txt>`).
 * @returns {string} File contents, newline-terminated.
 */
function buildHeadersFile(basePath) {
    const linkTarget = withBasePath(basePath, "/llms.txt");
    const rows = [];
    // Match-all path rule; the header line below belongs to it.
    rows.push("/*");
    rows.push(` Link: <${linkTarget}>; rel="describedby"; type="text/plain"`);
    // Empty last row gives the file its trailing newline after join.
    rows.push("");
    return rows.join("\n");
}
@@ -1180,20 +1704,28 @@ function buildMiddlewareSource(config) {
1180
1704
  "// AUTO-GENERATED by `dogsbay site build` — do not edit.",
1181
1705
  "// Composes the docs-layout middleware helpers.",
1182
1706
  "//",
1183
- "// Markdown content negotiation:",
1184
- "// This middleware fires on every request, but in Astro's static",
1185
- "// prerender mode (output: \"static\") request headers are NOT",
1186
- "// forwarded Astro warns about \"Astro.request.headers was used",
1187
- "// when rendering...\" and serves a prerendered HTML response.",
1188
- "// That means `Accept: text/markdown` negotiation only kicks in",
1189
- "// under SSR (output: \"server\") or via an edge function on the",
1190
- "// deployment layer (Cloudflare Worker, Netlify Edge, etc.).",
1191
- "// For pure-static deploys, agents should follow the page's",
1192
- "// <link rel=\"alternate\" type=\"text/markdown\"> href to fetch",
1193
- "// the .md mirror directly (e.g. /docs.md).",
1707
+ "// Static-prerender guard:",
1708
+ "// In Astro's static output mode, this middleware is invoked",
1709
+ "// for every prerendered route at build time. Reading",
1710
+ "// `context.request.headers` there triggers an Astro warning",
1711
+ "// per page (\"Astro.request.headers was used during static",
1712
+ "// render\"), which floods `dogsbay site build` / `site preview`",
1713
+ "// output. Worse, the negotiation can't actually happen at",
1714
+ "// build time there's no runtime client whose Accept header",
1715
+ "// we'd be honoring.",
1194
1716
  "//",
1195
- "// The Cloudflare-Worker-driven full fix is tracked in",
1196
- "// plans/cloudflare-deploy-content-negotiation.md.",
1717
+ "// We guard with `context.isPrerendered` so prerendered routes",
1718
+ "// short-circuit to `next()` immediately. At runtime in static",
1719
+ "// deploys, middleware doesn't fire at all (no server); at",
1720
+ "// runtime in SSR / hybrid deploys, only dynamic routes fire,",
1721
+ "// which is exactly when negotiation makes sense.",
1722
+ "//",
1723
+ "// Markdown content negotiation:",
1724
+ "// For pure-static deploys, `Accept: text/markdown` is honored",
1725
+ "// by the platform (Cloudflare _headers + Worker, Netlify Edge",
1726
+ "// functions). Agents that can't send Accept headers should",
1727
+ "// follow the page's <link rel=\"alternate\" type=\"text/markdown\">",
1728
+ "// to fetch the .md mirror directly (e.g. /docs.md).",
1197
1729
  'import { defineMiddleware } from "astro:middleware";',
1198
1730
  ];
1199
1731
  if (config.mdMirror) {
@@ -1207,6 +1739,11 @@ function buildMiddlewareSource(config) {
1207
1739
  lines.push(`const AXIS_REDIRECT_CONFIG = ${JSON.stringify(config.axisRedirect, null, 2)};`, "");
1208
1740
  }
1209
1741
  lines.push("export const onRequest = defineMiddleware((context, next) => {");
1742
+ // Skip prerendered routes — see file-top comment for the rationale.
1743
+ // Avoids per-page Astro.request.headers warnings during build, and
1744
+ // matches runtime semantics (middleware doesn't fire on prerendered
1745
+ // routes when deployed).
1746
+ lines.push(" if (context.isPrerendered) return next();");
1210
1747
  lines.push(" const url = new URL(context.request.url);");
1211
1748
  if (config.mdMirror) {
1212
1749
  lines.push(' const accept = context.request.headers.get("accept");', " const mdTarget = shouldRewriteToMarkdown(accept, url.pathname);", " if (mdTarget) return context.rewrite(mdTarget);");
@@ -1238,8 +1775,18 @@ function buildMdEndpoint(page, sourceRel) {
1238
1775
  ].join("\n");
1239
1776
  }
1240
1777
  /**
1241
- * Emit `public/llms.txt`, `public/llms-full.txt`, and per-section
1242
- * `public/<dir>/llms.txt` files for the site.
1778
+ * Emit per-mount llms.txt + llms-full.txt + per-section indexes.
1779
+ *
1780
+ * Files live under `public/<basePath>/...` so multiple Dogsbay sites
1781
+ * can mount on the same host (`/docs/llms.txt` + `/api/llms.txt` +
1782
+ * `/handbook/llms.txt`) without colliding at the root. When basePath
1783
+ * is empty, this collapses to `public/llms.txt` — the single-site
1784
+ * llmstxt.org-spec layout.
1785
+ *
1786
+ * The host root `/llms.txt` is intentionally NOT emitted by the
1787
+ * platform: it's the user's umbrella file, analogous to
1788
+ * `sitemap-index.xml`. Multi-mount deploys hand-write a top-level
1789
+ * `/llms.txt` that links to each per-mount index.
1243
1790
  *
1244
1791
  * Per-section files are written for every top-level nav group that
1245
1792
  * resolves to a site directory (either via `group.href` or via the
@@ -1251,26 +1798,92 @@ function emitLlmsTxtFiles(outputDir, siteName, options, nav, pages) {
1251
1798
  description: options.description,
1252
1799
  siteUrl: options.siteUrl,
1253
1800
  };
1254
- const publicDir = join(outputDir, "public");
1255
- mkdirSync(publicDir, { recursive: true });
1256
- const hrefPrefix = normalizeBasePath(options.basePath);
1257
- writeFileSync(join(publicDir, "llms.txt"), buildLlmsTxt(siteConfig, nav, pages, { hrefPrefix }));
1258
- writeFileSync(join(publicDir, "llms-full.txt"), buildLlmsFullTxt(siteConfig, nav, pages, {
1801
+ // hrefPrefix is the COMBINED prefix — used for the URL paths that
1802
+ // appear inside the llms.txt body (so agents fetch the correct
1803
+ // host-relative URLs). Filesystem layout uses basePath alone:
1804
+ // `public/<basePath>/llms.txt` matches the existing per-mount
1805
+ // delivery shape.
1806
+ const hrefPrefix = combinedPrefix(options);
1807
+ const basePath = normalizeBasePath(options.basePath);
1808
+ const baseSegments = basePathSegments(basePath);
1809
+ const mountDir = join(outputDir, "public", ...baseSegments);
1810
+ mkdirSync(mountDir, { recursive: true });
1811
+ writeFileSync(join(mountDir, "llms.txt"), buildLlmsTxt(siteConfig, nav, pages, { hrefPrefix }));
1812
+ writeFileSync(join(mountDir, "llms-full.txt"), buildLlmsFullTxt(siteConfig, nav, pages, {
1259
1813
  summary: "body",
1260
1814
  serializePage: serializePageMd,
1261
1815
  hrefPrefix,
1262
1816
  }));
1817
+ // Per-section files. `deriveSectionDir` returns a host-absolute
1818
+ // path derived from nav hrefs, which since the combined-prefix
1819
+ // refactor (commit 132891e) include urlBase + basePath — NOT just
1820
+ // basePath. So joining its return onto public/ directly would
1821
+ // double-prefix into `public/<urlBase>/<basePath>/<section>/llms.txt`,
1822
+ // which then serves at `<urlBase>/<urlBase>/<basePath>/<section>/...`
1823
+ // once Astro's base prefix is applied at request time.
1824
+ //
1825
+ // Strip the combined prefix off the section dir to get just the
1826
+ // section tail, then re-prepend basePath via mountDir. Result:
1827
+ // `public/<basePath>/<section>/llms.txt`, served under the deploy's
1828
+ // base mount as `<urlBase>/<basePath>/<section>/llms.txt`.
1829
+ const combinedSegs = hrefPrefix.replace(/^\//, "");
1263
1830
  for (const group of nav) {
1264
1831
  if (!group.children || group.children.length === 0)
1265
1832
  continue;
1266
1833
  const dir = deriveSectionDir(group);
1267
1834
  if (!dir)
1268
1835
  continue;
1269
- const sectionPath = join(publicDir, dir, "llms.txt");
1836
+ let relDir;
1837
+ if (combinedSegs && dir === combinedSegs) {
1838
+ relDir = "";
1839
+ }
1840
+ else if (combinedSegs && dir.startsWith(`${combinedSegs}/`)) {
1841
+ relDir = dir.slice(combinedSegs.length + 1);
1842
+ }
1843
+ else {
1844
+ // Defensive: if for some reason the dir doesn't carry the
1845
+ // combined prefix (older importer, manual nav.yml, etc.), fall
1846
+ // back to the raw value rather than rooting at /.
1847
+ relDir = dir;
1848
+ }
1849
+ const sectionPath = relDir
1850
+ ? join(mountDir, relDir, "llms.txt")
1851
+ : join(mountDir, "llms.txt");
1270
1852
  mkdirSync(dirname(sectionPath), { recursive: true });
1271
1853
  writeFileSync(sectionPath, buildSectionLlmsTxt(siteConfig, group, pages, { hrefPrefix }));
1272
1854
  }
1273
1855
  }
1856
/**
 * Write this mount's sitemap files:
 * `public/<basePath>/sitemap-0.xml` and
 * `public/<basePath>/sitemap-index.xml`.
 *
 * Two different prefixes are in play. The on-disk location uses
 * `basePath` only, while the prefix handed to the sitemap builders is the
 * combined prefix, so the URLs written into the XML include the host's
 * served subpath.
 *
 * The caller is expected to have verified that `options.siteUrl` is a
 * valid http(s) URL before calling; this function does not re-check.
 *
 * @param {string} outputDir Root of the generated Astro project.
 * @param {object} options Site options; reads `basePath`, `siteUrl`,
 *   `noindex`.
 * @param {Array<object>} pages Pages to list in the sitemap.
 */
function emitSitemapFiles(outputDir, options, pages) {
    const fsPrefix = normalizeBasePath(options.basePath);
    const urlPrefix = combinedPrefix(options);
    const targetDir = join(outputDir, "public", ...basePathSegments(fsPrefix));
    mkdirSync(targetDir, { recursive: true });
    const { siteUrl } = options;
    // Page-level sitemap.
    const pagesXml = buildSitemap(pages, {
        siteUrl,
        basePath: urlPrefix,
        siteNoindex: options.noindex === true,
    });
    writeFileSync(join(targetDir, "sitemap-0.xml"), pagesXml);
    // Index file that references the sub-sitemap(s).
    const indexXml = buildSitemapIndex({ siteUrl, basePath: urlPrefix });
    writeFileSync(join(targetDir, "sitemap-index.xml"), indexXml);
}
1274
1887
  /**
1275
1888
  * Pick a directory under `public/` for a top-level nav group. Prefers
1276
1889
  * the group's own href (already a `/docs/x/y` path); otherwise falls
@@ -1348,6 +1961,11 @@ function copyComponents(outputDir) {
1348
1961
  "response-tabs", "schema-viewer", "code-samples", "copy-button",
1349
1962
  "markdown-example",
1350
1963
  "accordion", "link-card", "avatar", "math",
1964
+ // Icon resolves @ui/icon/Icon.astro → built-time SVG inlining
1965
+ // via @dogsbay/icons. Used by `:::cards` `{icon=...}` and the
1966
+ // inline `:icon[name]` directive. Without this entry every page
1967
+ // emitting the icon import 500s with "module not found".
1968
+ "icon",
1351
1969
  ];
1352
1970
  for (const name of needed) {
1353
1971
  const src = join(componentsSource, name);
@@ -1402,6 +2020,32 @@ function copyAssets(sourceDir, outputDir, imageOptimization) {
1402
2020
  catch { /* source may not exist */ }
1403
2021
  }
1404
2022
  // ── CSS generation (ported from import-mkdocs.ts) ───────
2023
/**
 * Build the class list placed inside the `@source inline("...")`
 * directive that pins the grid-tone palette into the generated
 * stylesheet.
 *
 * Rationale: tone classes such as `bg-primary/10` only show up in
 * `.astro` pages produced by the grid-item serializer. If Tailwind's
 * content scanner misses them they are purged and grid cells render
 * without a background; listing them inline forces generation.
 *
 * The list is derived from `TONE_CLASSES`, so a tone added to the
 * palette is safelisted automatically.
 *
 * @returns {string} Unique class names, sorted, separated by single
 *   spaces.
 */
function buildToneSafelist() {
    const tokens = Object.values(TONE_CLASSES)
        // Each palette entry is a whitespace-separated class string.
        .flatMap((classList) => classList.split(/\s+/))
        // Leading/trailing whitespace yields empty tokens; drop them.
        .filter((token) => token);
    const unique = new Set(tokens);
    return Array.from(unique).sort().join(" ");
}
1405
2049
  function generateGlobalCss() {
1406
2050
  return `@import "tailwindcss";
1407
2051
  @import "./theme.css";
@@ -1410,6 +2054,14 @@ function generateGlobalCss() {
1410
2054
  @source "../../node_modules/@dogsbay/ui/src";
1411
2055
  @source "../../node_modules/@dogsbay/docs-layout/src";
1412
2056
 
2057
+ /* Pin the grid-tone palette. These classes are emitted into
2058
+ markdown-generated .astro pages by the grid-item serializer
2059
+ (TONE_CLASSES in @dogsbay/format-astro). Without inlining,
2060
+ opacity-modified utilities like bg-primary/10 get purged when
2061
+ Tailwind doesn't see them in the scanned globs, leaving grid
2062
+ demo cells with no visible background. */
2063
+ @source inline("${buildToneSafelist()}");
2064
+
1413
2065
  /* Prose typography for rendered content */
1414
2066
  .docs-prose {
1415
2067
  line-height: 1.7;