dogsbay 0.2.0-beta.2 → 0.2.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/agent.js +305 -0
- package/dist/commands/site-build.js +66 -15
- package/dist/commands/site-dev.js +181 -23
- package/dist/commands/site-init.js +193 -32
- package/dist/config/defaults.js +8 -1
- package/dist/config/load.js +6 -32
- package/dist/config/to-astro-options.js +1 -0
- package/dist/import-content.js +13 -12
- package/dist/index.js +19 -4
- package/dist/passthrough-astro.js +152 -0
- package/dist/registry.js +8 -0
- package/package.json +11 -9
- package/skills/platform/agent-readiness/SKILL.md +262 -0
- package/skills/platform/cli-commands/SKILL.md +205 -0
- package/skills/platform/config-yml/SKILL.md +219 -0
- package/skills/platform/frontmatter-fields/SKILL.md +310 -0
- package/skills/platform/markdown-directives/SKILL.md +329 -0
- package/skills/platform/multi-source/SKILL.md +294 -0
- package/skills/platform/nav-file/SKILL.md +107 -0
- package/skills/platform/openapi-source/SKILL.md +237 -0
- package/skills/platform/plugin-api/SKILL.md +280 -0
- package/skills/platform/project-anatomy/SKILL.md +156 -0
- package/skills/platform/taxonomy-config/SKILL.md +392 -0
- package/skills/platform/theme-tokens/SKILL.md +276 -0
package/dist/index.js
CHANGED
|
@@ -15,6 +15,7 @@ import { siteInit } from "./commands/site-init.js";
|
|
|
15
15
|
import { siteBuild } from "./commands/site-build.js";
|
|
16
16
|
import { siteCheck } from "./commands/site-check.js";
|
|
17
17
|
import { siteDev, sitePreview } from "./commands/site-dev.js";
|
|
18
|
+
import { agentInstall } from "./commands/agent.js";
|
|
18
19
|
// Read version from the runtime package.json so `dogsbay --version`
|
|
19
20
|
// never drifts from what's published. Walks one level up from
|
|
20
21
|
// `dist/index.js` to `package.json` (works in both monorepo dev and
|
|
@@ -62,7 +63,7 @@ site
|
|
|
62
63
|
.option("--edit-uri <path>", "Repo path prefix for edit links (default: blob/main/content/)")
|
|
63
64
|
.option("--copyright <text>", "Footer copyright text")
|
|
64
65
|
.option("--theme <name>", "Theme preset (default | material)")
|
|
65
|
-
.option("--deploy <target>", "Deploy target (cloudflare-workers)")
|
|
66
|
+
.option("--deploy <target>", "Deploy target (cloudflare-workers | github-pages)")
|
|
66
67
|
.option("--content <path>", "Path to source markdown (relative to config)")
|
|
67
68
|
.option("--from <format>", "Source format (auto | dogsbay-md | mkdocs | obsidian | starlight | mdx)")
|
|
68
69
|
.option("--nav <path>", "Path to explicit nav file (.json/.yml)")
|
|
@@ -96,7 +97,7 @@ site
|
|
|
96
97
|
.option("--edit-uri <path>", "Override site.editUri")
|
|
97
98
|
.option("--copyright <text>", "Override site.copyright")
|
|
98
99
|
.option("--theme <name>", "Override theme (default | material)")
|
|
99
|
-
.option("--deploy <target>", "Override deploy.target (cloudflare-workers)")
|
|
100
|
+
.option("--deploy <target>", "Override deploy.target (cloudflare-workers | github-pages)")
|
|
100
101
|
.option("--content <path>", "Override content.source")
|
|
101
102
|
.option("--from <format>", "Override content.from")
|
|
102
103
|
.option("--nav <path>", "Override content.nav")
|
|
@@ -108,8 +109,12 @@ site
|
|
|
108
109
|
.option("--ai-input <yes|no>", "Override Content-Signal aiInput")
|
|
109
110
|
.option("--ai-search <yes|no>", "Override Content-Signal search")
|
|
110
111
|
.option("--include-drafts", "Include status: draft pages (default: excluded from prod build)")
|
|
111
|
-
.option("--
|
|
112
|
-
"Default builds
|
|
112
|
+
.option("--primary-only", "Build only sources marked primary: true (skip non-primary). " +
|
|
113
|
+
"Default builds the full publish matrix — every locale / version / namespace " +
|
|
114
|
+
"the writer declared. Use --primary-only for fast single-source iteration in CI " +
|
|
115
|
+
"lint jobs (rare).")
|
|
116
|
+
.option("--publish", "Deprecated — `dogsbay site build` defaults to publish mode now. " +
|
|
117
|
+
"Kept as a no-op for compatibility with older scripts.")
|
|
113
118
|
.option("--refresh", "Wipe the per-source git cache before resolving — forces re-clone of every git: source. " +
|
|
114
119
|
"Useful when a tracked branch's HEAD has moved.")
|
|
115
120
|
.action((dir, options) => siteBuild(dir, options));
|
|
@@ -218,6 +223,16 @@ program
|
|
|
218
223
|
.option("--concurrency <n>", "Maximum concurrent fetches (default: 3)", "3")
|
|
219
224
|
.option("--rate-limit <ms>", "Minimum ms between request batches (default: 200)", "200")
|
|
220
225
|
.action((url, options) => pull(url, options));
|
|
226
|
+
// ── `dogsbay agent` — wire skill discovery for LLM agents ──────────────
|
|
227
|
+
const agent = program
|
|
228
|
+
.command("agent")
|
|
229
|
+
.description("Wire Dogsbay platform skills into LLM-agent discovery paths");
|
|
230
|
+
agent
|
|
231
|
+
.command("install")
|
|
232
|
+
.description("Install platform skills + per-agent discovery symlinks")
|
|
233
|
+
.option("--agent <names>", "Comma-separated list (e.g. claude,cursor)")
|
|
234
|
+
.option("--all", "Install for every supported agent")
|
|
235
|
+
.action((options) => agentInstall(undefined, options));
|
|
221
236
|
program
|
|
222
237
|
.command("export-techdocs")
|
|
223
238
|
.description("Post-process Astro build output into Backstage TechDocs format")
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Passthrough Astro page collection.
|
|
3
|
+
*
|
|
4
|
+
* Hand-authored `.astro` files that live under a content source's
|
|
5
|
+
* directory get copied verbatim to `src/pages/<basePath>/...` at
|
|
6
|
+
* build time. Authors opt in by listing the file in `nav.yml`; the
|
|
7
|
+
* intersection of "files on disk" and "hrefs in nav" defines the
|
|
8
|
+
* passthrough set.
|
|
9
|
+
*
|
|
10
|
+
* See plans/passthrough-astro-pages.md.
|
|
11
|
+
*/
|
|
12
|
+
import { existsSync, readdirSync, statSync } from "node:fs";
|
|
13
|
+
import { join, posix, relative, sep } from "node:path";
|
|
14
|
+
/**
|
|
15
|
+
* Walk `contentDir` for `.astro` files, compute each one's canonical
|
|
16
|
+
* href, and return only the entries whose href appears in the
|
|
17
|
+
* resolved nav. Files outside the nav are ignored — passthrough is
|
|
18
|
+
* opt-in to avoid accidentally publishing scratch components.
|
|
19
|
+
*/
|
|
20
|
+
export function collectPassthroughEntries(contentDir, nav, options) {
|
|
21
|
+
if (!existsSync(contentDir))
|
|
22
|
+
return [];
|
|
23
|
+
const navHrefs = collectNavHrefs(nav);
|
|
24
|
+
const candidates = walkAstroFiles(contentDir, contentDir);
|
|
25
|
+
const entries = [];
|
|
26
|
+
for (const sourceAbs of candidates) {
|
|
27
|
+
const source = toPosix(relative(contentDir, sourceAbs));
|
|
28
|
+
const href = sourceToHref(source, options.basePath);
|
|
29
|
+
if (!navHrefs.has(href))
|
|
30
|
+
continue;
|
|
31
|
+
entries.push({
|
|
32
|
+
source,
|
|
33
|
+
sourceAbs,
|
|
34
|
+
outputRelPath: sourceToOutputRelPath(source, options.basePath),
|
|
35
|
+
href,
|
|
36
|
+
});
|
|
37
|
+
}
|
|
38
|
+
return entries;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Recursively gather every `.astro` file under `dir`. Skips
|
|
42
|
+
* `node_modules`, dot-directories, and any directory named
|
|
43
|
+
* `_components` / `_partials` (a common convention for "this is
|
|
44
|
+
* shared, don't publish it" — leaves authors an obvious escape
|
|
45
|
+
* hatch when they want to ship private helpers alongside content).
|
|
46
|
+
*/
|
|
47
|
+
function walkAstroFiles(dir, root) {
|
|
48
|
+
const out = [];
|
|
49
|
+
let entries = [];
|
|
50
|
+
try {
|
|
51
|
+
entries = readdirSync(dir, { withFileTypes: true });
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
return out;
|
|
55
|
+
}
|
|
56
|
+
for (const entry of entries) {
|
|
57
|
+
if (entry.name.startsWith("."))
|
|
58
|
+
continue;
|
|
59
|
+
if (entry.name === "node_modules")
|
|
60
|
+
continue;
|
|
61
|
+
if (entry.name === "_components" || entry.name === "_partials")
|
|
62
|
+
continue;
|
|
63
|
+
const full = join(dir, entry.name);
|
|
64
|
+
if (entry.isDirectory()) {
|
|
65
|
+
out.push(...walkAstroFiles(full, root));
|
|
66
|
+
}
|
|
67
|
+
else if (entry.isFile() && entry.name.endsWith(".astro")) {
|
|
68
|
+
out.push(full);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return out;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Compute the public-facing href for a source path. Mirrors the
|
|
75
|
+
* slug logic in `format-dogsbay-md/src/nav-file.ts:fileToHref` so a
|
|
76
|
+
* passthrough .astro file produces the same href as a hypothetical
|
|
77
|
+
* .md sibling at the same path.
|
|
78
|
+
*
|
|
79
|
+
* - "tutorials/playground.astro" → "/docs/tutorials/playground"
|
|
80
|
+
* - "reference/index.astro" → "/docs/reference"
|
|
81
|
+
* - "index.astro" → "/docs"
|
|
82
|
+
*/
|
|
83
|
+
function sourceToHref(source, basePath) {
|
|
84
|
+
let slug = source.replace(/\.astro$/i, "");
|
|
85
|
+
if (slug.endsWith("/index"))
|
|
86
|
+
slug = slug.slice(0, -"/index".length);
|
|
87
|
+
if (slug === "index")
|
|
88
|
+
slug = "";
|
|
89
|
+
const prefix = basePath.endsWith("/") ? basePath.slice(0, -1) : basePath;
|
|
90
|
+
return slug ? `${prefix}/${slug}` : prefix || "";
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Compute the output path (relative to outputDir) where a passthrough
|
|
94
|
+
* source should be copied. Preserves the source's directory shape
|
|
95
|
+
* under `src/pages/<basePath segments>/`. Index files survive as
|
|
96
|
+
* `index.astro` so Astro's directory-routing matches the nav href.
|
|
97
|
+
*/
|
|
98
|
+
function sourceToOutputRelPath(source, basePath) {
|
|
99
|
+
const baseSegs = basePath.split("/").filter((s) => s.length > 0);
|
|
100
|
+
const sourceSegs = source.split("/");
|
|
101
|
+
return ["src", "pages", ...baseSegs, ...sourceSegs].join(sep);
|
|
102
|
+
}
|
|
103
|
+
/** Walk a NavItem tree collecting every href into a Set. */
|
|
104
|
+
function collectNavHrefs(nav) {
|
|
105
|
+
const out = new Set();
|
|
106
|
+
const stack = [...nav];
|
|
107
|
+
while (stack.length > 0) {
|
|
108
|
+
const item = stack.pop();
|
|
109
|
+
if (item.href)
|
|
110
|
+
out.add(item.href);
|
|
111
|
+
if (item.children)
|
|
112
|
+
stack.push(...item.children);
|
|
113
|
+
}
|
|
114
|
+
return out;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Normalize backslashes (Windows path separators) to forward slashes
|
|
118
|
+
* before doing any href / slug computation. The slug shape is
|
|
119
|
+
* URL-flavoured even on Windows.
|
|
120
|
+
*/
|
|
121
|
+
function toPosix(p) {
|
|
122
|
+
return p.split(sep).join(posix.sep);
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Build the slug set covered by the passthrough entries. Used by
|
|
126
|
+
* site-build to assert no collision with generated slugs from
|
|
127
|
+
* `emitAstroPages`.
|
|
128
|
+
*/
|
|
129
|
+
export function passthroughSlugs(entries) {
|
|
130
|
+
return new Set(entries.map((e) => e.href));
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Convenience guard — verify every passthrough source still exists
|
|
134
|
+
* on disk. Walking already only returns existing files, but this
|
|
135
|
+
* helper is useful when entries are constructed externally (e.g. in
|
|
136
|
+
* tests).
|
|
137
|
+
*/
|
|
138
|
+
export function assertPassthroughFilesExist(entries) {
|
|
139
|
+
for (const entry of entries) {
|
|
140
|
+
if (!existsSync(entry.sourceAbs)) {
|
|
141
|
+
throw new Error(`Passthrough Astro source missing: ${entry.source} ` +
|
|
142
|
+
`(resolved: ${entry.sourceAbs})`);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Touch statSync to detect non-file entries (defensive)
|
|
146
|
+
for (const entry of entries) {
|
|
147
|
+
const st = statSync(entry.sourceAbs);
|
|
148
|
+
if (!st.isFile()) {
|
|
149
|
+
throw new Error(`Passthrough Astro source is not a file: ${entry.source}`);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
package/dist/registry.js
CHANGED
|
@@ -388,6 +388,14 @@ export const registry = {
|
|
|
388
388
|
primitives: [],
|
|
389
389
|
description: "Clickable card with link",
|
|
390
390
|
},
|
|
391
|
+
icon: {
|
|
392
|
+
name: "icon",
|
|
393
|
+
files: ["Icon.astro", "index.ts"],
|
|
394
|
+
dependencies: ["@dogsbay/icons"],
|
|
395
|
+
registryDependencies: [],
|
|
396
|
+
primitives: [],
|
|
397
|
+
description: "Build-time-resolved icon (Lucide default; mdi:, simple-icons:, etc. via Iconify)",
|
|
398
|
+
},
|
|
391
399
|
sidebar: {
|
|
392
400
|
name: "sidebar",
|
|
393
401
|
files: ["Sidebar.astro", "SidebarContent.astro", "SidebarGroup.astro", "SidebarGroupContent.astro", "SidebarGroupLabel.astro", "SidebarHeader.astro", "SidebarInset.astro", "SidebarMenu.astro", "SidebarMenuButton.astro", "SidebarMenuItem.astro", "SidebarNavTree.astro", "SidebarProvider.astro", "SidebarRail.astro", "SidebarSeparator.astro", "SidebarTrigger.astro", "sidebar.ts", "index.ts"],
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dogsbay",
|
|
3
|
-
"version": "0.2.0-beta.2",
|
|
3
|
+
"version": "0.2.0-beta.21",
|
|
4
4
|
"description": "CLI for Dogsbay — scaffold, build, and serve documentation sites with markdown / MkDocs / Obsidian / OpenAPI sources",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
"files": [
|
|
10
10
|
"dist",
|
|
11
11
|
"bin",
|
|
12
|
+
"skills",
|
|
12
13
|
"README.md"
|
|
13
14
|
],
|
|
14
15
|
"keywords": [
|
|
@@ -25,19 +26,20 @@
|
|
|
25
26
|
],
|
|
26
27
|
"dependencies": {
|
|
27
28
|
"cheerio": "^1.0.0",
|
|
29
|
+
"chokidar": "^4.0.0",
|
|
28
30
|
"commander": "^13.0.0",
|
|
29
31
|
"markdown-it": "^14.0.0",
|
|
30
32
|
"picocolors": "^1.1.0",
|
|
31
33
|
"prompts": "^2.4.2",
|
|
32
34
|
"yaml": "^2.8.3",
|
|
33
|
-
"@dogsbay/format-mkdocs": "0.2.0-beta.
|
|
34
|
-
"@dogsbay/format-astro": "0.2.0-beta.
|
|
35
|
-
"@dogsbay/format-
|
|
36
|
-
"@dogsbay/format-
|
|
37
|
-
"@dogsbay/format-starlight": "0.2.0-beta.
|
|
38
|
-
"@dogsbay/format-dogsbay-md": "0.2.0-beta.
|
|
39
|
-
"@dogsbay/
|
|
40
|
-
"@dogsbay/
|
|
35
|
+
"@dogsbay/format-mkdocs": "0.2.0-beta.21",
|
|
36
|
+
"@dogsbay/format-astro": "0.2.0-beta.21",
|
|
37
|
+
"@dogsbay/format-obsidian": "0.2.0-beta.21",
|
|
38
|
+
"@dogsbay/format-mdx": "0.2.0-beta.21",
|
|
39
|
+
"@dogsbay/format-starlight": "0.2.0-beta.21",
|
|
40
|
+
"@dogsbay/format-dogsbay-md": "0.2.0-beta.21",
|
|
41
|
+
"@dogsbay/format-openapi": "0.2.0-beta.21",
|
|
42
|
+
"@dogsbay/types": "0.2.0-beta.21"
|
|
41
43
|
},
|
|
42
44
|
"devDependencies": {
|
|
43
45
|
"@types/node": "^22.0.0",
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dogsbay:agent-readiness
|
|
3
|
+
description: How Dogsbay sites expose content to LLM agents and search indexers — llms.txt, llms-full.txt, .md mirrors, Content-Signal HTTP headers, robots.txt. Use when configuring agent.* in dogsbay.config.yml or debugging agent consumption.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Agent readiness
|
|
7
|
+
|
|
8
|
+
Every Dogsbay site is built to be **agent-readable by default**.
|
|
9
|
+
Three mechanisms work together so that any modern LLM, search
|
|
10
|
+
engine, or AI-answer-engine can consume the docs as cleanly as
|
|
11
|
+
a human reader:
|
|
12
|
+
|
|
13
|
+
1. **llms.txt** at the root — the canonical agent index
|
|
14
|
+
2. **`.md` mirror** for every page — the prose body without
|
|
15
|
+
chrome
|
|
16
|
+
3. **Content-Signal HTTP headers** — IETF-track signal for "what
|
|
17
|
+
AI use is permitted"
|
|
18
|
+
|
|
19
|
+
All three are emitted at `dogsbay site build` time. Toggleable
|
|
20
|
+
via the `agent:` block in `dogsbay.config.yml`.
|
|
21
|
+
|
|
22
|
+
## llms.txt
|
|
23
|
+
|
|
24
|
+
The standard at [llmstxt.org](https://llmstxt.org/) — a single
|
|
25
|
+
file at the site root that lists every page with title +
|
|
26
|
+
description + URL. Two flavours:
|
|
27
|
+
|
|
28
|
+
- `/llms.txt` — short index (title + description + URL per page,
|
|
29
|
+
grouped by section). The agent's "table of contents."
|
|
30
|
+
- `/llms-full.txt` — full index with the markdown body of every
|
|
31
|
+
page concatenated. The agent's "everything in one paste."
|
|
32
|
+
|
|
33
|
+
Per-section mini-indexes are also emitted (`/llms-${section}.txt` for
|
|
34
|
+
each top-level nav group), so an agent can pull just the
|
|
35
|
+
relevant slice without grabbing the whole site.
|
|
36
|
+
|
|
37
|
+
Format example (`/llms.txt`):
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
# Acme Docs
|
|
41
|
+
|
|
42
|
+
> Documentation for the Acme platform.
|
|
43
|
+
|
|
44
|
+
## Getting started
|
|
45
|
+
|
|
46
|
+
- [Installation](/docs/install): Install the CLI on macOS, Linux, or Windows.
|
|
47
|
+
- [Quickstart](/docs/quickstart): Your first request in 60 seconds.
|
|
48
|
+
|
|
49
|
+
## API reference
|
|
50
|
+
|
|
51
|
+
- [List pets](/docs/api/pets/list-pets): Returns paginated list.
|
|
52
|
+
- [Create a pet](/docs/api/pets/create-pet): Idempotent creation.
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Toggleable:
|
|
56
|
+
|
|
57
|
+
```yaml
|
|
58
|
+
agent:
|
|
59
|
+
llmsTxt: true # default true; set false to omit
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## `.md` mirror
|
|
63
|
+
|
|
64
|
+
Every emitted page has a sibling `.md` route that returns the
|
|
65
|
+
markdown source (or a faithful prose rendering of it) with
|
|
66
|
+
`Content-Type: text/markdown`.
|
|
67
|
+
|
|
68
|
+
For a page at `/docs/api/pets/list-pets`, the mirror is at
|
|
69
|
+
`/docs/api/pets/list-pets.md`. For `/docs/`, it's at `/docs.md`.
|
|
70
|
+
|
|
71
|
+
Why two URLs? A human visiting `/docs/api/pets/list-pets` gets
|
|
72
|
+
the rich HTML page with components, sidebar, search. An agent
|
|
73
|
+
hitting `/docs/api/pets/list-pets.md` gets just the prose —
|
|
74
|
+
faster to parse, no HTML noise, no dependency on a Cloudflare
|
|
75
|
+
worker for content negotiation.
|
|
76
|
+
|
|
77
|
+
### Discovery via `<link rel="alternate">`
|
|
78
|
+
|
|
79
|
+
Every HTML page emits:
|
|
80
|
+
|
|
81
|
+
```html
|
|
82
|
+
<link rel="alternate" type="text/markdown" href="/docs/api/pets/list-pets.md">
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Agents that follow `rel="alternate"` find the mirror without
|
|
86
|
+
guessing at URL conventions. Anthropic's prompt-cache, Mintlify's
|
|
87
|
+
agents, and several others do this.
|
|
88
|
+
|
|
89
|
+
Toggleable:
|
|
90
|
+
|
|
91
|
+
```yaml
|
|
92
|
+
agent:
|
|
93
|
+
mdMirror: true # default true
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Per-page opt-out
|
|
97
|
+
|
|
98
|
+
Some pages don't have useful prose mirrors (e.g. landing pages
|
|
99
|
+
that are mostly hero components). Opt out per-page:
|
|
100
|
+
|
|
101
|
+
```yaml
|
|
102
|
+
---
|
|
103
|
+
title: Home
|
|
104
|
+
mdMirror: false
|
|
105
|
+
---
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Or via the global `agent.mdMirror: false`.
|
|
109
|
+
|
|
110
|
+
### Content negotiation (Cloudflare worker)
|
|
111
|
+
|
|
112
|
+
Astro's static-mode output doesn't pass per-request headers to
|
|
113
|
+
middleware, so the in-build middleware can't respond to
|
|
114
|
+
`Accept: text/markdown` by serving the `.md` body. The current
|
|
115
|
+
mitigation: the explicit `.md` URL is always available, and
|
|
116
|
+
`<link rel="alternate">` exposes it. A Cloudflare worker that
|
|
117
|
+
does proper content negotiation at the edge is planned (see
|
|
118
|
+
`plans/cloudflare-deploy-content-negotiation.md`).
|
|
119
|
+
|
|
120
|
+
## Content-Signal HTTP headers
|
|
121
|
+
|
|
122
|
+
Per the IETF Content-Signal draft, sites can declare AI-use
|
|
123
|
+
permissions via HTTP headers:
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
Content-Signal: aiTrain=no, aiInput=yes, search=yes
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Three keys:
|
|
130
|
+
|
|
131
|
+
| Key | Values | Meaning |
|
|
132
|
+
|---|---|---|
|
|
133
|
+
| `aiTrain` | `yes` / `no` | May this content be used for AI model training? |
|
|
134
|
+
| `aiInput` | `yes` / `no` | May this content be used as input to a live AI session (RAG, prompt context)? |
|
|
135
|
+
| `search` | `yes` / `no` | May this content be indexed by search engines? |
|
|
136
|
+
|
|
137
|
+
Configure via `agent.contentSignal`:
|
|
138
|
+
|
|
139
|
+
```yaml
|
|
140
|
+
agent:
|
|
141
|
+
contentSignal:
|
|
142
|
+
aiTrain: "no" # don't use my docs to train models
|
|
143
|
+
aiInput: "yes" # but DO use them as live context (e.g. for users in Claude / Cursor)
|
|
144
|
+
search: "yes" # standard search indexing OK
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Emitted in two places:
|
|
148
|
+
|
|
149
|
+
- `public/_headers` — Cloudflare Pages / Netlify pick
|
|
150
|
+
this up at the edge automatically (Vercel does not read `_headers`; declare the same headers in `vercel.json` there)
|
|
151
|
+
- `<meta>` tags in HTML head — for hosts that don't read
|
|
152
|
+
`_headers`
|
|
153
|
+
|
|
154
|
+
## robots.txt
|
|
155
|
+
|
|
156
|
+
Auto-emitted at `public/robots.txt` based on `noindex` settings
|
|
157
|
+
+ Content-Signal `search` value. Disallows crawlers when
|
|
158
|
+
`search: "no"`; otherwise allows everything.
|
|
159
|
+
|
|
160
|
+
For per-page `noindex`, the `robots` meta tag handles it (see
|
|
161
|
+
`dogsbay:frontmatter-fields`).
|
|
162
|
+
|
|
163
|
+
## Per-page LLM action UI
|
|
164
|
+
|
|
165
|
+
Beyond the data side, Dogsbay can render an action cluster
|
|
166
|
+
("Copy as markdown", "Open in Claude", "Open in ChatGPT") on
|
|
167
|
+
each page:
|
|
168
|
+
|
|
169
|
+
```yaml
|
|
170
|
+
agent:
|
|
171
|
+
llmsTxt: true
|
|
172
|
+
mdMirror: true
|
|
173
|
+
|
|
174
|
+
llmActions:
|
|
175
|
+
enabled: true
|
|
176
|
+
providers: [claude, chatgpt, perplexity, gemini] # render order
|
|
177
|
+
placement: header # header | inline | both
|
|
178
|
+
copyButton: true
|
|
179
|
+
promptTemplate: "Read this docs page: {url}"
|
|
180
|
+
footerLink: true
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
`{url}` resolves to the absolute `.md` mirror URL. The user
|
|
184
|
+
clicks "Open in Claude" → goes to `claude.ai/new?q=...` with a
|
|
185
|
+
prepopulated prompt that pulls the markdown into Claude's
|
|
186
|
+
context.
|
|
187
|
+
|
|
188
|
+
Per-page opt-out via `llmActions: false` in frontmatter.
|
|
189
|
+
|
|
190
|
+
## What agents see
|
|
191
|
+
|
|
192
|
+
When an LLM is given the URL of a Dogsbay site:
|
|
193
|
+
|
|
194
|
+
1. It fetches `/llms.txt` (table of contents)
|
|
195
|
+
2. Picks pages relevant to the question
|
|
196
|
+
3. Fetches each as `/{path}.md` (full prose)
|
|
197
|
+
4. Reads `Content-Signal` to know if it's allowed to use the
|
|
198
|
+
content as context (typically yes if `aiInput=yes`)
|
|
199
|
+
|
|
200
|
+
That's a self-contained agent-consumption loop with no special
|
|
201
|
+
configuration on the agent's side.
|
|
202
|
+
|
|
203
|
+
## Common patterns
|
|
204
|
+
|
|
205
|
+
### Public docs, no AI training, allow live context
|
|
206
|
+
|
|
207
|
+
```yaml
|
|
208
|
+
agent:
|
|
209
|
+
llmsTxt: true
|
|
210
|
+
mdMirror: true
|
|
211
|
+
contentSignal:
|
|
212
|
+
aiTrain: "no"
|
|
213
|
+
aiInput: "yes"
|
|
214
|
+
search: "yes"
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
This is the default for most teams: their docs help users in AI sessions
|
|
218
|
+
but don't end up in training data.
|
|
219
|
+
|
|
220
|
+
### Internal docs (no public agent access)
|
|
221
|
+
|
|
222
|
+
```yaml
|
|
223
|
+
agent:
|
|
224
|
+
llmsTxt: false # don't advertise to crawlers
|
|
225
|
+
mdMirror: true # but keep the dev-side .md surface
|
|
226
|
+
contentSignal:
|
|
227
|
+
aiTrain: "no"
|
|
228
|
+
aiInput: "no"
|
|
229
|
+
search: "no"
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Plus host-side auth (Cloudflare Access, Vercel password, etc.)
|
|
233
|
+
to gate the site itself.
|
|
234
|
+
|
|
235
|
+
### Marketing-site mode (everything open)
|
|
236
|
+
|
|
237
|
+
```yaml
|
|
238
|
+
agent:
|
|
239
|
+
llmsTxt: true
|
|
240
|
+
mdMirror: true
|
|
241
|
+
contentSignal:
|
|
242
|
+
aiTrain: "yes" # put us in the training data; we want the visibility
|
|
243
|
+
aiInput: "yes"
|
|
244
|
+
search: "yes"
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Common mistakes
|
|
248
|
+
|
|
249
|
+
- ❌ Setting `agent.mdMirror: false` and expecting llms.txt to
|
|
250
|
+
still link to .md files — the index emits whatever URLs the
|
|
251
|
+
build produces. If mirrors aren't built, the index can't link
|
|
252
|
+
to them.
|
|
253
|
+
- ❌ Trusting `Accept: text/markdown` content negotiation today —
|
|
254
|
+
static-mode middleware doesn't see request headers. Use the
|
|
255
|
+
explicit `.md` URL.
|
|
256
|
+
- ❌ `aiTrain: "no"` + a public-internet-readable site —
|
|
257
|
+
Content-Signal is **declarative**, not enforceable. Crawlers
|
|
258
|
+
can ignore it. For real protection, gate access at the
|
|
259
|
+
network level.
|
|
260
|
+
- ❌ Mistyping the Content-Signal values (`"true"` instead of
|
|
261
|
+
`"yes"`) — the loader doesn't normalise; the header emits
|
|
262
|
+
literally what you wrote.
|