dogsbay 0.2.0-beta.5 → 0.2.0-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
1
+ /**
2
+ * `dogsbay agent install` — wire skill discovery for an LLM agent
3
+ * (Claude Code, Cursor, Copilot, etc.).
4
+ *
5
+ * Bundled platform skills live at `<cli-install-dir>/skills/platform/*.md`.
6
+ * This command:
7
+ * 1. Resolves the bundled platform skills directory.
8
+ * 2. Symlinks them into `<project>/.dogsbay/skills/platform/`.
9
+ * 3. Creates `.dogsbay/skills/site/` (empty + README) and
10
+ * `.dogsbay/skills/plugins/` (empty placeholder).
11
+ * 4. For each requested --agent, writes the per-agent discovery
12
+ * path (`.claude/skills/dogsbay/`, `.cursor/rules/dogsbay/`).
13
+ *
14
+ * Re-running is idempotent — stale symlinks are recreated, correct ones
15
+ * are left alone; existing site/ files are never touched.
16
+ *
17
+ * See plans/dogsbay-agent-skills.md for the four-tier ownership
18
+ * model and how this fits.
19
+ */
20
+ import { existsSync, mkdirSync, symlinkSync, unlinkSync, writeFileSync, readlinkSync } from "node:fs";
21
+ import { dirname, join, relative, resolve } from "node:path";
22
+ import { fileURLToPath } from "node:url";
23
+ import pc from "picocolors";
24
/**
 * Discovery target for each supported agent, keyed by agent name.
 *   name  — the agent's identifier (mirrors the key)
 *   path  — project-relative location where the discovery symlink is created
 *   label — human-readable text printed once the agent has been wired up
 */
const AGENT_TARGETS = {
  claude: {
    name: "claude",
    path: ".claude/skills/dogsbay",
    label: "Claude Code (.claude/skills/dogsbay/)",
  },
  cursor: {
    name: "cursor",
    path: ".cursor/rules/dogsbay",
    label: "Cursor (.cursor/rules/dogsbay/)",
  },
};

/** Names accepted by `--agent`, in the order the targets are declared above. */
const SUPPORTED_AGENTS = Object.keys(AGENT_TARGETS);
37
/**
 * Entry point for `dogsbay agent install`.
 *
 * Locates the platform skills bundled with the CLI, prepares
 * `<project>/.dogsbay/skills/{platform,site,plugins}` and then points each
 * selected agent's discovery path at `.dogsbay/skills/`. When no agent is
 * selected it only prints the detection/help text and returns.
 *
 * Exits the process with status 1 if the bundled skills directory is missing.
 *
 * @param {string | undefined} cwd Project root; "." is used when falsy.
 * @param {{ all?: boolean, agent?: string }} options Parsed CLI flags.
 * @returns {Promise<void>}
 */
export async function agentInstall(cwd, options) {
  const projectRoot = resolve(cwd || ".");
  const shown = (absolutePath) => relative(projectRoot, absolutePath);

  // This module runs from <cli-install>/dist/commands/, so the bundled
  // skills live two directories up, under <cli-install>/skills/platform/.
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const platformSkills = resolve(moduleDir, "..", "..", "skills", "platform");
  if (!existsSync(platformSkills)) {
    console.error(pc.red(`Error: bundled platform skills not found at ${platformSkills}.`));
    console.error(`       The dogsbay CLI install seems incomplete. Reinstall with`);
    console.error(`       'npm install -g dogsbay@latest'.`);
    process.exit(1);
  }

  // No usable selection → show what was detected and stop.
  const agents = pickAgents(options);
  if (agents.length === 0) {
    printDetected(projectRoot);
    return;
  }

  console.log(pc.cyan("→ Installing skill discovery"));

  // Step 1: the shared .dogsbay/skills/ tree.
  const skillsDir = join(projectRoot, ".dogsbay", "skills");
  mkdirSync(skillsDir, { recursive: true });

  const platformLink = join(skillsDir, "platform");
  refreshSymlink(platformLink, platformSkills);
  console.log(pc.green(`  ✓ ${shown(platformLink)} → bundled platform skills`));

  // Seed text for site/README.md; only written when site/ is first created.
  const siteReadme = `# Site skills

This directory holds **site-specific** skills — your team's style
guide, voice / tone, terminology, glossary, internal conventions.
Anything an LLM should know that's specific to THIS site.

Each skill is a single \`.md\` file with frontmatter:

\`\`\`markdown
---
name: site:style-guide
description: Our team's writing voice, terminology, and PR conventions.
---

# Style guide

We use Oxford commas. Sentence-case headings. ...
\`\`\`

These skills are picked up automatically by any agent you've
installed via \`dogsbay agent install --agent <name>\`.

To override a platform skill (e.g. a different opinion on
\`nav-file.md\`), put your version under \`overrides/<skill-name>.md\`.
The agent loader checks overrides first.
`;

  const siteDir = join(skillsDir, "site");
  if (existsSync(siteDir)) {
    // Never touch a site/ directory the team may already have filled in.
    console.log(pc.gray(`  · ${shown(siteDir)} already exists (preserved)`));
  } else {
    mkdirSync(siteDir, { recursive: true });
    writeFileSync(join(siteDir, "README.md"), siteReadme);
    console.log(pc.green(`  ✓ ${shown(siteDir)} created (empty + README)`));
  }

  const pluginsDir = join(skillsDir, "plugins");
  if (!existsSync(pluginsDir)) {
    mkdirSync(pluginsDir, { recursive: true });
  }

  // Step 2: one discovery symlink per selected agent, each pointing at
  // the whole .dogsbay/skills/ tree.
  for (const agentName of agents) {
    const { path: discoveryPath, label } = AGENT_TARGETS[agentName];
    const agentLink = join(projectRoot, discoveryPath);
    mkdirSync(dirname(agentLink), { recursive: true });
    refreshSymlink(agentLink, skillsDir);
    console.log(pc.green(`  ✓ ${label} → .dogsbay/skills/`));
  }

  const nextSteps = [
    "",
    pc.cyan("Next:"),
    "  Open your editor — the agent should now see Dogsbay platform skills",
    "  on next prompt.",
    "",
    "  Add team-specific skills to .dogsbay/skills/site/.",
    "  Override a platform skill with .dogsbay/skills/site/overrides/<name>.md.",
  ];
  for (const line of nextSteps) {
    console.log(line);
  }
}
118
/**
 * Decide which agents to set up. Priority:
 *   --all                  → every supported agent
 *   --agent claude,cursor  → exactly that list
 *   neither                → return [], caller prints detected agents and exits
 *
 * Names from `--agent` are trimmed and lower-cased. Blank entries (such as
 * the one a trailing comma produces) are ignored, repeated names are kept
 * once in first-seen order, and unknown names are skipped with a warning
 * on stderr.
 *
 * @param {{ all?: boolean, agent?: string }} options Parsed CLI flags.
 * @returns {string[]} Supported agent names to install; may be empty.
 */
function pickAgents(options) {
  if (options.all) {
    return [...SUPPORTED_AGENTS];
  }
  if (!options.agent) {
    return [];
  }

  // A Set drops duplicates while preserving the order the user typed.
  const chosen = new Set();
  for (const piece of options.agent.split(",")) {
    const name = piece.trim().toLowerCase();
    if (name === "") {
      // "claude," or "claude,,cursor" — not worth an `unknown agent ""` warning.
      continue;
    }
    if (SUPPORTED_AGENTS.includes(name)) {
      chosen.add(name);
    } else {
      console.error(pc.yellow(`  warn: unknown agent "${name}" (supported: ${SUPPORTED_AGENTS.join(", ")})`));
    }
  }
  return [...chosen];
}
142
/**
 * When called without --agent or --all, just probe the project
 * for known agent-config dirs and suggest commands.
 *
 * Prints to stdout only; nothing is created or modified. An agent counts
 * as detected when its config entry exists under `projectRoot`:
 * `.claude` for claude, `.cursor` or `.cursorrules` for cursor.
 *
 * @param {string} projectRoot Directory to probe.
 * @returns {void}
 */
function printDetected(projectRoot) {
  const has = (entry) => existsSync(join(projectRoot, entry));

  const detected = [];
  if (has(".claude")) {
    detected.push({ agent: "claude", signal: ".claude/" });
  }
  if (has(".cursor") || has(".cursorrules")) {
    detected.push({ agent: "cursor", signal: ".cursor/ or .cursorrules" });
  }

  console.log(pc.cyan("Dogsbay agent install"));
  console.log("");
  console.log("Wires Dogsbay platform skills into the discovery path of an");
  console.log("LLM agent so it picks them up on every prompt.");
  console.log("");

  if (detected.length > 0) {
    console.log(pc.green("Detected in this project:"));
    for (const { agent, signal } of detected) {
      console.log(`  ${agent.padEnd(8)} (${signal})`);
    }
    console.log("");
    console.log("Run:");
    for (const { agent } of detected) {
      console.log(`  dogsbay agent install --agent ${agent}`);
    }
    // --all installs every SUPPORTED agent, whether or not it was detected
    // here, so the hint must not promise "detected" only.
    console.log("  dogsbay agent install --all   # set up every supported agent");
  } else {
    console.log(pc.yellow("No supported agent configs detected in this project."));
    console.log("");
    console.log("Run:");
    console.log("  dogsbay agent install --agent claude");
    console.log("  dogsbay agent install --agent cursor");
    console.log("  dogsbay agent install --all");
  }

  console.log("");
  console.log(`Supported agents: ${SUPPORTED_AGENTS.join(", ")}`);
}
183
/**
 * Make `linkPath` a directory symlink pointing at `target`.
 *
 *   - nothing at `linkPath`                 → the symlink is created
 *   - a symlink that already resolves there → left alone (idempotent)
 *   - any other symlink, or a plain file    → removed, then recreated
 *   - an entry that cannot be unlinked
 *     (for example a real directory)        → throws, nothing is deleted
 *
 * Both the existing link and `target` are resolved against the link's own
 * directory before comparing, so relative or non-normalised targets are
 * recognised as "already correct" too.
 *
 * @param {string} linkPath Where the symlink should live.
 * @param {string} target What the symlink should point at.
 * @returns {void}
 * @throws {Error} When an existing entry is in the way and cannot be
 *   removed; the underlying fs error is attached as `cause`.
 */
function refreshSymlink(linkPath, target) {
  const linkDir = dirname(linkPath);
  const wanted = resolve(linkDir, target);

  // readlinkSync succeeds for any symlink, dangling or not; it throws when
  // the entry is missing or is not a symlink. existsSync below tells those
  // two failure cases apart.
  let current = null;
  try {
    current = readlinkSync(linkPath);
  } catch {
    current = null;
  }

  if (current !== null && resolve(linkDir, current) === wanted) {
    return; // already correct
  }

  if (current !== null || existsSync(linkPath)) {
    try {
      unlinkSync(linkPath);
    } catch (err) {
      // ENOENT means the entry vanished in the meantime, which is fine.
      // Anything else (typically a real directory) would otherwise surface
      // as an unexplained EEXIST from symlinkSync below.
      if (err.code !== "ENOENT") {
        throw new Error(
          `Cannot replace ${linkPath} with a symlink to ${target}: an existing entry is in the way and could not be removed. Move or delete it, then re-run.`,
          { cause: err },
        );
      }
    }
  }

  symlinkSync(target, linkPath, "dir");
}
208
/**
 * Report whether `p` is a symlink of any kind.
 *
 * Despite the name, a healthy symlink also returns true: the check is only
 * "can the link be read?". It exists so callers can catch dangling links,
 * which `existsSync` reports as missing.
 *
 * @param {string} p Path to inspect.
 * @returns {boolean} true when `readlinkSync(p)` succeeds, false when it throws.
 */
function isBrokenSymlink(p) {
  let readable = true;
  try {
    readlinkSync(p);
  } catch {
    readable = false;
  }
  return readable;
}
package/dist/index.js CHANGED
@@ -15,6 +15,7 @@ import { siteInit } from "./commands/site-init.js";
15
15
  import { siteBuild } from "./commands/site-build.js";
16
16
  import { siteCheck } from "./commands/site-check.js";
17
17
  import { siteDev, sitePreview } from "./commands/site-dev.js";
18
+ import { agentInstall } from "./commands/agent.js";
18
19
  // Read version from the runtime package.json so `dogsbay --version`
19
20
  // never drifts from what's published. Walks one level up from
20
21
  // `dist/index.js` to `package.json` (works in both monorepo dev and
@@ -218,6 +219,16 @@ program
218
219
  .option("--concurrency <n>", "Maximum concurrent fetches (default: 3)", "3")
219
220
  .option("--rate-limit <ms>", "Minimum ms between request batches (default: 200)", "200")
220
221
  .action((url, options) => pull(url, options));
222
+ // ── `dogsbay agent` — wire skill discovery for LLM agents ──────────────
223
+ const agent = program
224
+ .command("agent")
225
+ .description("Wire Dogsbay platform skills into LLM-agent discovery paths");
226
+ agent
227
+ .command("install")
228
+ .description("Install platform skills + per-agent discovery symlinks")
229
+ .option("--agent <names>", "Comma-separated list (e.g. claude,cursor)")
230
+ .option("--all", "Install for every supported agent")
231
+ .action((options) => agentInstall(undefined, options));
221
232
  program
222
233
  .command("export-techdocs")
223
234
  .description("Post-process Astro build output into Backstage TechDocs format")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dogsbay",
3
- "version": "0.2.0-beta.5",
3
+ "version": "0.2.0-beta.6",
4
4
  "description": "CLI for Dogsbay — scaffold, build, and serve documentation sites with markdown / MkDocs / Obsidian / OpenAPI sources",
5
5
  "type": "module",
6
6
  "bin": {
@@ -9,6 +9,7 @@
9
9
  "files": [
10
10
  "dist",
11
11
  "bin",
12
+ "skills",
12
13
  "README.md"
13
14
  ],
14
15
  "keywords": [
@@ -30,14 +31,14 @@
30
31
  "picocolors": "^1.1.0",
31
32
  "prompts": "^2.4.2",
32
33
  "yaml": "^2.8.3",
33
- "@dogsbay/format-mkdocs": "0.2.0-beta.5",
34
- "@dogsbay/format-astro": "0.2.0-beta.5",
35
- "@dogsbay/format-obsidian": "0.2.0-beta.5",
36
- "@dogsbay/format-mdx": "0.2.0-beta.5",
37
- "@dogsbay/format-dogsbay-md": "0.2.0-beta.5",
38
- "@dogsbay/format-starlight": "0.2.0-beta.5",
39
- "@dogsbay/format-openapi": "0.2.0-beta.5",
40
- "@dogsbay/types": "0.2.0-beta.5"
34
+ "@dogsbay/format-astro": "0.2.0-beta.6",
35
+ "@dogsbay/format-obsidian": "0.2.0-beta.6",
36
+ "@dogsbay/format-mkdocs": "0.2.0-beta.6",
37
+ "@dogsbay/format-mdx": "0.2.0-beta.6",
38
+ "@dogsbay/format-starlight": "0.2.0-beta.6",
39
+ "@dogsbay/format-dogsbay-md": "0.2.0-beta.6",
40
+ "@dogsbay/format-openapi": "0.2.0-beta.6",
41
+ "@dogsbay/types": "0.2.0-beta.6"
41
42
  },
42
43
  "devDependencies": {
43
44
  "@types/node": "^22.0.0",
@@ -0,0 +1,262 @@
1
+ ---
2
+ name: dogsbay:agent-readiness
3
+ description: How Dogsbay sites expose content to LLM agents and search indexers — llms.txt, llms-full.txt, .md mirrors, Content-Signal HTTP headers, robots.txt. Use when configuring agent.* in dogsbay.config.yml or debugging agent consumption.
4
+ ---
5
+
6
+ # Agent readiness
7
+
8
+ Every Dogsbay site is built to be **agent-readable by default**.
9
+ Three mechanisms work together so that any modern LLM, search
10
+ engine, or AI-answer-engine can consume the docs as cleanly as
11
+ a human reader:
12
+
13
+ 1. **llms.txt** at the root — the canonical agent index
14
+ 2. **`.md` mirror** for every page — the prose body without
15
+ chrome
16
+ 3. **Content-Signal HTTP headers** — IETF-track signal for "what
17
+ AI use is permitted"
18
+
19
+ All three are emitted at `dogsbay site build` time. Toggleable
20
+ via the `agent:` block in `dogsbay.config.yml`.
21
+
22
+ ## llms.txt
23
+
24
+ The standard at [llmstxt.org](https://llmstxt.org/) — a single
25
+ file at the site root that lists every page with title +
26
+ description + URL. Two flavours:
27
+
28
+ - `/llms.txt` — short index (title + description + URL per page,
29
+ grouped by section). The agent's "table of contents."
30
+ - `/llms-full.txt` — full index with the markdown body of every
31
+ page concatenated. The agent's "everything in one paste."
32
+
33
+ Per-section mini-indexes are also emitted (`/llms-${section}.txt` for
34
+ each top-level nav group), so an agent can pull just the
35
+ relevant slice without grabbing the whole site.
36
+
37
+ Format example (`/llms.txt`):
38
+
39
+ ```
40
+ # Acme Docs
41
+
42
+ > Documentation for the Acme platform.
43
+
44
+ ## Getting started
45
+
46
+ - [Installation](/docs/install): Install the CLI on macOS, Linux, or Windows.
47
+ - [Quickstart](/docs/quickstart): Your first request in 60 seconds.
48
+
49
+ ## API reference
50
+
51
+ - [List pets](/docs/api/pets/list-pets): Returns paginated list.
52
+ - [Create a pet](/docs/api/pets/create-pet): Idempotent creation.
53
+ ```
54
+
55
+ Toggleable:
56
+
57
+ ```yaml
58
+ agent:
59
+ llmsTxt: true # default true; set false to omit
60
+ ```
61
+
62
+ ## `.md` mirror
63
+
64
+ Every emitted page has a sibling `.md` route that returns the
65
+ markdown source (or a faithful prose rendering of it) with
66
+ `Content-Type: text/markdown`.
67
+
68
+ For a page at `/docs/api/pets/list-pets`, the mirror is at
69
+ `/docs/api/pets/list-pets.md`. For `/docs/`, it's at `/docs.md`.
70
+
71
+ Why two URLs? A human visiting `/docs/api/pets/list-pets` gets
72
+ the rich HTML page with components, sidebar, search. An agent
73
+ hitting `/docs/api/pets/list-pets.md` gets just the prose —
74
+ faster to parse, no HTML noise, no dependency on a Cloudflare
75
+ worker for content negotiation.
76
+
77
+ ### Discovery via `<link rel="alternate">`
78
+
79
+ Every HTML page emits:
80
+
81
+ ```html
82
+ <link rel="alternate" type="text/markdown" href="/docs/api/pets/list-pets.md">
83
+ ```
84
+
85
+ Agents that follow `rel="alternate"` find the mirror without
86
+ guessing at URL conventions. Anthropic's prompt-cache, Mintlify's
87
+ agents, and several others do this.
88
+
89
+ Toggleable:
90
+
91
+ ```yaml
92
+ agent:
93
+ mdMirror: true # default true
94
+ ```
95
+
96
+ ### Per-page opt-out
97
+
98
+ Some pages don't have useful prose mirrors (e.g. landing pages
99
+ that are mostly hero components). Opt out per-page:
100
+
101
+ ```yaml
102
+ ---
103
+ title: Home
104
+ mdMirror: false
105
+ ---
106
+ ```
107
+
108
+ To turn mirrors off for every page at once, set the global `agent.mdMirror: false`.
109
+
110
+ ### Content negotiation (Cloudflare worker)
111
+
112
+ Astro's static-mode output doesn't pass per-request headers to
113
+ middleware, so the in-build middleware can't respond to
114
+ `Accept: text/markdown` by serving the `.md` body. The current
115
+ mitigation: the explicit `.md` URL is always available, and
116
+ `<link rel="alternate">` exposes it. A Cloudflare worker that
117
+ does proper content negotiation at the edge is planned (see
118
+ `plans/cloudflare-deploy-content-negotiation.md`).
119
+
120
+ ## Content-Signal HTTP headers
121
+
122
+ Per the IETF Content-Signal draft, sites can declare AI-use
123
+ permissions via HTTP headers:
124
+
125
+ ```
126
+ Content-Signal: aiTrain=no, aiInput=yes, search=yes
127
+ ```
128
+
129
+ Three keys:
130
+
131
+ | Key | Values | Meaning |
132
+ |---|---|---|
133
+ | `aiTrain` | `yes` / `no` | May this content be used for AI model training? |
134
+ | `aiInput` | `yes` / `no` | May this content be used as input to a live AI session (RAG, prompt context)? |
135
+ | `search` | `yes` / `no` | May this content be indexed by search engines? |
136
+
137
+ Configure via `agent.contentSignal`:
138
+
139
+ ```yaml
140
+ agent:
141
+ contentSignal:
142
+ aiTrain: "no" # don't use my docs to train models
143
+ aiInput: "yes" # but DO use them as live context (e.g. for users in Claude / Cursor)
144
+ search: "yes" # standard search indexing OK
145
+ ```
146
+
147
+ Emitted in two places:
148
+
149
+ - `public/_headers` — Cloudflare Pages and Netlify pick this up at the
150
+ edge automatically (Vercel ignores `_headers`; set headers in `vercel.json`)
151
+ - `<meta>` tags in HTML head — for hosts that don't read
152
+ `_headers`
153
+
154
+ ## robots.txt
155
+
156
+ Auto-emitted at `public/robots.txt` based on `noindex` settings
157
+ + Content-Signal `search` value. Disallows crawlers when
158
+ `search: "no"`; otherwise allows everything.
159
+
160
+ For per-page `noindex`, the `robots` meta tag handles it (see
161
+ `dogsbay:frontmatter-fields`).
162
+
163
+ ## Per-page LLM action UI
164
+
165
+ Beyond the data side, Dogsbay can render an action cluster
166
+ ("Copy as markdown", "Open in Claude", "Open in ChatGPT") on
167
+ each page:
168
+
169
+ ```yaml
170
+ agent:
171
+ llmsTxt: true
172
+ mdMirror: true
173
+
174
+ llmActions:
175
+ enabled: true
176
+ providers: [claude, chatgpt, perplexity, gemini] # render order
177
+ placement: header # header | inline | both
178
+ copyButton: true
179
+ promptTemplate: "Read this docs page: {url}"
180
+ footerLink: true
181
+ ```
182
+
183
+ `{url}` resolves to the absolute `.md` mirror URL. The user
184
+ clicks "Open in Claude" → goes to `claude.ai/new?q=...` with a
185
+ prepopulated prompt that pulls the markdown into Claude's
186
+ context.
187
+
188
+ Per-page opt-out via `llmActions: false` in frontmatter.
189
+
190
+ ## What agents see
191
+
192
+ When an LLM is given the URL of a Dogsbay site:
193
+
194
+ 1. It fetches `/llms.txt` (table of contents)
195
+ 2. Picks pages relevant to the question
196
+ 3. Fetches each as `/{path}.md` (full prose)
197
+ 4. Reads `Content-Signal` to know if it's allowed to use the
198
+ content as context (typically yes if `aiInput=yes`)
199
+
200
+ That's a self-contained agent-consumption loop with no special
201
+ configuration on the agent's side.
202
+
203
+ ## Common patterns
204
+
205
+ ### Public docs, no AI training, allow live context
206
+
207
+ ```yaml
208
+ agent:
209
+ llmsTxt: true
210
+ mdMirror: true
211
+ contentSignal:
212
+ aiTrain: "no"
213
+ aiInput: "yes"
214
+ search: "yes"
215
+ ```
216
+
217
+ The default for most teams. Their docs help users in AI sessions
218
+ while signalling that they must not be used as training data.
219
+
220
+ ### Internal docs (no public agent access)
221
+
222
+ ```yaml
223
+ agent:
224
+ llmsTxt: false # don't advertise to crawlers
225
+ mdMirror: true # but keep the dev-side .md surface
226
+ contentSignal:
227
+ aiTrain: "no"
228
+ aiInput: "no"
229
+ search: "no"
230
+ ```
231
+
232
+ Plus host-side auth (Cloudflare Access, Vercel password, etc.)
233
+ to gate the site itself.
234
+
235
+ ### Marketing-site mode (everything open)
236
+
237
+ ```yaml
238
+ agent:
239
+ llmsTxt: true
240
+ mdMirror: true
241
+ contentSignal:
242
+ aiTrain: "yes" # put us in the training data; we want the visibility
243
+ aiInput: "yes"
244
+ search: "yes"
245
+ ```
246
+
247
+ ## Common mistakes
248
+
249
+ - ❌ Setting `agent.mdMirror: false` and expecting llms.txt to
250
+ still link to .md files — the index emits whatever URLs the
251
+ build produces. If mirrors aren't built, the index can't link
252
+ to them.
253
+ - ❌ Trusting `Accept: text/markdown` content negotiation today —
254
+ static-mode middleware doesn't see request headers. Use the
255
+ explicit `.md` URL.
256
+ - ❌ `aiTrain: "no"` + a public-internet-readable site —
257
+ Content-Signal is **declarative**, not enforceable. Crawlers
258
+ can ignore it. For real protection, gate access at the
259
+ network level.
260
+ - ❌ Mistyping the Content-Signal values (`"true"` instead of
261
+ `"yes"`) — the loader doesn't normalise; the header emits
262
+ literally what you wrote.