dogsbay 0.2.0-beta.2 → 0.2.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,6 +15,7 @@ import { siteInit } from "./commands/site-init.js";
15
15
  import { siteBuild } from "./commands/site-build.js";
16
16
  import { siteCheck } from "./commands/site-check.js";
17
17
  import { siteDev, sitePreview } from "./commands/site-dev.js";
18
+ import { agentInstall } from "./commands/agent.js";
18
19
  // Read version from the runtime package.json so `dogsbay --version`
19
20
  // never drifts from what's published. Walks one level up from
20
21
  // `dist/index.js` to `package.json` (works in both monorepo dev and
@@ -62,7 +63,7 @@ site
62
63
  .option("--edit-uri <path>", "Repo path prefix for edit links (default: blob/main/content/)")
63
64
  .option("--copyright <text>", "Footer copyright text")
64
65
  .option("--theme <name>", "Theme preset (default | material)")
65
- .option("--deploy <target>", "Deploy target (cloudflare-workers)")
66
+ .option("--deploy <target>", "Deploy target (cloudflare-workers | github-pages)")
66
67
  .option("--content <path>", "Path to source markdown (relative to config)")
67
68
  .option("--from <format>", "Source format (auto | dogsbay-md | mkdocs | obsidian | starlight | mdx)")
68
69
  .option("--nav <path>", "Path to explicit nav file (.json/.yml)")
@@ -96,7 +97,7 @@ site
96
97
  .option("--edit-uri <path>", "Override site.editUri")
97
98
  .option("--copyright <text>", "Override site.copyright")
98
99
  .option("--theme <name>", "Override theme (default | material)")
99
- .option("--deploy <target>", "Override deploy.target (cloudflare-workers)")
100
+ .option("--deploy <target>", "Override deploy.target (cloudflare-workers | github-pages)")
100
101
  .option("--content <path>", "Override content.source")
101
102
  .option("--from <format>", "Override content.from")
102
103
  .option("--nav <path>", "Override content.nav")
@@ -108,8 +109,12 @@ site
108
109
  .option("--ai-input <yes|no>", "Override Content-Signal aiInput")
109
110
  .option("--ai-search <yes|no>", "Override Content-Signal search")
110
111
  .option("--include-drafts", "Include status: draft pages (default: excluded from prod build)")
111
- .option("--publish", "Build the full publish matrix (every source, ignoring primary: flag). " +
112
- "Default builds only sources marked primary: true (or all sources when no primary is set).")
112
+ .option("--primary-only", "Build only sources marked primary: true (skip non-primary). " +
113
+ "Default builds the full publish matrix every locale / version / namespace " +
114
+ "the writer declared. Use --primary-only for fast single-source iteration in CI " +
115
+ "lint jobs (rare).")
116
+ .option("--publish", "Deprecated — `dogsbay site build` defaults to publish mode now. " +
117
+ "Kept as a no-op for compatibility with older scripts.")
113
118
  .option("--refresh", "Wipe the per-source git cache before resolving — forces re-clone of every git: source. " +
114
119
  "Useful when a tracked branch's HEAD has moved.")
115
120
  .action((dir, options) => siteBuild(dir, options));
@@ -218,6 +223,16 @@ program
218
223
  .option("--concurrency <n>", "Maximum concurrent fetches (default: 3)", "3")
219
224
  .option("--rate-limit <ms>", "Minimum ms between request batches (default: 200)", "200")
220
225
  .action((url, options) => pull(url, options));
226
+ // ── `dogsbay agent` — wire skill discovery for LLM agents ──────────────
227
+ const agent = program
228
+ .command("agent")
229
+ .description("Wire Dogsbay platform skills into LLM-agent discovery paths");
230
+ agent
231
+ .command("install")
232
+ .description("Install platform skills + per-agent discovery symlinks")
233
+ .option("--agent <names>", "Comma-separated list (e.g. claude,cursor)")
234
+ .option("--all", "Install for every supported agent")
235
+ .action((options) => agentInstall(undefined, options));
221
236
  program
222
237
  .command("export-techdocs")
223
238
  .description("Post-process Astro build output into Backstage TechDocs format")
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Passthrough Astro page collection.
3
+ *
4
+ * Hand-authored `.astro` files that live under a content source's
5
+ * directory get copied verbatim to `src/pages/<basePath>/...` at
6
+ * build time. Authors opt in by listing the file in `nav.yml`; the
7
+ * intersection of "files on disk" and "hrefs in nav" defines the
8
+ * passthrough set.
9
+ *
10
+ * See plans/passthrough-astro-pages.md.
11
+ */
12
+ import { existsSync, readdirSync, statSync } from "node:fs";
13
+ import { join, posix, relative, sep } from "node:path";
/**
 * Discover the hand-authored `.astro` pages under `contentDir` that
 * the resolved nav opts in to.
 *
 * Each `.astro` file found by the directory walk is mapped to its
 * canonical href; only files whose href is present in `nav` are
 * returned. Files the nav does not mention are dropped, so scratch
 * components are never published by accident.
 *
 * @param {string} contentDir Directory of the content source to scan.
 * @param {Array<object>} nav Resolved nav tree (items carrying `href` / `children`).
 * @param {{ basePath: string }} options `basePath` is the URL prefix applied to every href.
 * @returns {Array<{source: string, sourceAbs: string, outputRelPath: string, href: string}>}
 *   One entry per opted-in file; empty when `contentDir` does not exist.
 */
export function collectPassthroughEntries(contentDir, nav, options) {
    if (!existsSync(contentDir)) {
        return [];
    }
    const allowedHrefs = collectNavHrefs(nav);
    return walkAstroFiles(contentDir, contentDir).flatMap((sourceAbs) => {
        // Hrefs are URL-shaped, so normalise the relative path first.
        const source = toPosix(relative(contentDir, sourceAbs));
        const href = sourceToHref(source, options.basePath);
        if (!allowedHrefs.has(href)) {
            return [];
        }
        const outputRelPath = sourceToOutputRelPath(source, options.basePath);
        return [{ source, sourceAbs, outputRelPath, href }];
    });
}
/**
 * Depth-first collection of every `.astro` file below `dir`.
 *
 * Entries are skipped (files and directories alike) when their name
 * starts with a dot or is `node_modules`, `_components` or
 * `_partials` — the latter two give authors an obvious place to keep
 * shared helpers next to content without publishing them. A directory
 * that cannot be read contributes nothing rather than failing the walk.
 *
 * @param {string} dir Directory to scan.
 * @param {string} root Top of the walk; only forwarded to recursive calls.
 * @returns {string[]} Paths (each built by joining onto `dir`) of the matching files.
 */
function walkAstroFiles(dir, root) {
    let listing;
    try {
        listing = readdirSync(dir, { withFileTypes: true });
    }
    catch {
        // Best-effort: an unreadable or missing directory yields no files.
        return [];
    }
    const found = [];
    for (const dirent of listing) {
        const { name } = dirent;
        const isExcluded = name.startsWith(".") ||
            name === "node_modules" ||
            name === "_components" ||
            name === "_partials";
        if (isExcluded) {
            continue;
        }
        const childPath = join(dir, name);
        if (dirent.isDirectory()) {
            for (const nested of walkAstroFiles(childPath, root)) {
                found.push(nested);
            }
            continue;
        }
        if (dirent.isFile() && name.endsWith(".astro")) {
            found.push(childPath);
        }
    }
    return found;
}
/**
 * Turn a content-relative `.astro` path into its public href.
 *
 * Intended to mirror the slug logic in
 * `format-dogsbay-md/src/nav-file.ts:fileToHref`, so a passthrough
 * page gets the href a `.md` sibling at the same path would get.
 *
 *   - "tutorials/playground.astro" → "/docs/tutorials/playground"
 *   - "reference/index.astro"      → "/docs/reference"
 *   - "index.astro"                → "/docs"
 *
 * @param {string} source POSIX-style path relative to the content dir.
 * @param {string} basePath URL prefix; one trailing slash is tolerated.
 * @returns {string} The prefixed href, or the bare prefix for the root index.
 */
function sourceToHref(source, basePath) {
    const withoutExt = source.replace(/\.astro$/i, "");
    // A trailing "/index" collapses onto its directory ...
    const collapsed = withoutExt.endsWith("/index")
        ? withoutExt.slice(0, withoutExt.length - "/index".length)
        : withoutExt;
    // ... and a bare "index" is the root of the base path.
    const slug = collapsed === "index" ? "" : collapsed;
    const prefix = basePath.endsWith("/")
        ? basePath.slice(0, basePath.length - 1)
        : basePath;
    if (slug === "") {
        return prefix;
    }
    return `${prefix}/${slug}`;
}
/**
 * Work out where, relative to the output directory, a passthrough
 * source is copied: `src/pages/<basePath segments>/<source path>`.
 *
 * The source's directory shape is kept as-is, so `index.astro` files
 * stay `index.astro` and Astro's directory routing lines up with the
 * nav href.
 *
 * @param {string} source POSIX-style path relative to the content dir.
 * @param {string} basePath URL prefix; empty segments in it are ignored.
 * @returns {string} Path joined with the platform separator.
 */
function sourceToOutputRelPath(source, basePath) {
    const segments = ["src", "pages"];
    for (const baseSeg of basePath.split("/")) {
        // Leading, trailing and doubled slashes produce empty segments.
        if (baseSeg.length > 0) {
            segments.push(baseSeg);
        }
    }
    for (const sourceSeg of source.split("/")) {
        segments.push(sourceSeg);
    }
    return segments.join(sep);
}
/**
 * Gather every non-empty `href` found anywhere in a nav tree.
 *
 * Uses an explicit work list instead of recursion; the caller's array
 * is copied first and never mutated.
 *
 * @param {Iterable<object>} nav Top-level nav items (`href` and `children` are both optional).
 * @returns {Set<string>} All hrefs reachable from `nav`.
 */
function collectNavHrefs(nav) {
    const hrefs = new Set();
    const pending = [...nav];
    while (pending.length !== 0) {
        const { href, children } = pending.pop();
        if (href) {
            hrefs.add(href);
        }
        if (children) {
            for (const child of children) {
                pending.push(child);
            }
        }
    }
    return hrefs;
}
/**
 * Rewrite the platform path separator as a forward slash.
 *
 * Hrefs and slugs are URL-shaped on every platform, so paths are
 * normalised before any href / slug computation.
 *
 * @param {string} p Path using the platform separator.
 * @returns {string} The same path with `/` separators.
 */
function toPosix(p) {
    return p.replaceAll(sep, posix.sep);
}
/**
 * Collect the hrefs claimed by the passthrough entries into a Set.
 *
 * Per the module's notes, site-build uses this to assert that no
 * passthrough page collides with a slug generated by `emitAstroPages`.
 *
 * @param {Array<{href: string}>} entries Passthrough entries.
 * @returns {Set<string>} The distinct hrefs, in first-seen order.
 */
export function passthroughSlugs(entries) {
    const slugs = new Set();
    for (const { href } of entries) {
        slugs.add(href);
    }
    return slugs;
}
/**
 * Guard that every passthrough entry points at a regular file on disk.
 *
 * The directory walk only ever yields existing files, so this matters
 * mainly for entries built by hand (e.g. in tests). Existence is
 * checked for all entries first; only then is each path checked to be
 * a file rather than a directory or other non-file.
 *
 * @param {Array<{source: string, sourceAbs: string}>} entries Entries to verify.
 * @returns {void}
 * @throws {Error} When a source is missing, or exists but is not a file.
 */
export function assertPassthroughFilesExist(entries) {
    const missing = entries.find(({ sourceAbs }) => !existsSync(sourceAbs));
    if (missing !== undefined) {
        throw new Error(`Passthrough Astro source missing: ${missing.source} (resolved: ${missing.sourceAbs})`);
    }
    // Defensive second pass: a path can exist without being a regular file.
    const notAFile = entries.find(({ sourceAbs }) => !statSync(sourceAbs).isFile());
    if (notAFile !== undefined) {
        throw new Error(`Passthrough Astro source is not a file: ${notAFile.source}`);
    }
}
package/dist/registry.js CHANGED
@@ -388,6 +388,14 @@ export const registry = {
388
388
  primitives: [],
389
389
  description: "Clickable card with link",
390
390
  },
391
+ icon: {
392
+ name: "icon",
393
+ files: ["Icon.astro", "index.ts"],
394
+ dependencies: ["@dogsbay/icons"],
395
+ registryDependencies: [],
396
+ primitives: [],
397
+ description: "Build-time-resolved icon (Lucide default; mdi:, simple-icons:, etc. via Iconify)",
398
+ },
391
399
  sidebar: {
392
400
  name: "sidebar",
393
401
  files: ["Sidebar.astro", "SidebarContent.astro", "SidebarGroup.astro", "SidebarGroupContent.astro", "SidebarGroupLabel.astro", "SidebarHeader.astro", "SidebarInset.astro", "SidebarMenu.astro", "SidebarMenuButton.astro", "SidebarMenuItem.astro", "SidebarNavTree.astro", "SidebarProvider.astro", "SidebarRail.astro", "SidebarSeparator.astro", "SidebarTrigger.astro", "sidebar.ts", "index.ts"],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dogsbay",
3
- "version": "0.2.0-beta.2",
3
+ "version": "0.2.0-beta.21",
4
4
  "description": "CLI for Dogsbay — scaffold, build, and serve documentation sites with markdown / MkDocs / Obsidian / OpenAPI sources",
5
5
  "type": "module",
6
6
  "bin": {
@@ -9,6 +9,7 @@
9
9
  "files": [
10
10
  "dist",
11
11
  "bin",
12
+ "skills",
12
13
  "README.md"
13
14
  ],
14
15
  "keywords": [
@@ -25,19 +26,20 @@
25
26
  ],
26
27
  "dependencies": {
27
28
  "cheerio": "^1.0.0",
29
+ "chokidar": "^4.0.0",
28
30
  "commander": "^13.0.0",
29
31
  "markdown-it": "^14.0.0",
30
32
  "picocolors": "^1.1.0",
31
33
  "prompts": "^2.4.2",
32
34
  "yaml": "^2.8.3",
33
- "@dogsbay/format-mkdocs": "0.2.0-beta.2",
34
- "@dogsbay/format-astro": "0.2.0-beta.2",
35
- "@dogsbay/format-mdx": "0.2.0-beta.2",
36
- "@dogsbay/format-obsidian": "0.2.0-beta.2",
37
- "@dogsbay/format-starlight": "0.2.0-beta.2",
38
- "@dogsbay/format-dogsbay-md": "0.2.0-beta.2",
39
- "@dogsbay/types": "0.2.0-beta.2",
40
- "@dogsbay/format-openapi": "0.2.0-beta.2"
35
+ "@dogsbay/format-mkdocs": "0.2.0-beta.21",
36
+ "@dogsbay/format-astro": "0.2.0-beta.21",
37
+ "@dogsbay/format-obsidian": "0.2.0-beta.21",
38
+ "@dogsbay/format-mdx": "0.2.0-beta.21",
39
+ "@dogsbay/format-starlight": "0.2.0-beta.21",
40
+ "@dogsbay/format-dogsbay-md": "0.2.0-beta.21",
41
+ "@dogsbay/format-openapi": "0.2.0-beta.21",
42
+ "@dogsbay/types": "0.2.0-beta.21"
41
43
  },
42
44
  "devDependencies": {
43
45
  "@types/node": "^22.0.0",
@@ -0,0 +1,262 @@
1
+ ---
2
+ name: dogsbay:agent-readiness
3
+ description: How Dogsbay sites expose content to LLM agents and search indexers — llms.txt, llms-full.txt, .md mirrors, Content-Signal HTTP headers, robots.txt. Use when configuring agent.* in dogsbay.config.yml or debugging agent consumption.
4
+ ---
5
+
6
+ # Agent readiness
7
+
8
+ Every Dogsbay site is built to be **agent-readable by default**.
9
+ Three mechanisms work together so that any modern LLM, search
10
+ engine, or AI-answer-engine can consume the docs as cleanly as
11
+ a human reader:
12
+
13
+ 1. **llms.txt** at the root — the canonical agent index
14
+ 2. **`.md` mirror** for every page — the prose body without
15
+ chrome
16
+ 3. **Content-Signal HTTP headers** — IETF-track signal for "what
17
+ AI use is permitted"
18
+
19
+ All three are emitted at `dogsbay site build` time. Toggleable
20
+ via the `agent:` block in `dogsbay.config.yml`.
21
+
22
+ ## llms.txt
23
+
24
+ The standard at [llmstxt.org](https://llmstxt.org/) — a single
25
+ file at the site root that lists every page with title +
26
+ description + URL. Two flavours:
27
+
28
+ - `/llms.txt` — short index (title + description + URL per page,
29
+ grouped by section). The agent's "table of contents."
30
+ - `/llms-full.txt` — full index with the markdown body of every
31
+ page concatenated. The agent's "everything in one paste."
32
+
33
+ Per-section mini-indexes also emit (`/llms-${section}.txt` for
34
+ each top-level nav group), so an agent can pull just the
35
+ relevant slice without grabbing the whole site.
36
+
37
+ Format example (`/llms.txt`):
38
+
39
+ ```
40
+ # Acme Docs
41
+
42
+ > Documentation for the Acme platform.
43
+
44
+ ## Getting started
45
+
46
+ - [Installation](/docs/install): Install the CLI on macOS, Linux, or Windows.
47
+ - [Quickstart](/docs/quickstart): Your first request in 60 seconds.
48
+
49
+ ## API reference
50
+
51
+ - [List pets](/docs/api/pets/list-pets): Returns paginated list.
52
+ - [Create a pet](/docs/api/pets/create-pet): Idempotent creation.
53
+ ```
54
+
55
+ Toggleable:
56
+
57
+ ```yaml
58
+ agent:
59
+ llmsTxt: true # default true; set false to omit
60
+ ```
61
+
62
+ ## `.md` mirror
63
+
64
+ Every emitted page has a sibling `.md` route that returns the
65
+ markdown source (or a faithful prose rendering of it) with
66
+ `Content-Type: text/markdown`.
67
+
68
+ For a page at `/docs/api/pets/list-pets`, the mirror is at
69
+ `/docs/api/pets/list-pets.md`. For `/docs/`, it's at `/docs.md`.
70
+
71
+ Why two URLs? A human visiting `/docs/api/pets/list-pets` gets
72
+ the rich HTML page with components, sidebar, search. An agent
73
+ hitting `/docs/api/pets/list-pets.md` gets just the prose —
74
+ faster to parse, no HTML noise, no dependency on a Cloudflare
75
+ worker for content negotiation.
76
+
77
+ ### Discovery via `<link rel="alternate">`
78
+
79
+ Every HTML page emits:
80
+
81
+ ```html
82
+ <link rel="alternate" type="text/markdown" href="/docs/api/pets/list-pets.md">
83
+ ```
84
+
85
+ Agents that follow `rel="alternate"` find the mirror without
86
+ guessing at URL conventions. Anthropic's prompt-cache, Mintlify's
87
+ agents, and several others do this.
88
+
89
+ Toggleable:
90
+
91
+ ```yaml
92
+ agent:
93
+ mdMirror: true # default true
94
+ ```
95
+
96
+ ### Per-page opt-out
97
+
98
+ Some pages don't have useful prose mirrors (e.g. landing pages
99
+ that are mostly hero components). Opt out per-page:
100
+
101
+ ```yaml
102
+ ---
103
+ title: Home
104
+ mdMirror: false
105
+ ---
106
+ ```
107
+
108
+ Or via the global `agent.mdMirror: false`.
109
+
110
+ ### Content negotiation (Cloudflare worker)
111
+
112
+ Astro's static-mode output doesn't pass per-request headers to
113
+ middleware, so the in-build middleware can't respond to
114
+ `Accept: text/markdown` by serving the `.md` body. The current
115
+ mitigation: the explicit `.md` URL is always available, and
116
+ `<link rel="alternate">` exposes it. A Cloudflare worker that
117
+ does proper content negotiation at the edge is planned (see
118
+ `plans/cloudflare-deploy-content-negotiation.md`).
119
+
120
+ ## Content-Signal HTTP headers
121
+
122
+ Per the IETF Content-Signal draft, sites can declare AI-use
123
+ permissions via HTTP headers:
124
+
125
+ ```
126
+ Content-Signal: aiTrain=no, aiInput=yes, search=yes
127
+ ```
128
+
129
+ Three keys:
130
+
131
+ | Key | Values | Meaning |
132
+ |---|---|---|
133
+ | `aiTrain` | `yes` / `no` | May this content be used for AI model training? |
134
+ | `aiInput` | `yes` / `no` | May this content be used as input to a live AI session (RAG, prompt context)? |
135
+ | `search` | `yes` / `no` | May this content be indexed by search engines? |
136
+
137
+ Configure via `agent.contentSignal`:
138
+
139
+ ```yaml
140
+ agent:
141
+ contentSignal:
142
+ aiTrain: "no" # don't use my docs to train models
143
+ aiInput: "yes" # but DO use them as live context (e.g. for users in Claude / Cursor)
144
+ search: "yes" # standard search indexing OK
145
+ ```
146
+
147
+ Emitted in two places:
148
+
149
+ - `public/_headers` — Cloudflare Pages and Netlify pick
150
+ this up at the edge automatically (Vercel ignores `_headers`; set headers in `vercel.json` there)
151
+ - `<meta>` tags in HTML head — for hosts that don't read
152
+ `_headers`
153
+
154
+ ## robots.txt
155
+
156
+ Auto-emitted at `public/robots.txt` based on `noindex` settings
157
+ + Content-Signal `search` value. Disallows crawlers when
158
+ `search: "no"`; otherwise allows everything.
159
+
160
+ For per-page `noindex`, the `robots` meta tag handles it (see
161
+ `dogsbay:frontmatter-fields`).
162
+
163
+ ## Per-page LLM action UI
164
+
165
+ Beyond the data side, Dogsbay can render an action cluster
166
+ ("Copy as markdown", "Open in Claude", "Open in ChatGPT") on
167
+ each page:
168
+
169
+ ```yaml
170
+ agent:
171
+ llmsTxt: true
172
+ mdMirror: true
173
+
174
+ llmActions:
175
+ enabled: true
176
+ providers: [claude, chatgpt, perplexity, gemini] # render order
177
+ placement: header # header | inline | both
178
+ copyButton: true
179
+ promptTemplate: "Read this docs page: {url}"
180
+ footerLink: true
181
+ ```
182
+
183
+ `{url}` resolves to the absolute `.md` mirror URL. The user
184
+ clicks "Open in Claude" → goes to `claude.ai/new?q=...` with a
185
+ prepopulated prompt that pulls the markdown into Claude's
186
+ context.
187
+
188
+ Per-page opt-out via `llmActions: false` in frontmatter.
189
+
190
+ ## What agents see
191
+
192
+ When an LLM is given the URL of a Dogsbay site:
193
+
194
+ 1. It fetches `/llms.txt` (table of contents)
195
+ 2. Picks pages relevant to the question
196
+ 3. Fetches each as `/{path}.md` (full prose)
197
+ 4. Reads `Content-Signal` to know if it's allowed to use the
198
+ content as context (typically yes if `aiInput=yes`)
199
+
200
+ That's a self-contained agent-consumption loop with no special
201
+ configuration on the agent's side.
202
+
203
+ ## Common patterns
204
+
205
+ ### Public docs, no AI training, allow live context
206
+
207
+ ```yaml
208
+ agent:
209
+ llmsTxt: true
210
+ mdMirror: true
211
+ contentSignal:
212
+ aiTrain: "no"
213
+ aiInput: "yes"
214
+ search: "yes"
215
+ ```
216
+
217
+ The default for most teams. Their docs help users in AI sessions
218
+ while signalling that they should not be used as training data.
219
+
220
+ ### Internal docs (no public agent access)
221
+
222
+ ```yaml
223
+ agent:
224
+ llmsTxt: false # don't advertise to crawlers
225
+ mdMirror: true # but keep the dev-side .md surface
226
+ contentSignal:
227
+ aiTrain: "no"
228
+ aiInput: "no"
229
+ search: "no"
230
+ ```
231
+
232
+ Plus host-side auth (Cloudflare Access, Vercel password, etc.)
233
+ to gate the site itself.
234
+
235
+ ### Marketing-site mode (everything open)
236
+
237
+ ```yaml
238
+ agent:
239
+ llmsTxt: true
240
+ mdMirror: true
241
+ contentSignal:
242
+ aiTrain: "yes" # put us in the training data; we want the visibility
243
+ aiInput: "yes"
244
+ search: "yes"
245
+ ```
246
+
247
+ ## Common mistakes
248
+
249
+ - ❌ Setting `agent.mdMirror: false` and expecting llms.txt to
250
+ still link to .md files — the index emits whatever URLs the
251
+ build produces. If mirrors aren't built, the index can't link
252
+ to them.
253
+ - ❌ Trusting `Accept: text/markdown` content negotiation today —
254
+ static-mode middleware doesn't see request headers. Use the
255
+ explicit `.md` URL.
256
+ - ❌ `aiTrain: "no"` + a public-internet-readable site —
257
+ Content-Signal is **declarative**, not enforceable. Crawlers
258
+ can ignore it. For real protection, gate access at the
259
+ network level.
260
+ - ❌ Mistyping the Content-Signal values (`"true"` instead of
261
+ `"yes"`) — the loader doesn't normalise; the header emits
262
+ literally what you wrote.