@buildinternet/releases-skills 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,26 @@
1
+ {
2
+ "name": "@buildinternet/releases-skills",
3
+ "version": "0.12.1",
4
+ "description": "Agent skills bundled with the Releases CLI. Markdown playbooks for changelog ingest, discovery, and analysis.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/zachdunn/releases-cli.git",
10
+ "directory": "packages/skills"
11
+ },
12
+ "exports": {
13
+ ".": "./src/index.ts"
14
+ },
15
+ "files": [
16
+ "src",
17
+ "skills",
18
+ "README.md"
19
+ ],
20
+ "publishConfig": {
21
+ "access": "public"
22
+ },
23
+ "scripts": {
24
+ "prepack": "rm -rf skills && cp -R ../../skills ./skills"
25
+ }
26
+ }
@@ -0,0 +1,74 @@
1
+ ---
2
+ name: analyzing-releases
3
+ description: >
4
+ Analyze release trends across multiple companies to produce competitive
5
+ intelligence. Use when asked to compare companies, analyze a market segment,
6
+ identify industry trends, forecast upcoming releases, or answer questions
7
+ like "what is X shipping lately" or "how does X compare to Y." Also triggers
8
+ on requests for competitive landscape analysis, feature gap analysis, or
9
+ release velocity comparisons.
10
+ ---
11
+
12
+ # Analyzing Releases
13
+
14
+ Turn changelog data into competitive intelligence by analyzing release patterns across a cohort of related companies.
15
+
16
+ ## Key Operations
17
+
18
+ | Operation | CLI | Typed tool |
19
+ |-----------|-----|------------|
20
+ | Check existing sources | `releases list --query <company> --json` | `list_sources` with query param |
21
+ | Fetch releases | `releases admin source fetch <slug> --max 50` | `fetch_source` with identifier (ID or slug) |
22
+ | Get latest releases | `releases latest <slug> --json` | `get_latest_releases` with source/org and limit |
23
+ | Search releases | `releases search <query> --json` | `search_releases` with query |
24
+ | Summarize | `releases summary <slug> --json` | (not available as typed tool) |
25
+ | Compare | `releases compare <slugA> <slugB> --json` | (not available as typed tool) |
26
+
27
+ ## Workflow
28
+
29
+ ### 1. Define the cohort
30
+
31
+ Pick 3-6 companies in the same competitive space. Good cohorts share a common buyer or technical layer (e.g., developer databases, frontend frameworks, observability tools).
32
+
33
+ ### 2. Check existing sources
34
+
35
+ Search for each company to see what sources are indexed. If a company isn't in the system, it needs to be onboarded first.
36
+
37
+ ### 3. Fetch recent releases
38
+
39
+ Fetch each source. The system skips unchanged feeds automatically.
40
+
41
+ ### 4. Get latest releases
42
+
43
+ Get structured release data with dates for each source. Use a limit (e.g., 50) to cap results. For org-wide views, filter by organization instead of individual source.
44
+
45
+ ### 5. Search and cross-reference
46
+
47
+ Search across all indexed releases to find specific features, breaking changes, or patterns. `search_releases` is hybrid (lexical + semantic) by default — natural-language queries like "auth refresh tokens" or "cold start improvements" work without exact keyword matches. Pass `mode: "lexical"` if you need strict keyword behavior.
48
+
49
+ **Result shape:** every hit carries a `kind` discriminator:
50
+ - `kind: "release"` — a normal release row, use as-is.
51
+ - `kind: "changelog_chunk"` — a passage from a stored CHANGELOG.md file. The hit includes `sourceSlug`, `chunkOffset`, and `chunkLength`. Chain into `get_source_changelog({ slug: sourceSlug, offset: chunkOffset, limit: chunkLength * 3 })` to read the surrounding section before quoting it. Chunk hits often surface older or more granular notes than what's in the indexed release rows, so they're useful for "when did X first ship" questions.
52
+
53
+ For org/product/source discovery (e.g. "find observability vendors with edge offerings"), use `search_registry` instead of `list_sources --query` — it's vector-backed and matches on description and category, not just slug substring.
54
+
55
+ ### 6. Synthesize
56
+
57
+ Combine summaries and comparisons into a structured analysis:
58
+
59
+ - **Release velocity table** — releases per company, cadence pattern
60
+ - **Trends adopted across the board** — features 3+ companies shipped in the same window
61
+ - **Differentiating bets** — what each company is investing in that others aren't
62
+ - **Gaps** — what competitors shipped that a company hasn't
63
+ - **Forecasts** — specific predictions based on pre-release tracks, deprecations, and trajectory
64
+
65
+ ## Output
66
+
67
+ Ask the user where to save the analysis, or use your best judgment based on the project's conventions. Include a "Process Notes" section documenting which CLI commands were used so the analysis is reproducible.
68
+
69
+ ## Important
70
+
71
+ - Focus on what companies shipped. If a source has noisy data (blog posts mixed in, missing dates), work around it silently. Don't include source quality commentary in the report unless a company had to be substantially excluded.
72
+ - Fill data gaps with web fetches. List sources to get release URLs, then use WebFetch to spot-check pages for missing dates, versions, or feature details.
73
+ - For velocity counting, get the latest releases with dates — CLI: `releases latest <slug> --json`, typed tool: `get_latest_releases`.
74
+ - AI-powered summarize and compare are only available via CLI (`releases summary`, `releases compare`). When using typed tools, synthesize manually from raw release data.
@@ -0,0 +1,77 @@
1
+ ---
2
+ name: classify-media-relevance
3
+ description: Decide whether an image or video found on a release page is editorial content (screenshots, demos, diagrams, product shots) or site chrome (avatars, logos, tracking pixels, decorative badges). Used during parsing to populate a release's media array.
4
+ ---
5
+
6
+ # Classifying Media Relevance
7
+
8
+ Release pages contain two kinds of media: **editorial content** that belongs in the release (screenshots of the feature, demo videos, diagrams explaining a change) and **site chrome** that doesn't (author avatars, nav logos, tracking pixels, decorative separators). This skill governs which items end up in a release's `media[]` array.
9
+
10
+ The goal is precision-over-recall: a dropped editorial image is recoverable (users click through to the source page), but a kept junk image pollutes the UI and wastes storage.
11
+
12
+ ## When this runs
13
+
14
+ - During the parse pipeline, after the AI extracts release content from a fetched page.
15
+ - During crawl-mode fetches, when the extractor reads full-page markdown from a linked article and produces a fresh `media[]`.
16
+ - Not during feed fetches where the feed already scoped media to per-entry content (trust the feed).
17
+
18
+ ## Cheap pre-checks (keep in code, don't spend AI tokens)
19
+
20
+ These checks are deterministic, free, and catch the overwhelming majority of obvious junk. Always run them **before** invoking this skill. If a pre-check drops an item, no AI call is needed.
21
+
22
+ 1. **Tracking domains** — URL host (or host plus path prefix, as with `www.facebook.com/tr`) matches a known tracking/analytics endpoint (`px.ads.linkedin.com`, `t.co`, `www.facebook.com/tr`, `analytics.twitter.com`, `bat.bing.com`). Drop with reason `tracking domain: <host>`.
23
+ 2. **Unsupported content-type** — after HEAD/GET, content-type isn't in the uploadable set (`image/png|jpeg|gif|webp|svg+xml|avif`, `video/mp4|webm`). Drop with reason `unsupported type`.
24
+ 3. **Size bounds** — body < 5 KB (tracking pixels, spacers) or > 10 MB (won't upload anyway). Drop.
25
+ 4. **Streaming embeds** — YouTube, Vimeo, Loom URLs are kept as `type: "video"` references without downloading. Never route through R2 upload or this skill.
26
+ 5. **ETag / content hash seen before** — if the R2 key derived from content hash already exists, reuse it and skip reclassification.
27
+
28
+ Everything else — the ambiguous middle where URL patterns overlap between chrome and content — goes through the skill.
29
+
30
+ ## Heuristic nudges (optional, low-confidence)
31
+
32
+ The old code treated path substrings like `/avatar`, `/logo`, `/icon`, `/badge`, `/favicon`, `1x1` as hard drops. That was wrong often enough to matter: a post titled "New icon set" shipped images under `/icons/` that were the actual product. **Do not hard-drop on path substrings.** Pass them through as weak negative signals and let the classifier weigh them against context.
33
+
34
+ The one exception: `/favicon.ico` and any other `/favicon*` file served directly from the site root are always chrome. Keep that single check in code.
35
+
36
+ ## Classification rules
37
+
38
+ For each remaining media item, decide **keep** or **drop** based on these signals, in rough order of importance:
39
+
40
+ **Strong keep signals**
41
+ - Image or video appears in the middle of release body content (not header/footer of the page).
42
+ - Alt text describes a feature, UI state, code, or demo ("New dashboard showing filters", "Architecture diagram", "CLI output").
43
+ - Filename suggests editorial content (`screenshot-*`, `demo-*`, `feature-*`, `*-hero.png`, version numbers in name).
44
+ - Dimensions consistent with screenshots/diagrams (wider than 400px, aspect ratio other than a perfect 1:1 square).
45
+ - Hosted on the org's CDN *under a posts/releases/blog path* (e.g., `cdn.example.com/posts/2026/new-thing.png`).
46
+
47
+ **Strong drop signals**
48
+ - Alt text is a person's name, a company name alone, or empty and the URL contains `avatar|profile|author|contributor`.
49
+ - Filename is generic site chrome (`logo.svg`, `wordmark.png`, `header-bg.jpg`, `footer-icon.svg`).
50
+ - Perfect 1:1 square under 200×200 with no contextual link to release content (likely avatar/badge).
51
+ - URL path includes `/wp-content/plugins/` or `/_next/static/media/` with no posts path — usually framework chrome.
52
+ - Appears in every release on the source (detectable by callers passing a frequency hint) — site-wide chrome bleeding into parses.
53
+
54
+ **Weak / context-dependent**
55
+ - `/icon`, `/icons/` paths — only chrome if the release isn't about icons; keep if the release announces icon/design updates.
56
+ - `/badge`, `/badges/` — drop if it's a shields.io CI badge, keep if the release is about achievements/credentials.
57
+ - SVGs at the top of the page — usually logos, but can be diagrams. Use surrounding alt text and position.
58
+
59
+ ## Output format
60
+
61
+ Return a JSON array, one entry per input item, in the same order:
62
+
63
+ ```json
64
+ [
65
+ { "url": "https://...", "decision": "keep", "confidence": "high", "reason": "screenshot of new dashboard, alt text describes feature" },
66
+ { "url": "https://...", "decision": "drop", "confidence": "high", "reason": "author avatar, 80x80 square at top of post" }
67
+ ]
68
+ ```
69
+
70
+ `confidence` is `high` when signals align, `low` when it's a judgment call. Callers treat `low` drops conservatively — they may keep low-confidence drops on high-value sources.
71
+
72
+ ## Anti-patterns
73
+
74
+ - **Don't** build a new substring blocklist inside the skill — that's what we're replacing.
75
+ - **Don't** drop based on URL alone without considering alt text, position, and release context.
76
+ - **Don't** request the image bytes to classify — work from URL + alt + surrounding content only. The byte-level decisions happen in the cheap pre-checks.
77
+ - **Don't** keep "just in case" — over-keeping pollutes the grid view more than under-keeping hurts individual releases.
@@ -0,0 +1,242 @@
1
+ ---
2
+ name: finding-changelogs
3
+ description: How to find, evaluate, and recommend the best ingestion method for changelog URLs — covers feed discovery, provider detection, GitHub API, markdown sources, and scraping fallback
4
+ ---
5
+
6
+ # Finding Changelogs
7
+
8
+ Determine the best way to get structured release data from a changelog or release notes page.
9
+
10
+ Many pages have better-structured data sources behind them — RSS feeds, raw markdown files, or API endpoints. Finding those avoids the complexity of parsing rendered HTML.
11
+
12
+ ## Content Verification
13
+
14
+ After discovering a feed or structured source, always spot-check the entries before accepting it. Sample a few entries and verify they are actual changelog or release content — not blog posts, marketing articles, tutorials, or unrelated editorial content.
15
+
16
+ Red flags that a feed is wrong:
17
+ - Entry URLs point to `/blog/` paths rather than `/changelog/` or `/releases/` paths
18
+ - Titles read like articles or tutorials (e.g., "Choosing a logging library: The definitive guide")
19
+ - No version numbers, semver patterns, or feature/fix language anywhere in the entries
20
+ - The feed URL is site-wide (e.g., `/feed.xml`) rather than section-specific (e.g., `/changelog/feed.xml`)
21
+ - Entry content discusses opinions, comparisons, or industry trends rather than product changes
22
+
23
+ If the entries don't look like releases, the feed is likely the wrong one. Look for a more specific feed, or fall back to a different ingestion method.
24
+
25
+ **Watch for redirects.** A URL like `blog.example.com/changelog/` may redirect to `example.com/changelog/`, but feed discovery may have already found the blog's site-wide feed before the redirect. Always check whether the discovered feed is scoped to the changelog section, not the entire site.
26
+
27
+ ## Priority Order
28
+
29
+ Well-known files > Link relations > Feeds > GitHub Releases API > raw markdown > page scraping.
30
+
31
+ For `github` sources, the fetch pipeline ingests tagged releases **and** the repo's canonical `CHANGELOG.md` (or `CHANGES.md` / `HISTORY.md` / `RELEASES.md` / `NEWS.md` at the repo root) on every fetch pass — the file is surfaced in the web UI as a separate tab, exposed via the `get_source_changelog` MCP tool, and is often the richer source when a project ships entries that never became tagged releases. The refresh piggybacks on each GitHub fetch with a content-hash short-circuit, so stored files stay in sync with tagged releases. You don't need to add a second source for the CHANGELOG file; the github adapter handles both.
32
+
33
+ ### Reading a tracked CHANGELOG
34
+
35
+ Once a github source is tracked, its CHANGELOG is readable via `GET /v1/sources/:slug/changelog` (REST), the `get_source_changelog` MCP tool, or `releases admin source changelog <slug>` (CLI). All three support heading-aligned slicing in two modes:
36
+
37
+ - **Token mode** (preferred for agent context budgeting) — pass `tokens` / `--tokens` with a cl100k_base budget. The response carries `sliceTokens` (actual count of the returned chunk) and `totalTokens` (whole file) so you can plan context precisely. Recommended brackets: 2000 / 5000 / 10000 / 20000.
38
+ - **Char mode** — pass `limit` / `--limit` for character budgets. Slices are heading-aligned the same way as in token mode (same snap/overshoot rules).
39
+
40
+ `tokens` wins when both are passed. Chain successive calls via the returned `nextOffset` to page through big files (e.g. Apollo Client's 700KB CHANGELOG) without pulling the whole thing at once. Every response includes `totalTokens` upfront, so you can budget the number of calls before you start reading.
41
+
42
+ ## Well-Known Files & Link Relations
43
+
44
+ The discovery pipeline checks for standardized changelog metadata before falling back to heuristic methods.
45
+
46
+ ### Well-known files (highest priority)
47
+
48
+ Checked in cascade — stops as soon as a tier produces results:
49
+ 1. `/.well-known/changelog.json` — JSON manifest (primary)
50
+ 2. `/.well-known/releases.json` — JSON manifest (alias)
51
+ 3. `/.well-known/changelog.txt` — text format (security.txt-style fallback)
52
+ 4. `/AGENTS.md`, `/AGENTS.txt` — AI agent instruction files with changelog references
53
+ 5. `/changelog.md`, `/changelog.txt`, `/releases.md`, `/releases.txt` (and uppercase variants) — root-level files
54
+
55
+ **JSON manifest format** (`/.well-known/changelog.json`):
56
+
57
+ Single product:
58
+ ```json
59
+ {
60
+ "version": 1,
61
+ "url": "https://example.com/changelog",
62
+ "feed": "https://example.com/changelog/feed.xml"
63
+ }
64
+ ```
65
+
66
+ Multi-product:
67
+ ```json
68
+ {
69
+ "version": 1,
70
+ "changelogs": [
71
+ { "name": "Platform", "url": "https://example.com/changelog", "feed": "https://example.com/changelog.rss" },
72
+ { "name": "API", "url": "https://example.com/api/changelog" }
73
+ ]
74
+ }
75
+ ```
76
+
77
+ **Text manifest format** (`/.well-known/changelog.txt`):
78
+ ```
79
+ # Changelog discovery — see https://releases.sh/well-known
80
+ Changelog: https://example.com/changelog
81
+ Feed: https://example.com/changelog/feed.xml
82
+ ```
83
+
84
+ Lines starting with `#` are comments. Keys are `Changelog:` and `Feed:`, one per line.
85
+
86
+ **AGENTS.md / AGENTS.txt** — AI agent instruction files may reference changelogs. The parser detects:
87
+ - Key-value lines: `Changelog: https://example.com/changelog`
88
+ - Markdown links: `[Our Changelog](https://example.com/changelog)`
89
+ - Bare URLs on lines mentioning "changelog", "release notes", etc.
90
+
91
+ **Root changelog/releases files** — `/changelog.md`, `/changelog.txt`, `/releases.md`, `/releases.txt` (and uppercase variants) are probed via HEAD request. Only accepted if the server returns text content (not an HTML error page).
92
+
93
+ ### Link relations
94
+
95
+ The discovery pipeline detects these `<link>` tags in the HTML `<head>`:
96
+
97
+ ```html
98
+ <link rel="changelog" href="/changelog">
99
+ <link rel="releases" href="/releases">
100
+ <link rel="release-notes" href="/docs/release-notes">
101
+ ```
102
+
103
+ If the tag includes a feed `type` attribute, the URL is treated as a feed source:
104
+ ```html
105
+ <link rel="changelog" type="application/atom+xml" href="/changelog.atom">
106
+ ```
107
+
108
+ These are distinct from standard feed autodiscovery (`rel="alternate"`) — they point directly to changelog pages or feeds, not generic site feeds.
109
+
110
+ ### Discovery method labels
111
+
112
+ Sources found via these mechanisms are tagged:
113
+ - `method: "well-known"` — from `/.well-known/` manifest files
114
+ - `method: "link-rel"` — from HTML `<link rel="changelog|releases|release-notes">`
115
+
116
+ Both carry `confidence: "high"` since they represent explicit publisher intent.
117
+
118
+ ## Evaluation
119
+
120
+ Evaluate a URL to determine the best ingestion method. CLI: `releases admin discovery evaluate <url> --json`. Typed tool: `evaluate_url` with url param.
121
+
122
+ Key fields in output:
123
+ - `recommendedMethod`: `feed`, `github`, `markdown`, `scrape`, or `crawl`
124
+ - `recommendedUrl`: The URL to use (may differ from the input URL)
125
+ - `feedUrl` / `feedType`: If a feed was found
126
+ - `githubRepo`: In `owner/repo` format, if applicable
127
+ - `pageStructure`: `single-page`, `index`, or `unknown`
128
+ - `confidence`: `high` (structured source found), `medium` (clear page structure), `low` (unclear)
129
+ - `alternatives`: Other viable sources found
130
+
131
+ ## Checking Existing Sources
132
+
133
+ Search with a domain or company name query to check what sources already exist. CLI: `releases list --query <text> --json`. Typed tool: `list_sources` with query param. Use as a starting point when you don't know where a company's changelogs live.
134
+
135
+ ## Pre-checks (automated)
136
+
137
+ The evaluate operation runs these before returning:
138
+
139
+ - **Provider fingerprinting** — identifies the hosting platform (Mintlify, ReadMe, Docusaurus, Ghost, etc.) via DNS CNAME, HTTP headers, and HTML patterns. Each provider has known capabilities.
140
+ - **Feed discovery** — probes ~15 commonly used feed paths (distinct from the `/.well-known/` manifest files) and HTML `<link rel="alternate">` autodiscovery tags.
141
+ - **Provider-specific probes** — if a provider is detected, tries its known feed paths and markdown suffix.
142
+
143
+ ## When to Evaluate Manually
144
+
145
+ If evaluation returns `confidence: low` or `recommendedMethod: scrape`, you may want to investigate the page yourself:
146
+
147
+ 1. **Fetch the page** with `WebFetch` and look at the HTML source.
148
+ 2. **Look for feeds** — feed URLs embedded in JavaScript, non-standard paths, or links to RSS/Atom.
149
+ 3. **Look for GitHub repos** — "View on GitHub", "CHANGELOG.md on GitHub", or repository links.
150
+ 4. **Look for raw markdown** — links to source `.md` files.
151
+ 5. **Classify the page structure** — is it a single-page changelog or an index of links to individual release pages?
152
+
153
+ ## Primary Changelogs
154
+
155
+ When evaluating multiple changelog sources for an org, identify which one is the company's **primary changelog** — the top-level, platform-wide changelog that covers the product as a whole. This is typically a website changelog page (e.g., `example.com/changelog`) rather than individual GitHub repos or product-specific pages.
156
+
157
+ After adding sources, mark the primary one. CLI: `releases admin source edit <identifier> --primary`. Typed tool: `edit_source` with identifier (ID or slug) and is_primary: true. Only one source per org should be primary. If there's no clear top-level changelog, don't mark any as primary.
158
+
159
+ ## When to Use Crawl
160
+
161
+ Use `--crawl` (or set `crawlEnabled` in source metadata) when:
162
+ - The page is an **index** linking to individual release pages (e.g., `/changelog/2024-03-15`)
163
+ - Single-page scraping only gets titles/dates but not full content
164
+ - The provider is known to use per-release pages (Intercom, Notion, some custom sites)
165
+
166
+ Do NOT use crawl for single-page changelogs or feeds.
167
+
168
+ ## Known Provider Capabilities
169
+
170
+ Detected automatically in pre-checks. Listed for reference:
171
+
172
+ | Provider | Feed Paths | Markdown Suffix | Static | Notes |
173
+ |----------|-----------|-----------------|--------|-------|
174
+ | Mintlify | `/rss.xml` | Yes (`.md`) | Yes | — |
175
+ | Fern | `/changelog.rss`, `/docs/changelog.rss` | — | No | RSS contains `fve-mdx-b64` attributes (noise, stripped automatically). `<generator>` tag = `buildwithfern.com`. |
176
+ | ReadMe | `/changelog.rss` | — | No | — |
177
+ | Docusaurus | `/blog/rss.xml`, `/blog/atom.xml`, `/blog/feed.json` | — | Yes | — |
178
+ | Ghost | `/rss/` | — | Yes | — |
179
+ | WordPress | `/feed/` | — | Yes | — |
180
+ | Productboard | `/changelog.rss`, `/changelog/feed` | — | No | — |
181
+ | Headway | `/feed` | — | No | — |
182
+ | Beamer | `/feed` | — | No | — |
183
+ | LaunchNotes | `/rss` | — | No | — |
184
+ | GitBook, Notion, Intercom, Zendesk, etc. | — | — | No | No feeds; use crawl or scrape. Some may expose a title-only RSS feed (no content body) — these are auto-detected as `summary-only` and fall through to scrape |
185
+
186
+ ## Rendering Optimization
187
+
188
+ When a source uses the `scrape` type and falls through to the single-page Cloudflare path, the adapter checks whether the provider serves pre-rendered HTML. Static providers (Docusaurus, VitePress, WordPress, Ghost, Mintlify, etc.) don't need a headless browser — the content is already in the HTML response.
189
+
190
+ For static providers, the adapter automatically uses Cloudflare's crawl API with `render: false`, which is ~10-30x faster than headless browser rendering and currently free.
191
+
192
+ **When evaluating a new scrape source**, note the provider in the playbook. If the provider isn't in the table above but you can see from the page source that content is in the initial HTML (no loading spinners, no `<div id="root"></div>` shells), set `--no-render` on the source to enable the fast path.
193
+
194
+ **If a fast fetch returns incomplete content**, the adapter falls back to full rendering automatically. If you notice this happening repeatedly for a source, set `--render` to force headless rendering and note the reason in the playbook.
195
+
196
+ The agent's role is to evaluate content completeness after the first fetch — check that releases have titles, dates, and content. If they do, the fast path is working. If releases are empty or missing, the page likely needs JS rendering.
197
+
198
+ ## Source Selection and Scope
199
+
200
+ Prefer **3–5 high-signal sources per org** over exhaustive coverage. More sources means more noise, more maintenance, and diminishing returns. Every source you add should justify itself — if you wouldn't want to read its releases, don't add it.
201
+
202
+ ### Core products vs ecosystem
203
+
204
+ Only index an org's **own products**, not their ecosystem or community plugins. For example:
205
+
206
+ - **Terraform** (core product) — yes
207
+ - `terraform-provider-aws` (ecosystem plugin maintained by a different team) — no
208
+ - **Next.js** (Vercel's own framework) — yes
209
+ - `next-auth` (community library) — no
210
+
211
+ Signs that a repo is ecosystem, not core:
212
+ - Maintained by a different team or community contributors
213
+ - One of hundreds of similar repos (providers, plugins, extensions, adapters)
214
+ - Ships independently of the org's main release cycle
215
+ - The org wouldn't mention it in their own changelog
216
+
217
+ ### Staleness signals — when to skip
218
+
219
+ Skip sources that show signs of being inactive or low-value:
220
+ - **Maintenance mode:** No meaningful releases in 6+ months, or only dependency bumps
221
+ - **Pre-release only:** Recent "releases" are all dev/alpha/RC builds with no stable versions
222
+ - **Superseded:** The product has been replaced by a successor (e.g., Vagrant → dev containers)
223
+ - **Winding down:** The org has announced deprecation or deprioritization
224
+ - **Low adoption:** The product exists but has minimal real-world usage
225
+
226
+ When in doubt, add and pause rather than skip entirely. A focused index with 3 core sources is more useful than 11 sources where half are noise.
227
+
228
+ ### Add and pause, don't omit
229
+
230
+ When you find a source that matches the staleness or ecosystem criteria above, **still add it to the database** but immediately set it to `--priority paused`. This prevents future onboard runs from rediscovering the same source and re-evaluating it. The source record serves as documentation that "we know about this, and we decided not to track it."
231
+
232
+ Add the source and immediately set it to paused priority. CLI: `releases admin source add <name> --url <url> --org <org> --type github` then `releases admin source edit <identifier> --priority paused`. Typed tools: `add_source` then `edit_source` with identifier (ID or slug) and fetch_priority: "paused".
233
+
234
+ Do the same for ecosystem plugins, deprecated products, and low-value repos. The goal is to capture the discovery decision, not to lose the knowledge.
235
+
236
+ ## Products, Categories, and Tags
237
+
238
+ Organizations can have multiple distinct products (e.g., Vercel → Next.js, Turborepo, v0). When discovering sources for an org, consider whether they belong to separate products.
239
+
240
+ Use product and org management operations to organize what you find. CLI: `releases admin product add`, `releases admin org tag add`, `releases categories`. Typed tools: `manage_product`, `manage_org`, `list_categories`. The full list of valid categories is provided in your system prompt.
241
+
242
+ Don't force product groupings when sources are ambiguous — leave them at the org level and note suggestions in the state file.
@@ -0,0 +1,179 @@
1
+ ---
2
+ name: managing-sources
3
+ description: How to add, remove, list, validate, and manage changelog sources — covers ignored/blocked URLs, duplicate detection, and the validation workflow
4
+ ---
5
+
6
+ # Managing Sources
7
+
8
+ Operational guide for managing changelog sources.
9
+
10
+ ## Tool Reference
11
+
12
+ Operations can be performed via CLI commands or typed MCP/agent tools. Use whichever interface is available in your context.
13
+
14
+ | Operation | CLI | Typed tool |
15
+ |-----------|-----|------------|
16
+ | List sources | `releases list [slug] --json [--org <org>] [--query <text>] [--has-feed] [--category <c>] [--compact] [--limit <n>] [--page <n>]` | `list_sources` with query, organization, category, has_feed params |
17
+ | Add source | `releases admin source add <name> --url <url> [--type <type>] [--org <org>] [--feed-url <url>]` | `add_source` with name, url, type, organization, feed_url params |
18
+ | Edit source | `releases admin source edit <identifier> [--primary] [--priority <p>]` | `edit_source` with identifier (ID or slug), is_primary, fetch_priority params |
19
+ | Remove source | `releases admin source remove <slug> [--ignore --reason <reason>]` | `remove_source` with identifier (ID or slug) param |
20
+ | Fetch releases | `releases admin source fetch <slug> [--dry-run] [--max <n>]` | `fetch_source` with identifier (ID or slug) param |
21
+ | Get latest releases | `releases latest [slug] --json [--org <org>]` | `get_latest_releases` with source, organization, limit params |
22
+ | Search releases | `releases search <query> --json` | `search_releases` with query, limit params |
23
+ | Evaluate URL | `releases admin discovery evaluate <url> --json` | `evaluate_url` with url param |
24
+ | Add org | `releases admin org add <name> [--domain <d>] [--description <t>] [--category <c>] [--tags <t1,t2>]` | `manage_org` action "add" with name, domain, description, category, tags |
25
+ | Edit org | `releases admin org edit <slug> [--category <c>]` | `manage_org` action "edit" with identifier, category |
26
+ | Show org | `releases admin org show <slug> --json` | `get_organization` with identifier |
27
+ | Add tags to org | `releases admin org tag add <slug> <tags...>` | `manage_org` action "tag_add" with identifier, tags |
28
+ | Link account | `releases admin org link <slug> --platform <p> --handle <h>` | `manage_org` action "link_account" with identifier, platform, handle |
29
+ | Add product | `releases admin product add <name> --org <org> [--category <c>] [--tags <t>]` | `manage_product` action "add" with name, organization, category, tags |
30
+ | Ignore URL | `releases admin policy ignore add --org <org> <url>` | `exclude_url` action "ignore" with url, organization |
31
+ | Block URL | `releases admin policy block add <url>` | `exclude_url` action "block" with url |
32
+ | List categories | `releases categories --json` | `list_categories` |
33
+ | Get playbook | `releases admin content playbook <org>` | `get_playbook` with organization param |
34
+ | Update playbook notes | `releases admin content playbook <org> --notes "..."` | `update_playbook_notes` with organization, notes params |
35
+
36
+ ## Listing Sources
37
+
38
+ Search for existing sources with optional filters:
39
+ - **query** — filter by name, slug, or URL
40
+ - **organization** — filter by org ID or slug
41
+ - **product** — filter by product ID or slug
42
+ - **category** — filter by category
43
+ - **has_feed** — only sources with a discovered feed URL
44
+
45
+ Use `--json` (CLI) for structured output. Typed tools always return JSON.
46
+
47
+ ## Adding Sources
48
+
49
+ Required: **name** and **url**. Optional: **type** (github, scrape, feed, agent — auto-detected from URL if omitted), **organization** (org ID or slug to associate with), **feed_url** (direct feed URL if known).
50
+
51
+ ### Naming sources and products
52
+
53
+ **Don't prefix names with the org name.** The org is already shown as context on every page — repeating it in each child source produces noise like "Datadog › Datadog dd-trace-py". Pick the bare, recognizable name instead.
54
+
55
+ Rules, in priority order:
56
+
57
+ 1. **GitHub sources → use the repo name.** `DataDog/dd-trace-py` → `dd-trace-py`, `vercel/next.js` → `next.js`. That's the name devs already recognize; the `owner/repo` byline underneath disambiguates.
58
+ 2. **Website/feed sources → strip the org name if present.** `Datadog Browser SDK` → `Browser SDK`, `Stripe API Changelog` → `API Changelog`.
59
+ 3. **Keep the org prefix only when it's part of the canonical product name.** `Claude Code`, `GitHub Actions`, `Google Cloud Run`, `Amazon S3` — people say them that way. If you strip the prefix and what's left is the actual name people use, strip. If stripping produces something nobody would recognize on its own, keep the prefix.
60
+ 4. **Org-level content sources keep the prefix.** `Datadog Blog`, `Vercel Engineering Blog` — "Blog" alone is meaningless, and org-prefix is the standard convention. Same for "Newsroom", "Announcements".
61
+ 5. **Products follow the same rules.** A product under Vercel should be `Next.js`, not `Vercel Next.js`. A product under Datadog whose actual name is `Agent` stays `Agent` — the org context above it already says Datadog.
62
+
63
+ When in doubt: would a developer reading this name on its own (with the org already shown above) recognize what it is? If yes, strip. If no, keep the prefix.
64
+
65
+ ### Organization descriptions
66
+
67
+ When creating an org, include a brief one-sentence product description. This grounds AI summaries for lesser-known products, and it's also the primary signal for the entity vector index — `search_registry` and the registry side of hybrid search match on description + category, not just name. A good description noticeably improves recall.
68
+
69
+ ### Embedding side effects
70
+
71
+ Adding or editing an org, product, or source triggers an entity embedding into the registry vector index in the background (fire-and-forget on the worker, never blocks the write). PATCHes are gated on the embed-relevant fields (name, description, category, domain, url) actually changing, so cosmetic edits and poll-driven metadata bumps don't re-embed. There's no manual step — if a write succeeds, treat the embedding as in-flight. If you ever need to verify or backfill, run `releases admin embed status` and then `releases admin embed entities` (remote mode only).
72
+
73
+ ## Removing Sources
74
+
75
+ When removing discovery results, also ignore the URL to prevent re-discovery. In CLI: `releases admin source remove <slug> --ignore --reason "..."`. With typed tools: call `remove_source` then `exclude_url` with action "ignore".
76
+
77
+ ## Ignored URLs (org-scoped)
78
+
79
+ A URL ignored for one org can still be valid for another org. Always scope ignores to the relevant organization.
80
+
81
+ ## Blocked URLs (global)
82
+
83
+ For spam domains and known-bad URLs that should never be added for any org. Use block_type "domain" to block an entire domain.
84
+
85
+ ## Validation Workflow
86
+
87
+ After adding a source, validate it:
88
+
89
+ 1. **Add the source** — provide name and URL
90
+ 2. **Fetch** — trigger a fetch (CLI: `--dry-run` for preview, then real fetch; typed tools: `fetch_source`)
91
+ 3. **Check results** — get latest releases and verify they have titles, dates, content
92
+ 4. **If bad:** remove the source and ignore the URL
93
+ 5. **If good:** the source is ready for production fetches
94
+
95
+ ## Primary Sources
96
+
97
+ An org can have one source marked as its **primary changelog** — the main, company-wide changelog. Mark it with `--primary` (CLI) or `is_primary: true` (typed tool).
98
+
99
+ When onboarding an org, if you find a single top-level changelog alongside product-specific or GitHub sources, mark the top-level one as primary.
100
+
101
+ ## Playbooks
102
+
103
+ Each org has a **playbook** — a README that tells any agent how to efficiently work with that org's changelog sources. The playbook has two layers:
104
+
105
+ - **Header** — auto-generated from source metadata. Shows source types, URLs, priorities, parseInstructions, and product groupings. Regenerates automatically on every source mutation. You never edit this directly.
106
+ - **Agent notes** — free-form markdown that you fully control. This is the most important part of the playbook. Write it like a README for a teammate who needs to fetch releases from this org without asking questions.
107
+
108
+ **Always read the playbook before fetching or working with an org's sources.** Typed tool: `get_playbook` with organization param. CLI: `releases admin content playbook <org>`. If no playbook exists yet, one will be auto-generated on the next source mutation (add/edit/remove).
109
+
110
+ ### Writing good agent notes
111
+
112
+ Write notes like a **skill for the agent that will fetch from this org** — imperative, action-oriented, concise. The reader is an agent about to do work; tell it what to do and what to watch for, not what things are.
113
+
114
+ Organize notes under these headings:
115
+
116
+ **`### Fetch instructions`** — One paragraph per source. Use imperative voice:
117
+ - What to do: "Set version=null", "Parse `<h2>` elements as version boundaries", "No filtering needed"
118
+ - What to expect: cadence, content quality, whether rendering is needed
119
+ - When to skip or deprioritize: "Only fetch when looking for launch announcements specifically"
120
+ - Cite version format examples where useful (e.g., "semver like 2.1.98")
121
+
122
+ **`### Traps`** — Concise warnings with **bolded trigger labels**:
123
+ - Each trap is a bullet with a bold label and a one-sentence explanation
124
+ - Example: `**Doubled paths on Platform**: Relative doc links get prefixed with the source URL, producing doubled paths.`
125
+ - Include disabled sources with "Don't re-discover" warnings so agents don't re-evaluate them
126
+ - Only include traps that would cause wasted work or bad data — skip informational notes
127
+
128
+ **`### Coverage`** — Two or three sentences max:
129
+ - Which sources are canonical vs supplementary
130
+ - Whether active sources cover the org's full release surface
131
+ - Any known gaps worth noting
132
+
133
+ **`### Release cadence`** — Call out rollup publishers explicitly. Some orgs don't ship incremental changelog entries at all — they publish seasonal, quarterly, or annual **rollup** pages that collect many features into one banner post or microsite (e.g. Shopify Editions, Brex Fall Release, Ramp quarterly blog). When this is the case, say so in the notes and tell the parser to classify matching pages as `type: rollup`. Example:
134
+
135
+ > Ramp publishes quarterly rollups at `/blog/new-on-ramp-q*-*` and monthly editions at `/blog/new-on-ramp-*-edition`. Classify all entries from this source as `type: rollup` — individual features within a rollup are not separately indexed.
136
+
137
+ The `parsing-changelogs` skill ("Classifying Rollups" section) covers what rollups look like and when to set the `type` field. Your job in the playbook is to capture the org-specific signal so future fetches don't have to re-derive it from the page.
138
+
139
+ ### Levels of playbook quality
140
+
141
+ **Compilation** (fast, from metadata only): Write notes based on source metadata — URL, type, priority, parseInstructions. Good for bulk coverage but claims about page structure, cadence, and version format are inferred, not verified. Suitable for initial scaffolding or low-priority orgs.
142
+
143
+ **Verified** (thorough, from actual data): Before writing, query release data and fetch logs to ground every claim in observation:
144
+
145
+ 1. `releases list <slug> --json` — Check actual version formats, titles, content length, publishedAt patterns
146
+ 2. `releases admin source fetch-log <slug> --json` — Check for errors, success rates, stale data
147
+ 3. Analyze: calculate real cadence from dates, identify empty content or null fields, spot date drift
148
+ 4. Write notes citing specific data points, not general assumptions
149
+
150
+ Use the verified approach for high-value orgs, when onboarding new orgs with scrape sources, or when refreshing stale compilation-only playbooks. The difference: "this source likely needs JS rendering" (compilation) vs "all 50 releases have empty content — the RSS feed delivers summaries only, needs crawl mode on per-release pages" (verified).
151
+
152
+ Write notes during onboarding after you've fetched and validated sources. Update them when you discover new quirks or when source behavior changes. If notes are empty or stale, write them before doing fetch work — future agents (including yourself in later sessions) will benefit.
153
+
154
+ **Updating notes:** Use `update_playbook_notes` with the complete notes content — it replaces the entire notes section. You can rewrite, reorganize, or clear notes at any time.
155
+
156
+ **Changing source configuration:** The header reflects current source metadata. To change things like `parseInstructions`, `fetchPriority`, or `crawlEnabled`, use `edit_source` with metadata — the header updates automatically.
157
+
158
+ **Product context:** Playbooks group sources by product when products are configured. Some sources (like an org's engineering blog) aren't tied to a specific product but may contain content relevant to any product under that org — the playbook calls these out as "Organization-Level Sources" with a note about which products they may cover.
159
+
160
+ ## Rendering Control
161
+
162
+ The scrape adapter can fetch pages with or without a headless browser. Static-site providers (Docusaurus, VitePress, WordPress, Ghost, Mintlify) are fetched without rendering by default — this is ~10-30x faster.
163
+
164
+ To override the default for a specific source:
165
+ - `releases admin source edit <identifier> --no-render` — force fast fetch (no headless browser)
166
+ - `releases admin source edit <identifier> --render` — force headless browser rendering
167
+
168
+ Use `--render` when you know a source needs JavaScript execution. Use `--no-render` when you've verified the content is in the initial HTML for a provider not yet in the static list.
169
+
170
+ After adding a new scrape source with an unknown provider, check the first fetch results. If content is complete, consider setting `--no-render` and noting the provider behavior in the playbook.
171
+
172
+ ## Duplicate Detection
173
+
174
+ Before adding sources, search for overlapping URLs.
175
+
176
+ Common duplicates:
177
+ - Same repo via GitHub URL vs changelog page (the GitHub source is usually better)
178
+ - RSS feed URL vs the page it feeds from (keep the feed)
179
+ - With and without trailing slash or `www.` prefix
@@ -0,0 +1,180 @@
1
+ ---
2
+ name: parsing-changelogs
3
+ description: How the Releases fetch and parse pipeline works — covers feed vs scrape adapters, incremental vs bulk parsing, dry-run testing, crawl mode, content hashing, and enrichment
4
+ ---
5
+
6
+ # Parsing Changelogs
7
+
8
+ How the Releases fetch pipeline converts changelog pages into structured release data.
9
+
10
+ ## Pipeline Overview
11
+
12
+ The fetch pipeline follows this priority order:
13
+
14
+ 1. **Feed adapter** — if the source has a known feed URL (in `metadata.feedUrl`), fetch and parse the feed directly. Fastest and most reliable.
15
+ 2. **Markdown fetch** — if `metadata.markdownUrl` is set, fetch raw markdown instead of rendered HTML.
16
+ 3. **Fast fetch (static providers)** — for providers known to serve pre-rendered HTML (Docusaurus, VitePress, WordPress, Ghost, Mintlify), fetch without headless browser rendering. Uses Cloudflare crawl API with `render: false`. ~10-30x faster than full rendering. Controlled by provider `staticContent` hint or per-source `renderRequired` metadata.
17
+ 4. **Cloudflare rendering** — for JS-heavy pages (React SPAs, Notion, etc.), use Cloudflare's browser rendering API to get the fully-rendered HTML. Fallback when fast fetch returns no content.
18
+
19
+ After fetching content, the pipeline parses it:
20
+ - **Incremental parsing** — if the source already has releases in the database, extract only new ones by comparing against known releases. This is the default for subsequent fetches.
21
+ - **Bulk parsing** — parse the entire page into releases. Used on first fetch or when `--full` is specified.
22
+
23
+ ## Fetching
24
+
25
+ Trigger a fetch for a source by ID or slug. CLI: `releases admin source fetch <slug> [--dry-run] [--max <n>]`. Typed tool: `fetch_source` with identifier (ID or slug) param.
26
+
27
+ Key CLI flags (not available via typed tool — the typed tool always does a full server-side fetch):
28
+ - `--dry-run` — parse but don't persist. Essential for validation.
29
+ - `--max <n>` — limit releases to extract (default: 200).
30
+ - `--full` — bypass incremental parsing, force full re-parse.
31
+ - `--crawl` / `--no-crawl` — enable/disable crawl mode.
32
+
33
+ ### Checking results
34
+
35
+ After fetching, verify releases were persisted. CLI: `releases latest <slug> --json` or `releases admin source fetch-log <slug>`. Typed tool: `get_latest_releases` with source param. Use `get_organization` (or `releases admin org show <slug> --json`) to see the full picture of an org's sources.
36
+
37
+ ## Incremental vs Bulk Parsing
38
+
39
+ - **Incremental** (default for sources with existing releases): The parser receives a list of known release titles/versions and extracts only releases that don't match any known ones. Much faster and cheaper for sources that add releases incrementally.
40
+ - **Bulk** (first fetch or `--full`): Parses the entire page content into releases. Used when no releases exist yet or when you suspect the incremental parser missed something.
41
+
42
+ ## Content Hashing
43
+
44
+ Each fetch computes a SHA-256 hash of the page content. If the hash matches the previous fetch, parsing is skipped entirely (no AI calls). This prevents redundant processing when a page hasn't changed.
45
+
46
+ ## Crawl Mode
47
+
48
+ For index-style pages that link to individual release pages:
49
+
50
+ 1. The crawler follows links matching the crawl pattern (auto-detected or from provider hints).
51
+ 2. Each linked page is fetched and parsed individually.
52
+ 3. Results are aggregated into releases.
53
+
54
+ Enable with `--crawl` flag or by setting `metadata.crawlEnabled: true` on the source.
55
+
56
+ ## Feed Content Depth Assessment
57
+
58
+ **Automatic detection:** The feed adapter now auto-detects title-only feeds — if every item has fewer than 20 characters of content, the feed is marked `feedContentDepth: "summary-only"` and `fetchViaFeed` returns null, causing the scrape adapter to fall through to crawl or single-page extraction. This handles the worst case (feeds like Notion, Apollo, LangChain, LaunchDarkly that carry only `<title>` + `<link>` with no `<description>` or `<content:encoded>`) without manual intervention. Once marked, the flag persists and subsequent fetches skip the feed entirely.
59
+
60
+ **Manual assessment is still required for partial-content feeds.** Auto-detection only catches feeds whose items are effectively empty (fewer than 20 characters of content each). Many feeds provide decent text summaries but the actual pages have significantly richer content — product screenshots, video demos, detailed code examples, and inline media that the feed strips out.
61
+
62
+ **The anti-pattern to avoid:** fetching the bare changelog index, seeing that content came back, and declaring success without ever checking whether each release has a dedicated article page with more detail. A paragraph of feed text is not evidence that the page is equally thin.
63
+
64
+ **When to check:** After every feed fetch where `feedContentDepth` is not already set. Do not skip this because feed entries have multiple sentences. The question is not "does the feed have some content?" but "does the actual page have substantially more?"
65
+
66
+ **How to check:** Dispatch a bulk-worker subagent to sample 2-3 release URLs. Prompt the subagent:
67
+
68
+ > "Fetch these URLs with WebFetch and compare the page content against these feed summaries. For each URL, report: (1) how much content is on the page vs the feed summary, (2) whether there are images, screenshots, or embedded videos (YouTube, Vimeo, Loom), (3) whether there are code examples or detailed explanations not in the feed. Summarize your findings."
69
+
70
+ Do NOT fetch release URLs in the parent agent — always delegate to a subagent to keep your context window clean.
71
+
72
+ **What to do based on the result:**
73
+
74
+ If pages are richer than feed content (more text, images, videos, or code examples):
75
+ 1. Record the assessment and enable crawl mode. CLI: `releases admin source edit <identifier> --metadata '{"feedContentDepth":"summary-only","crawlEnabled":true}'`. Typed tool: `edit_source` with the same metadata. Subsequent fetches will follow links to per-release pages and extract full content in one pass.
76
+ 2. Re-fetch the source once to backfill. CLI: `releases admin source fetch <slug> --full`. Typed tool: `fetch_source`.
77
+ 3. Verify results. CLI: `releases list <slug> --json` or `releases latest <slug>`. Typed tool: `get_latest_releases` — check content is richer after the re-fetch.
78
+
79
+ If feed already provides full content with no meaningful additions on the page:
80
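+ 1. Record `feedContentDepth: "full"` so future sessions skip the sampling step. CLI: `releases admin source edit <identifier> --metadata '{"feedContentDepth":"full"}'`. Typed tool: `edit_source` with the same metadata.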
81
+
82
+ Once `feedContentDepth` is set, skip the sampling step on future encounters. Crawl mode handles the rest during normal fetches — there is no separate enrichment phase.
83
+
84
+ **Per-source AI instructions:** If a source has unique content patterns (e.g., videos always embedded, unusual changelog format), note this in the discovery state so parseInstructions can be set later via the CLI.
85
+
86
+ ## Blog-Style Sources
87
+
88
+ Engineering blogs and news pages mix product announcements with educational content, opinion pieces, and corporate news. They can be useful supplementary sources but require aggressive filtering via `parseInstructions` to avoid noise.
89
+
90
+ **Before working with blog sources:** Check the org's playbook (`releases admin content playbook <org>`) for notes about how existing blog sources perform, what filtering works, and which products they cover.
91
+
92
+ **When to add a blog source:**
93
+ - The org's primary changelogs don't cover major product announcements (new models, new services)
94
+ - The blog has engineering/product content not found elsewhere
95
+ - The blog is a secondary signal source — primary coverage should come from dedicated changelogs
96
+
97
+ **How to configure:**
98
+ 1. Add as `--type scrape` with `--priority low` (blog pages change infrequently)
99
+ 2. Set `parseInstructions` that tell the AI what to include and — more importantly — what to skip
100
+ 3. Always dry-run first: `releases admin source fetch <slug> --dry-run` to check signal-to-noise ratio
101
+ 4. Iterate on instructions: tighten if too many irrelevant posts, loosen if genuine announcements are being filtered
102
+
103
+ **Writing effective parseInstructions for blogs:**
104
+
105
+ - Be explicit about what to SKIP — blogs have more noise categories than changelogs
106
+ - Use concrete signals: "titles containing 'Introducing'" is better than "posts about new features"
107
+ - Add a default-skip rule: "When in doubt, skip the post"
108
+ - Name the noise categories: "best practices guides, benchmark analyses, eval methodology, postmortems, partnership announcements, policy statements"
109
+ - For corporate news pages: skip partnerships, MOUs, office openings, funding, acquisitions, research papers, safety reports
110
+
111
+ **Example parseInstructions for an engineering blog:**
112
+ ```
113
+ ONLY extract posts that announce a NEW product, feature, tool, service, or capability.
114
+ Signals: titles containing "Introducing", "launching", or describing something new.
115
+ SKIP: best practices guides, benchmark analyses, eval methodology, postmortems,
116
+ technical deep-dives, and educational content. When in doubt, skip.
117
+ ```
118
+
119
+ **Example parseInstructions for a corporate news page:**
120
+ ```
121
+ ONLY extract posts about: (1) new model launches, (2) major new product features or services,
122
+ (3) significant platform capability announcements. Skip all: partnerships, MOUs, policy statements,
123
+ office openings, funding, acquisitions, research publications, safety reports, and opinion pieces.
124
+ ```
125
+
126
+ **Versioning:** Blog posts don't have traditional version strings. Set `parseInstructions` to tell the AI that dates are not versions (same as for date-headed changelogs like Claude's consumer release notes).
127
+
128
+ **Content depth:** Blog index pages typically show card summaries, not full post content. The extracted releases will have thin content. Enable crawl mode (`--crawl`) to follow links to full posts if richer content is needed, but this is expensive — only enable for high-value sources.
129
+
130
+ ## Dates
131
+
132
+ Every release should get a `publishedAt` if one can be recovered from the page, even an approximate one — sources with no dates drop out of the release feed's time-based views entirely.
133
+
134
+ - **Full dates** ("March 3, 2026", "2026-03-03"): use the exact ISO date — `2026-03-03`.
135
+ - **Month-only headings** ("April 2026", "March 2026"): use the **first of the month** — `2026-04-01`. Many API changelogs (e.g. Brex Developer API) group entries by month; this is the right call, not "omit date."
136
+ - **Quarter or season** ("Q3 2025", "Fall 2025"): use the first day of the quarter/season (Q3 → `2025-07-01`, Fall → `2025-09-01`).
137
+ - **Year only** ("2025"): use `2025-01-01`.
138
+ - **Nothing recoverable**: omit `publishedAt`. Only do this if there truly is no date signal anywhere — check adjacent headings, breadcrumbs, and the URL slug before giving up.
139
+
140
+ Approximation is better than omission. A release with an approximate month-start date still surfaces in sort orders, "last 30 days" windows, and monthly groupings.
141
+
142
+ ## Classifying Rollups
143
+
144
+ Most releases are **features** — individual version bumps, single product announcements, or tight incremental changelog entries. Some are **rollups** — seasonal, quarterly, or annual catch-all pages that collect many already-shipped features into a single banner post. The parser assigns each release a `type` field so agents and the web UI can treat them differently.
145
+
146
+ **When to set `type: "rollup"`:**
147
+
148
+ - The title names a season, quarter, month, or year range: "Fall Release 2025", "Spring 2026", "Q3 2025", "Summer '25 Edition", "New on Ramp January Edition", "Year in Review 2025", "What Shipped This Summer".
149
+ - The page re-announces many shipped features under section headings, rather than describing a single change.
150
+ - The post is published once, rarely updates, and anchors a date range (not a single `publishedAt` moment).
151
+ - The destination may be a full microsite or editorial landing page (not just a blog post) — rollups often get custom design treatment because they're marketing moments as well as product updates.
152
+ - Common examples: Shopify Editions (`shopify.com/editions/summer2025`, twice-yearly microsite with 100+ features), Brex Fall Release, Ramp quarterly blog posts, Vercel Ship recaps, Stripe Sessions roundups, AWS re:Invent summaries.
153
+
154
+ **When NOT to set rollup:**
155
+
156
+ - Single version releases (v2.0.0, v15.1), even when they bundle multiple fixes — those are features.
157
+ - Dated changelog entries like "March 3, 2026" that cover one day's changes.
158
+ - Blog posts announcing a single new product.
159
+ - Named platform launches (Next.js 15, Node 22 LTS) — those are version-anchored features, not rollups.
160
+
161
+ **How to recognize rollup sources:**
162
+
163
+ Before parsing, **always read the playbook** (CLI: `releases admin content playbook <org>`, typed tool: `get_playbook` with organization param). If a company publishes rollups as its primary cadence — quarterly, seasonal, "every few months" — the playbook notes should say so explicitly. Example notes:
164
+
165
+ - "Brex publishes quarterly seasonal rollup pages at `/product-announcements/{fall,spring,summer,winter}-release-YYYY`. Treat each as `type: rollup`."
166
+ - "Ramp's blog series `/blog/new-on-ramp-*-edition` and `/blog/new-on-ramp-q*-*` are monthly and quarterly rollups, respectively. Classify as `type: rollup`; individual features within are not separately indexed."
167
+ - "Shopify publishes twice-yearly Editions as standalone microsites at `/editions/{summer,winter}YYYY` — index page at `/editions` lists all of them. Each Edition is a `type: rollup` with 100+ features under themed sections; crawl mode needed to pull the full page."
168
+
169
+ When you encounter a new rollup source during discovery or fetch, update the playbook notes so future fetches classify correctly without re-deriving the pattern. See the `managing-sources` skill for how to update notes.
170
+
171
+ **Leave `type` unset or `"feature"` by default.** Only mark rollup when the signals are clear.
172
+
173
+ ## Validation Workflow
174
+
175
+ When adding a new source, always validate before committing:
176
+
177
+ 1. **Fetch** — CLI: `releases admin source fetch <slug> --dry-run` then `releases admin source fetch <slug>`. Typed tool: `fetch_source` with identifier (ID or slug).
178
+ 2. **Verify** — CLI: `releases latest <slug> --json` or `releases admin source fetch-log <slug>`. Typed tool: `get_latest_releases` with source identifier.
179
+ 3. **If poor results** — try a different URL or type. CLI: `releases admin source edit <identifier> --type feed`. Typed tool: `edit_source` with identifier.
180
+ 4. **If no usable releases** — remove the source. CLI: `releases admin source remove <slug> --ignore --reason "..."`. Typed tool: `remove_source` with identifier, then `exclude_url`.
@@ -0,0 +1,217 @@
1
+ ---
2
+ name: seeding-playbooks
3
+ description: Coordinate bulk playbook writing using parallel sub-agents — covers org discovery, prompt templates, model selection, batch dispatch, verification, and the parent-saves pattern for working around subagent permission limits. Local-only (Claude Code CLI) — managed agents do not yet support spawning sub-agents.
4
+ ---
5
+
6
+ # Seeding Playbooks
7
+
8
+ Coordinate bulk creation or enrichment of playbook agent notes across many orgs using parallel sub-agents.
9
+
10
+ **Local-only**: This skill requires Claude Code's Agent tool to dispatch sub-agents. Managed agents (discovery worker, Haiku worker) cannot spawn sub-agents — that capability is behind a private beta and not yet available. When sub-agent support ships for managed agents, this skill can be adapted into a managed session mode.
11
+
12
+ ## When to Use
13
+
14
+ - Batch-populating playbooks for orgs that have sources but no notes
15
+ - Re-running the verified workflow on existing playbooks to enrich them with data-grounded observations
16
+ - After a wave of new orgs is onboarded and needs initial playbook scaffolding
17
+
18
+ ## Step 1: Identify Targets
19
+
20
+ Find orgs that need playbooks. Run this to check coverage:
21
+
22
+ ```bash
23
+ bun -e "
24
+ const orgs = JSON.parse(Bun.spawnSync(['bun', 'src/index.ts', 'admin', 'org', 'list', '--json'], { stderr: 'ignore' }).stdout.toString());
25
+ const active = orgs.filter(o => o.sourceCount > 0).sort((a,b) => b.releaseCount - a.releaseCount);
26
+ for (const org of active) {
27
+ const playbook = JSON.parse(Bun.spawnSync(['bun', 'src/index.ts', 'admin', 'content', 'playbook', org.slug, '--json'], { stderr: 'ignore' }).stdout.toString());
28
+ const status = playbook.notes?.length > 100 ? 'has notes (' + playbook.notes.length + ' chars)' : 'NEEDS PLAYBOOK';
29
+ console.log(org.slug.padEnd(25) + ' sources=' + String(org.sourceCount).padStart(2) + ' ' + status);
30
+ }
31
+ " 2>/dev/null
32
+ ```
33
+
34
+ This produces a ranked list of orgs with their playbook status. Target orgs showing "NEEDS PLAYBOOK".
35
+
36
+ ## Step 2: Gather Source Details
37
+
38
+ Before dispatching agents, collect source metadata for the target orgs. Each agent needs to know the org's sources, types, URLs, and product structure. Gather this in bulk:
39
+
40
+ ```bash
41
+ for org in <slugs>; do
42
+ echo "=== $org ==="
43
+ bun src/index.ts admin org show "$org" --json 2>/dev/null | bun -e "
44
+ const d = JSON.parse(await Bun.stdin.text());
45
+ const products = d.products?.map(p => p.name + ' (' + p.slug + ')').join(', ') || 'none';
46
+ console.log('Products:', products);
47
+ d.sources?.forEach(s => {
48
+ const meta = s.metadata || {};
49
+ const parts = [s.slug, 'url=' + s.url, 'type=' + s.type];
50
+ if (meta.feedUrl) parts.push('feed=' + meta.feedUrl);
51
+ if (s.fetchPriority !== 'normal') parts.push('priority=' + s.fetchPriority);
52
+ if (meta.parseInstructions) parts.push('parseInstructions=YES');
53
+ console.log(' ' + parts.join(' | '));
54
+ });
55
+ " 2>/dev/null
56
+ done
57
+ ```
58
+
59
+ ## Step 3: Choose Workflow and Model
60
+
61
+ ### Compilation workflow (fast, metadata-only)
62
+ - Agent writes notes from source metadata without querying release data
63
+ - Good for: bulk scaffolding, low-priority orgs, initial coverage
64
+ - Notes are educated guesses — claims about page structure and cadence are inferred, not verified
65
+
66
+ ### Verified workflow (thorough, data-grounded)
67
+ - Agent queries release data (`list <slug> --json`) and fetch logs (`admin source fetch-log <slug> --json`) before writing
68
+ - Good for: high-value orgs, scrape sources, orgs with known data quality issues
69
+ - Every claim is backed by observed data — version formats, actual cadence, content quality, fetch errors
70
+
71
+ ### Model selection
72
+
73
+ | Model | Cost/playbook (compilation / verified) | Best for |
74
+ |-------|-----------|----------|
75
+ | Opus | ~$0.07 (compilation) / ~$0.13 (verified) | Top-10 orgs, complex source sets, first-time verified runs |
76
+ | Sonnet | ~$0.01 / ~$0.03 | Sweet spot for quality/cost. Most thorough output. Use for top-20 verified runs |
77
+ | Haiku | ~$0.008 / ~$0.009 | Bulk coverage (orgs ranked below the top 20). Output is usable but may include filler. Cheapest even with higher token count (extra tokens are cached input) |
78
+
79
+ ## Step 4: Dispatch Sub-Agents
80
+
81
+ Launch one agent per org, in parallel. Use batches of 10 to avoid overwhelming the system.
82
+
83
+ ### Compilation prompt template
84
+
85
+ ```
86
+ Write playbook agent notes for the org "{slug}" and save them using the CLI.
87
+
88
+ Playbooks are **skills for agents that will fetch from this org**. Write in imperative voice — tell the agent what to do, not what things are.
89
+
90
+ Notes have three headings: `### Fetch instructions`, `### Traps`, `### Coverage`.
91
+
92
+ **{Org name}'s sources:**
93
+ {list each source with: slug, type, url, and any notable metadata}
94
+
95
+ Products: {product list or "none"}
96
+
97
+ **Fetch instructions**: One paragraph per source in imperative voice. Tell the agent what to do ("Set version=null", "Parse <h2> as version boundaries", "No filtering needed"), what to expect (cadence, content quality), and when to skip.
98
+
99
+ **Traps**: Bullet list with **bolded trigger labels**. Only include things that would cause wasted work or bad data. Include "Don't re-discover" warnings for disabled sources.
100
+
101
+ **Coverage**: 2-3 sentences. Which sources are canonical, whether there are gaps.
102
+
103
+ Save by running:
104
+ bun src/index.ts admin content playbook {slug} --regenerate 2>/dev/null
105
+ bun src/index.ts admin content playbook {slug} --notes "$(cat <<'NOTES'
106
+ YOUR NOTES HERE
107
+ NOTES
108
+ )" 2>/dev/null
109
+
110
+ Verify with: bun src/index.ts admin content playbook {slug} 2>/dev/null | tail -20
111
+ ```
112
+
113
+ ### Verified prompt template
114
+
115
+ ```
116
+ Write a **verified** playbook for the org "{slug}".
117
+ Unlike a basic playbook, you must do actual research first.
118
+
119
+ Playbooks are **skills for agents that will fetch from this org**. Write in imperative voice — tell the agent what to do, not what things are.
120
+
121
+ ## Step 1: Gather data (run all of these)
122
+
123
+ bun src/index.ts admin org show {slug} --json 2>/dev/null
124
+ {for each source:}
125
+ bun src/index.ts list {source-slug} --json 2>/dev/null
126
+ bun src/index.ts admin source fetch-log {source-slug} --json 2>/dev/null
127
+
128
+ ## Step 2: Analyze what you found
129
+
130
+ Before writing, answer these questions from the data:
131
+ - What version format does each source actually use? Cite examples.
132
+ - What's the real publish cadence? Count releases per month from dates.
133
+ - Are there fetch errors in the logs? What kind?
134
+ - Are there releases with missing dates, empty content, or data quality issues?
135
+
136
+ ## Step 3: Write skill-style notes grounded in data
137
+
138
+ Structure: `### Fetch instructions`, `### Traps`, `### Coverage`.
139
+
140
+ **Fetch instructions**: One paragraph per source in imperative voice. Tell the agent what to do ("Set version=null", "Parse <h2> as version boundaries"), what to expect (cadence, content quality), and when to skip. Cite version format examples from actual data.
141
+
142
+ **Traps**: Bullet list with **bolded trigger labels**. Only include things backed by evidence from fetch logs or release data. Include "Don't re-discover" warnings for disabled sources.
143
+
144
+ **Coverage**: 2-3 sentences. Which sources are canonical, whether there are gaps.
145
+
146
+ Every claim must cite observed data. If uncertain, say so explicitly.
147
+
148
+ ## Step 4: Save
149
+
150
+ bun src/index.ts admin content playbook {slug} --regenerate 2>/dev/null
151
+ bun src/index.ts admin content playbook {slug} --notes "$(cat <<'NOTES'
152
+ YOUR NOTES HERE
153
+ NOTES
154
+ )" 2>/dev/null
155
+
156
+ Verify with: bun src/index.ts admin content playbook {slug} 2>/dev/null | tail -20
157
+ ```
158
+
159
+ ### Dispatch pattern
160
+
161
+ ```typescript
162
+ // Launch up to 10 agents in parallel per batch
163
+ Agent({
164
+ description: "Write playbook: {slug}",
165
+ model: "sonnet", // or "haiku" for bulk
166
+ prompt: compiledPromptTemplate,
167
+ run_in_background: true,
168
+ })
169
+ ```
170
+
171
+ ## Step 5: Handle the Parent-Saves Pattern
172
+
173
+ Sub-agents may be blocked from saving notes via Bash (heredoc permission issues). When this happens:
174
+
175
+ 1. The agent completes analysis and reports its findings in the result
176
+ 2. The parent agent (you) saves the notes manually:
177
+
178
+ ```bash
179
+ bun src/index.ts admin content playbook {slug} --regenerate 2>/dev/null
180
+ bun src/index.ts admin content playbook {slug} --notes "$(cat <<'NOTES'
181
+ {paste notes from agent result}
182
+ NOTES
183
+ )" 2>/dev/null
184
+ ```
185
+
186
+ This is a known limitation of sub-agent permissions. Plan for it — check each agent's result and save the notes manually if needed.
187
+
188
+ ## Step 6: Verify Results
189
+
190
+ After all agents complete, verify coverage in bulk:
191
+
192
+ ```bash
193
+ bun -e "
194
+ const orgs = [{target slugs}];
195
+ for (const org of orgs) {
196
+ const proc = Bun.spawnSync(['bun', 'src/index.ts', 'admin', 'content', 'playbook', org, '--json'], { stderr: 'ignore' });
197
+ try {
198
+ const d = JSON.parse(proc.stdout.toString());
199
+ const len = d.notes?.length ?? 0;
200
+ console.log(org.padEnd(25) + (len > 100 ? 'OK (' + len + ' chars)' : 'MISSING'));
201
+ } catch { console.log(org.padEnd(25) + 'ERROR'); }
202
+ }
203
+ " 2>/dev/null
204
+ ```
205
+
206
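+ **Important**: Do not pipe one `bun` process into another (`bun ... | bun -e ...`) inside a shell `for` loop — stdin contention causes silent failures. Instead, run the child commands with `Bun.spawnSync` from a single `bun -e` process, as shown above. Note that the Step 2 loop uses the piped form; if it silently produces no output, rewrite it with the same `Bun.spawnSync` pattern.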
207
+
208
+ ## Tracking Notes
209
+
210
+ When coordinating a batch run, keep notes on:
211
+
212
+ - **Failure modes**: Which agents failed to save? Was it permissions, timeouts, or bad output?
213
+ - **Data quality issues found**: Verified runs surface broken feeds, empty content, stale data. Collect these for follow-up fixes.
214
+ - **Model quality at this tier**: Did Haiku produce usable output or did it need manual cleanup?
215
+ - **Coverage gaps identified**: Agents often note missing sources — collect these as onboarding candidates.
216
+
217
+ Write findings to `.context/` for future reference.
package/src/index.ts ADDED
@@ -0,0 +1,21 @@
1
+ import { existsSync } from "fs";
2
+ import { dirname, join, resolve } from "path";
3
+ import { fileURLToPath } from "url";
4
+
5
+ const here = dirname(fileURLToPath(import.meta.url));
6
+
7
/**
 * Locate the directory that holds the bundled skill playbooks.
 *
 * Two candidate locations are computed relative to this module's directory:
 *  - `../skills` — the copy shipped inside the published package (the
 *    `skills` entry of the package's `files` list);
 *  - `../../../skills` — the repo-root `skills/` directory used during
 *    local development.
 *
 * The packaged location wins whenever it exists on disk. The repo-root
 * fallback is returned as-is, without checking that it exists.
 *
 * @returns Absolute path to the skills directory.
 */
export function skillsDir(): string {
  const bundledDir = resolve(here, "..", "skills");
  const repoRootDir = resolve(here, "..", "..", "..", "skills");
  return existsSync(bundledDir) ? bundledDir : repoRootDir;
}
18
+
19
/**
 * Build the path of an entry inside the skills directory.
 *
 * @param name - Path segment appended to the skills directory (presumably a
 *   skill folder name — confirm against callers).
 * @returns The result of joining `skillsDir()` with `name`. The path is not
 *   checked for existence.
 */
export function skillPath(name: string): string {
  const root = skillsDir();
  return join(root, name);
}