npm - @cyanheads/mcp-ts-core - Versions diffs - 0.6.9 → 0.6.11 - Mend

@cyanheads/mcp-ts-core 0.6.9 → 0.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CLAUDE.md +3 -3
package/README.md +2 -2
package/biome.json +1 -1
package/changelog/0.6.x/0.6.10.md +21 -0
package/changelog/0.6.x/0.6.11.md +23 -0
package/dist/logs/combined.log +4 -0
package/dist/logs/error.log +4 -0
package/dist/logs/interactions.log +0 -0
package/dist/utils/index.d.ts +1 -1
package/dist/utils/index.d.ts.map +1 -1
package/dist/utils/index.js +1 -1
package/dist/utils/index.js.map +1 -1
package/dist/utils/parsing/htmlExtractor.d.ts +146 -0
package/dist/utils/parsing/htmlExtractor.d.ts.map +1 -0
package/dist/utils/parsing/htmlExtractor.js +171 -0
package/dist/utils/parsing/htmlExtractor.js.map +1 -0
package/dist/utils/parsing/index.d.ts +1 -0
package/dist/utils/parsing/index.d.ts.map +1 -1
package/dist/utils/parsing/index.js +1 -0
package/dist/utils/parsing/index.js.map +1 -1
package/package.json +19 -9
package/skills/field-test/SKILL.md +205 -82
package/skills/polish-docs-meta/references/package-meta.md +1 -1
package/skills/release-and-publish/SKILL.md +150 -0
package/skills/setup/SKILL.md +64 -14
package/skills/release/SKILL.md +0 -142

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cyanheads/mcp-ts-core",
-  "version": "0.6.9",
+  "version": "0.6.11",
   "mcpName": "io.github.cyanheads/mcp-ts-core",
   "description": "Agent-native TypeScript framework for building MCP servers. Declarative definitions with auth, multi-backend storage, OpenTelemetry, and first-class support for Bun/Node/Cloudflare Workers.",
   "main": "dist/core/index.js",
@@ -146,7 +146,7 @@
     "audit:fix": "bun audit --fix",
     "changelog:build": "bun run scripts/build-changelog.ts",
     "changelog:check": "bun run scripts/build-changelog.ts --check",
-    "publish-mcp": "bunx mcp-publisher publish"
+    "publish-mcp": "mcp-publisher login github -token \"$(security find-generic-password -a \"$USER\" -s mcp-publisher-github-pat -w)\" && mcp-publisher publish"
   },
   "resolutions": {
     "brace-expansion": "1.1.14",
@@ -159,19 +159,19 @@
     "yaml": "1.10.3"
   },
   "devDependencies": {
-    "@biomejs/biome": "2.4.12",
-    "@cloudflare/workers-types": "^4.20260422.1",
+    "@biomejs/biome": "2.4.13",
+    "@cloudflare/workers-types": "^4.20260423.1",
     "@hono/otel": "^1.1.1",
-    "@opentelemetry/instrumentation-http": "^0.215.0",
     "@opentelemetry/exporter-metrics-otlp-http": "^0.215.0",
     "@opentelemetry/exporter-trace-otlp-http": "^0.215.0",
+    "@opentelemetry/instrumentation-http": "^0.215.0",
     "@opentelemetry/instrumentation-pino": "^0.61.0",
     "@opentelemetry/resources": "^2.7.0",
     "@opentelemetry/sdk-metrics": "^2.7.0",
     "@opentelemetry/sdk-node": "^0.215.0",
     "@opentelemetry/sdk-trace-node": "^2.7.0",
     "@opentelemetry/semantic-conventions": "^1.40.0",
-    "@supabase/supabase-js": "^2.104.0",
+    "@supabase/supabase-js": "^2.104.1",
     "@types/bun": "^1.3.13",
     "@types/js-yaml": "^4.0.9",
     "@types/node": "^25.6.0",
@@ -183,26 +183,28 @@
     "bun-types": "^1.3.13",
     "chrono-node": "^2.9.0",
     "clipboardy": "^5.3.1",
+    "defuddle": "^0.18.1",
     "depcheck": "^1.4.7",
     "diff": "^9.0.0",
     "execa": "^9.6.1",
     "fast-check": "^4.7.0",
-    "js-yaml": "^4.1.1",
     "ignore": "^7.0.5",
+    "js-yaml": "^4.1.1",
+    "linkedom": "^0.18.12",
     "node-cron": "^4.2.1",
     "openai": "^6.34.0",
     "papaparse": "^5.5.3",
     "partial-json": "^0.1.7",
     "pdf-lib": "^1.17.1",
     "pino-pretty": "^13.1.3",
-    "sanitize-html": "^2.17.3",
     "repomix": "^1.13.1",
+    "sanitize-html": "^2.17.3",
     "tsc-alias": "^1.8.16",
     "typedoc": "^0.28.19",
     "typescript": "^6.0.3",
     "unpdf": "^1.6.0",
     "validator": "^13.15.35",
-    "vite": "8.0.9",
+    "vite": "8.0.10",
     "vitest": "^4.1.5"
   },
   "keywords": [
@@ -278,9 +280,11 @@
     "@opentelemetry/semantic-conventions": "^1.40.0",
     "@supabase/supabase-js": "^2.103.3",
     "chrono-node": "^2.9.0",
+    "defuddle": "^0.18.1",
     "diff": "latest",
     "fast-xml-parser": "latest",
     "js-yaml": "^4.1.1",
+    "linkedom": "^0.18.12",
     "node-cron": "^4.2.1",
     "openai": "^6.34.0",
     "papaparse": "^5.5.3",
@@ -327,6 +331,9 @@
     "chrono-node": {
       "optional": true
     },
+    "defuddle": {
+      "optional": true
+    },
     "diff": {
       "optional": true
     },
@@ -336,6 +343,9 @@
     "js-yaml": {
       "optional": true
     },
+    "linkedom": {
+      "optional": true
+    },
     "node-cron": {
       "optional": true
     },

package/skills/field-test/SKILL.md CHANGED Viewed

@@ -1,127 +1,250 @@
 ---
 name: field-test
 description: >
-  Exercise tools, resources, and prompts with real-world inputs to verify behavior end-to-end. Use after adding or modifying definitions, or when the user asks to test, try out, or verify their MCP surface. Calls each definition with realistic and adversarial inputs and produces a report of issues, pain points, and recommendations.
+  Exercise tools, resources, and prompts against a live HTTP server via MCP JSON-RPC over curl. Starts the server, surfaces the catalog, runs real and adversarial inputs, and produces a tight report with concrete findings and numbered follow-up options. Use after adding or modifying definitions, or when the user asks to test, try out, or verify their MCP surface.
 metadata:
   author: cyanheads
-  version: "1.3"
+  version: "2.0"
   audience: external
   type: debug
 ---
 ## Context
-Unit tests (`add-test` skill) verify handler logic with mocked context. Field testing verifies the full picture: real server, real transport, real inputs, real outputs. It catches issues that unit tests miss — bad descriptions, awkward input shapes, unhelpful error messages, missing format functions, schema mismatches, silent divergence between `structuredContent` and model-visible `content[]`, and surprising edge-case behavior.
+Unit tests (`add-test` skill) verify handler logic with mocked context. Field testing exercises the real HTTP transport with real JSON-RPC: starts the server, calls `initialize`, surfaces the catalog, runs inputs, and checks what a client actually sees. It catches what unit tests miss — awkward input shapes, unhelpful errors, missing format output, drift between `structuredContent` and `content[]`, edge-case surprises.
-**Actively use** the tools — don't just read their code.
+**Actively call the tools. Don't read code and guess.**
 ---
 ## Steps
-### 1. Surface available definitions
+### 1. Start the server
+Write the helper to `/tmp/mcp-field-test.sh` once, then source it in every subsequent Bash call. Helper keeps PID / URL / session id in `/tmp/mcp-field-test.env` so state survives across tool invocations.
+```bash
+cat > /tmp/mcp-field-test.sh <<'HELPER_EOF'
+#!/bin/bash
+# Field-test helper: manage an MCP HTTP server + JSON-RPC session across shell calls.
+STATE_FILE="/tmp/mcp-field-test.env"
+[ -f "$STATE_FILE" ] && . "$STATE_FILE"
+# Usage: mcp_start [DIR]
+# Rebuilds the project in DIR (default: $PWD), launches its HTTP server in the
+# background, waits for the listening URL to show up in the server log, then
+# saves MCP_PID / MCP_URL / MCP_PORT to STATE_FILE. Returns 1 if the build fails
+# or the URL never appears.
+mcp_start() {
+  local dir="${1:-$PWD}"
+  echo "building $dir ..."
+  (cd "$dir" && bun run rebuild) >/tmp/mcp-build.log 2>&1 \
+    || { echo "BUILD FAILED — see /tmp/mcp-build.log"; return 1; }
+  echo "starting server ..."
+  (cd "$dir" && bun run start:http) >/tmp/mcp-server.log 2>&1 &
+  # $! is the PID of the backgrounded job started on the previous line.
+  local pid=$!
+  local line=""
+  # Poll the server log for the listening URL: 40 tries x 0.25 s, about 10 s in total.
+  for _ in $(seq 1 40); do
+    line=$(grep -Eo 'listening at http://[^" ]+/mcp' /tmp/mcp-server.log | head -1)
+    [ -n "$line" ] && break
+    sleep 0.25
+  done
+  if [ -z "$line" ]; then
+    echo "server failed to start — see /tmp/mcp-server.log"
+    kill "$pid" 2>/dev/null
+    return 1
+  fi
+  # Drop the "listening at " prefix to leave the bare URL, then pull the port digits out of it.
+  local url="${line#listening at }"
+  local port; port=$(echo "$url" | sed -E 's|.*:([0-9]+)/.*|\1|')
+  # Persist the state, then source it so the current shell has the variables too.
+  cat > "$STATE_FILE" <<EOF
+export MCP_PID=$pid
+export MCP_URL=$url
+export MCP_PORT=$port
+EOF
+  . "$STATE_FILE"
+  echo "ready pid=$pid url=$url"
+}
+# Usage: mcp_init
+# Sends the JSON-RPC `initialize` request to MCP_URL, reads the session id from
+# the `mcp-session-id` response header, saves it to STATE_FILE as MCP_SID, then
+# sends the `notifications/initialized` notification on that session.
+# Returns 1 if MCP_URL is unset or no session id header came back.
+mcp_init() {
+  [ -z "$MCP_URL" ] && { echo "run mcp_start first"; return 1; }
+  # -D dumps the response headers to this file; the response body itself is discarded.
+  local hdr="/tmp/mcp-init-headers.txt"
+  curl -sS -D "$hdr" -X POST "$MCP_URL" \
+    -H "Content-Type: application/json" \
+    -H "Accept: application/json, text/event-stream" \
+    -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-06-18","capabilities":{},"clientInfo":{"name":"field-test","version":"2.0"}}}' >/dev/null
+  # Case-insensitive header match; strip CR/LF so the id is safe to reuse in a header.
+  local sid; sid=$(grep -i '^mcp-session-id:' "$hdr" | awk '{print $2}' | tr -d '\r\n')
+  [ -z "$sid" ] && { echo "no session id returned"; return 1; }
+  # Rewrite the whole state file: the existing values plus the new session id.
+  cat > "$STATE_FILE" <<EOF
+export MCP_PID=$MCP_PID
+export MCP_URL=$MCP_URL
+export MCP_PORT=$MCP_PORT
+export MCP_SID=$sid
+EOF
+  . "$STATE_FILE"
+  # Notification (no "id" field); any response body is discarded.
+  curl -sS -X POST "$MCP_URL" \
+    -H "Content-Type: application/json" \
+    -H "Accept: application/json, text/event-stream" \
+    -H "Mcp-Session-Id: $sid" \
+    -d '{"jsonrpc":"2.0","method":"notifications/initialized"}' >/dev/null
+  echo "session=$sid"
+}
+# Usage: mcp_call METHOD [JSON_PARAMS]
+# Sends one JSON-RPC request on the current session and prints the response
+# body on stdout (SSE framing stripped when present). Pipe to `jq`.
+# Diagnostics go to stderr so they never end up in a `| jq` pipeline.
+# Returns 1 without a session, 2 without METHOD, or curl's exit status when
+# the transfer itself fails.
+mcp_call() {
+  [ -z "$MCP_SID" ] && { echo "run mcp_init first" >&2; return 1; }
+  [ -z "${1:-}" ] && { echo "usage: mcp_call METHOD [JSON_PARAMS]" >&2; return 2; }
+  local method="$1"; local params="${2:-}"
+  local body resp
+  # $RANDOM supplies the request id; JSON_PARAMS is spliced in verbatim, so it
+  # must already be valid JSON.
+  if [ -z "$params" ]; then
+    body=$(printf '{"jsonrpc":"2.0","id":%d,"method":"%s"}' "$RANDOM" "$method")
+  else
+    body=$(printf '{"jsonrpc":"2.0","id":%d,"method":"%s","params":%s}' "$RANDOM" "$method" "$params")
+  fi
+  # Capture the body instead of piping straight into sed so a curl failure is
+  # reported through the return status rather than masked by the pipeline.
+  resp=$(curl -sS -X POST "$MCP_URL" \
+    -H "Content-Type: application/json" \
+    -H "Accept: application/json, text/event-stream" \
+    -H "Mcp-Session-Id: $MCP_SID" \
+    -d "$body") || return
+  [ -n "$resp" ] || return 0
+  # The Accept header allows either an SSE stream or a plain JSON body.
+  # Strip `data: ` framing only when it is there; otherwise pass the body through.
+  if printf '%s\n' "$resp" | grep -q '^data: '; then
+    printf '%s\n' "$resp" | sed -n 's/^data: //p'
+  else
+    printf '%s\n' "$resp"
+  fi
+}
+mcp_stop() {
+  [ -n "$MCP_PID" ] && kill "$MCP_PID" 2>/dev/null
+  rm -f "$STATE_FILE"
+  echo "stopped"
+}
+HELPER_EOF
+. /tmp/mcp-field-test.sh
+mcp_start /absolute/path/to/server   # replace with the target server
+```
+**Notes**
+- `MCP_HTTP_PORT` is a *starting* port — the server auto-increments if taken. Helper parses the real URL from the log (`HTTP transport listening at ...`).
+- If `bun run rebuild` fails, stop. Don't field-test broken code — fix the build first.
+- If a server is already listening on the project's port (`lsof -i :<port>`), confirm with the user before killing it; it may be their own session.
+### 2. Initialize the session
+```bash
+. /tmp/mcp-field-test.sh
+mcp_init
+```
+Runs `initialize`, captures the session id, sends `notifications/initialized`.
+### 3. Surface the catalog
+```bash
+. /tmp/mcp-field-test.sh
+mcp_call tools/list     | jq '.result.tools[]     | {name, description, inputSchema}'
+mcp_call resources/list | jq '.result.resources[] | {uri, name, mimeType}'
+mcp_call prompts/list   | jq '.result.prompts[]   | {name, description, arguments}'
+```
+Present a compact catalog to the user: each definition's name + 1-line description. Flag vague or missing descriptions as you go — those feed into the report. Use this to build the test plan.
+### 4. Plan the test pass
+**Budget.** Don't run every category against every definition — the cross-product is infeasible. Apply the **universal battery** to everything; apply **situational categories** only when the definition triggers them.
+**Universal battery — run on every tool**
+| Category | What to verify |
+|:---------|:---------------|
+| Happy path | One realistic input. Output shape matches schema. `content[]` text reads clearly to a human. |
+| `structuredContent` ↔ `content[]` parity | Every field in `structuredContent` is surfaced in the text. Parity gap = client-specific blindness. |
+| Input error | One invalid input (wrong type or missing required). Error text says *what*, *why*, *how to fix*. |
+**Situational — add only when triggered**
+| Trigger (look in input schema or `annotations`) | Add category |
+|:------------------------------------------------|:-------------|
+| `include` / `fields` / `expand` / `view` / `projection` parameter | Field selection: non-default value renders requested fields |
+| Array return with `query` / `filter` inputs | Empty result: does response explain *why* (echo criteria, suggest broadening)? |
+| Batch / bulk input (arrays of IDs, multi-item ops) | Partial success: mix valid + invalid items |
+| `annotations.readOnlyHint: true` | Confirm no mutation happened |
+| `annotations.idempotentHint: true` | Call twice with same input — safe? |
+| Hits external API / live upstream | One call that exercises upstream; note rate-limit / timeout / transient-failure behavior |
+| Chained with other tools (search → detail → act) | Run one representative chain end-to-end; does each step return the IDs/cursors the next needs? |
+| `cursor` / `offset` / `limit` params | Pagination: second page, end-of-list |
-List the MCP tools, resources, and prompts available in your environment. This confirms the server is connected and gives you everything you need — names, descriptions, parameter schemas — to plan your tests.
+**Resources.** Happy path, not-found URI, `list` if defined, pagination if used.
+**Prompts.** Happy path, defaults omitted, skim message quality.
-If you don't see any MCP tools from this server, ask the user to connect it first (e.g. `claude mcp add` for Claude Code, or the equivalent for their client). Don't proceed until the tools are visible.
+**Sampling for large servers.** If more than 15 tools, run the universal battery on all, but pick roughly 30–40% for situational testing. Weight toward: write-shaped tools, complex schemas, external deps. List which ones you skipped in the report.
-Present what you find: each definition's name, parameters (with types and descriptions), and any notable schema details (optional fields, enums, constraints). This is your test surface.
+**Auth & external state.**
-### 2. Test each definition
+- If a tool needs real API keys and they're not set, note `skipped — requires $VAR` and move on. Don't fabricate inputs.
+- Tools that write to real external systems (third-party APIs, shared DBs): confirm with the user before running, or use a dry-run input if one exists.
-For every tool, resource, and prompt, run through these categories:
+### 5. Execute
-#### Tools
+Use `TaskCreate` — one task per definition. Mark complete as you go. Don't batch.
-| Category | What to test |
-|:---------|:-------------|
-| **Happy path** | Realistic input that should succeed. Verify output shape matches the output schema. Verify format function produces sensible content blocks. |
-| **`structuredContent` parity** | The `format-parity` lint rule already asserts every terminal field in the output schema appears in `format()`'s rendered text (via sentinel injection at startup). Field testing layers real-data checks on top: are values rendered accurately (not just their labels)? Do conditional-render branches in `format()` still render every field when specific values are present? Does the content look right to a human reading the LLM's view? |
-| **Variations** | Different valid input combinations — optional fields omitted, optional fields included, different enum values, min/max boundaries. |
-| **Field selection / projection** | For tools with `fields`, `include`, `expand`, `view`, or similar parameters, call the tool with non-default selections. Verify the handler returns the requested fields and `format()` renders each requested field rather than a hardcoded summary subset. |
-| **Edge cases** | Empty strings, zero values, very long inputs, special characters, Unicode. |
-| **Error paths** | Missing required fields, wrong types, nonexistent IDs, inputs that should trigger domain errors. Verify errors are clear and actionable — they should name what went wrong, why, and what to do next. |
-| **Empty results** | Inputs that match nothing. Verify the response explains *why* (echoes criteria, suggests broadening) rather than returning a bare empty array. |
-| **Partial success** | For tools that operate on multiple items, test cases where some succeed and some fail. Verify both outcomes are reported — not just the successes. |
-| **Annotations** | Review tool `annotations` (`readOnlyHint`, `destructiveHint`, `idempotentHint`, `openWorldHint`) against actual behavior. If a tool is marked read-only, verify it does not mutate state. If it is marked idempotent, verify retries with the same input are safe. If it is marked open-world false, verify it is not silently depending on live external systems. |
-| **Workflow chaining** | For servers with multi-step workflows, execute 1-2 representative chains end-to-end. Example: search → detail → follow-up action. Verify each step returns the IDs, cursors, URIs, tokens, or state needed for the next step without guessing. |
-| **Response quality** | Inspect successful responses for: (1) chaining IDs needed for follow-up calls, (2) operational metadata (counts, applied filters, truncation notices), (3) filtering transparency (if anything was excluded, does the response say what and how to include it?), (4) reasonable response size (not dumping unbounded data into context). See the `add-tool` skill's **Tool Response Design** section for the full set of patterns. |
-| **Resilience** | For tools backed by external APIs or slow subsystems, test or explicitly note rate-limit, timeout, and transient-failure behavior. Verify retries/backoff happen where intended, or at minimum that the error message clearly tells the user whether to retry, wait, or change input. |
-| **Descriptions** | Read every field's `.describe()` — would a user/LLM understand what to provide? Flag vague or missing descriptions. |
+For each call, capture: input sent, response (trim huge payloads to files), whether `isError: true` appeared, anything surprising (slow response, parity drift, unhelpful text, crash).
-#### Resources
+**Interpreting responses**
-| Category | What to test |
-|:---------|:-------------|
-| **Happy path** | Valid URI with known params. Verify returned content and MIME type. |
-| **List** | Call `list` if defined. Verify returned resources have names and valid URIs. |
-| **Not found** | URI with nonexistent params. Verify a clear error, not a crash. |
-| **Pagination** | If the resource uses `extractCursor`/`paginateArray`, test with varying limits and cursors. |
+- Tool domain errors return `{result: {content: [...], isError: true}}` — they live in `result`, not `error`. Check `isError`, not the JSON-RPC error field.
+- JSON-RPC `error` only appears for protocol issues (bad session, malformed envelope, unknown method).
+- `mcp_call` already strips SSE framing. Pipe to `jq` for readability.
-#### Prompts
+### 6. Tear down
-| Category | What to test |
-|:---------|:-------------|
-| **Happy path** | Valid args. Verify generated messages are well-formed. |
-| **Defaults** | Omit optional args. Verify the output still makes sense. |
-| **Content quality** | Read the generated messages — are they clear, well-structured prompts? |
+```bash
+. /tmp/mcp-field-test.sh
+mcp_stop
+```
-### 3. Track progress
+Kills the background server, clears state. Do this *before* writing the report so nothing leaks into the next session.
-Use a todo list to track each definition and its test status. Mark each as you go — don't batch.
+### 7. Report
-### 4. Produce the report
+Three sections. Tight. The user should be able to skim the summary, read details only for what matters, and act on numbered options.
-After testing everything, present a structured report:
+#### Summary (1 paragraph)
-#### Summary table
+One paragraph. How many definitions exercised, how many passed clean, how many have issues, and the single most important finding. No tables, no lists.
-| Definition | Type | Status | Issues |
-|:-----------|:-----|:-------|:-------|
-| `acme_search_items` | tool | pass | — |
-| `acme_get_item` | tool | issues | Error message unhelpful for missing ID |
-| `item://` | resource | fail | Crashes on nonexistent ID |
+#### Findings
-#### Detailed findings
+Only include definitions with issues. Group by severity. Each finding is 2–4 lines unless it genuinely needs more.
-For each definition with issues, include:
+| Severity | Meaning |
+|:---------|:--------|
+| **bug** | Broken: crash, wrong output, `isError: true` on valid input, data loss, schema violation |
+| **ux** | Works but degrades the user/LLM experience: vague description, unhelpful error text, missing `format()`, parity drift, annotations that don't match behavior |
+| **nit** | Polish: phrasing, inconsistent tone, minor doc gaps |
-- **What happened** — the input, the output or error, and what was expected
-- **Severity** — `bug` (broken behavior), `ux` (works but confusing/unhelpful), `nit` (minor polish)
-- **Recommendation** — specific fix suggestion
+Format:
-#### Pain points
+```
+**<tool_name> — <bug|ux|nit>**
+Input: `<short input>` → <what happened>
+Expected: <what should happen>
+Fix: <one sentence>
+```
-Cross-cutting observations that aren't tied to a single definition:
+#### Options
-- Inconsistent error message patterns across tools
-- Missing format functions (raw JSON returned to user)
-- `structuredContent` contains data that `content[]` silently drops
-- Requested projected fields are returned programmatically but not rendered for the model
-- Description quality issues (vague, missing, or misleading)
-- Schema design issues (required fields that should be optional, missing defaults, overly broad types, non-JSON-Schema-serializable types like `z.custom()` or `z.date()`)
-- Annotation hints that do not match real behavior (`readOnlyHint`, `idempotentHint`, `openWorldHint`)
-- Response quality issues (empty results with no context, silent filtering, missing chaining IDs, oversized payloads, no operational metadata)
-- Multi-step workflows that cannot be completed because intermediate outputs omit required IDs, cursors, or URIs
-- Error messages that don't guide recovery (generic "not found" instead of naming alternatives)
-- Resilience issues (rate limits, timeouts, transient upstream failures handled poorly or explained poorly)
-- Performance observations (unexpectedly slow responses)
+Numbered, actionable, cherry-pickable. Each item maps to a concrete change.
+```
+1. Fix empty-result message in `pubmed_search_articles` — echo criteria (finding #2)
+2. Add `format()` to `pubmed_lookup_mesh` — currently returns raw JSON (finding #5)
+3. Tighten `ids` description in `pubmed_fetch_articles` — silent on PMID vs DOI (finding #8)
+```
+End with:
+> Pick by number (e.g. "do 1, 3, 5" or "expand on 2").
 ---
 ## Checklist
-- [ ] All registered tools tested (happy path + edge cases + empty results)
-- [ ] All registered resources tested (happy path + not found)
-- [ ] All registered prompts tested (happy path + defaults)
-- [ ] Error messages reviewed for clarity and recovery guidance
-- [ ] Empty-result responses reviewed for context (criteria echo, suggestions)
-- [ ] `structuredContent` and `content[]` reviewed for parity
-- [ ] Field-selection / projection behavior reviewed where applicable
-- [ ] Response quality reviewed (chaining IDs, metadata, filtering transparency, payload size)
-- [ ] Tool annotations reviewed against actual behavior
-- [ ] Representative multi-step workflows exercised where applicable
-- [ ] External API resilience reviewed where applicable (rate limits, timeouts, transient failures)
-- [ ] Descriptions reviewed for completeness and accuracy
-- [ ] Format functions verified (or absence noted)
-- [ ] Summary report presented to user
+- [ ] Server built and started; real port parsed from log
+- [ ] Session initialized; `notifications/initialized` sent
+- [ ] Catalog surfaced and presented
+- [ ] Universal battery run on every definition
+- [ ] Situational categories applied only when triggered
+- [ ] External-state / auth-gated tools handled explicitly (run, skip, or confirm)
+- [ ] Server stopped; state file removed
+- [ ] Report: summary paragraph → grouped findings → numbered options

package/skills/polish-docs-meta/references/package-meta.md CHANGED Viewed

@@ -7,7 +7,7 @@ Fields that may still be empty or generic from scaffolding. Check each one and f
 | Field | Default / Scaffolded | What It Should Be |
 |:------|:---------------------|:------------------|
 | `name` | `{{PACKAGE_NAME}}` (substituted by init) | Verify it's correct. Use scoped name if publishing (`@org/my-server`). |
-| `version` | `0.1.0` | Keep for initial development. Bump via the `release` skill. |
+| `version` | `0.1.0` | Keep for initial development. Bump via the `release-and-publish` skill. |
 | `mcpName` | _(often missing)_ | Reverse-domain identifier: `"io.github.{owner}/{repo}"`. Used in `server.json` `name` field and Dockerfile OCI labels. |
 | `description` | `""` (empty) | One sentence: what the server does and what it wraps. Appears on npm and in `npm search`. |
 | `repository` | _(often missing)_ | `{ "type": "git", "url": "git+https://github.com/org/repo.git" }` |

package/skills/release-and-publish/SKILL.md ADDED Viewed

@@ -0,0 +1,150 @@
+---
+name: release-and-publish
+description: >
+  Ship a release end-to-end across every registry the project targets (npm, MCP Registry, GHCR). Runs the final verification gate, pushes commits and tags, then publishes to each applicable destination. Assumes git wrapup (version bumps, changelog, commit, annotated tag) is already complete — this skill is the post-wrapup publish workflow. Halts and alerts the user on the first failure.
+metadata:
+  author: cyanheads
+  version: "2.0"
+  audience: external
+  type: workflow
+---
+## Preconditions
+This skill runs **after** git wrapup. By the time it's invoked:
+- `package.json` version is bumped
+- `changelog/<major.minor>.x/<version>.md` is authored
+- `CHANGELOG.md` is regenerated
+- README and every version-bearing file is in sync
+- Release commit (`chore: release v<version>`) exists
+- Annotated tag (`v<version>`) exists locally
+- Working tree is clean
+If any are missing, halt and tell the user to finish wrapup first. Do not attempt to redo wrapup work from inside this skill.
+## Failure Protocol
+**Stop on the first non-zero exit.** No retries, no remediation from inside the skill. Report to the user:
+1. Which step failed
+2. The exact error output
+3. Which destinations already received the release (npm published? tag pushed? etc.) so they know the partial state
+The user fixes locally and re-invokes, or runs the remaining steps manually. Publishes hard-fail with "version already exists" if replayed — that's the signal the step already succeeded.
+## Steps
+### 1. Sanity-check wrapup outputs
+Read `package.json` → capture `version`. Then verify:
+```bash
+git status --porcelain          # must be empty — clean working tree
+git describe --exact-match --tags HEAD 2>&1   # must equal v<version>
+git rev-parse --abbrev-ref HEAD  # note the branch name for step 3
+```
+If the working tree is dirty or HEAD isn't tagged `v<version>`, halt.
+### 2. Run the verification gate
+All three must succeed. Use `test:all` if the script exists in `package.json`, otherwise fall back to `test`:
+```bash
+bun run devcheck
+bun run rebuild
+bun run test:all        # or `bun run test` if no test:all
+```
+Any non-zero exit → halt with the failing command's output.
+### 3. Push to origin
+```bash
+git push
+git push --tags
+```
+If the remote rejects either push, halt.
+### 4. Publish to npm
+```bash
+bun publish --access public
+```
+`bun publish` uses whatever npm auth the user has configured in `~/.npmrc`. If 2FA is enabled on the npm account, the command will prompt for an OTP or open a browser — that's expected; the user completes it interactively.
+**Friction reducers (optional, configure once):**
+| Option | How |
+|:--|:--|
+| **npm granular access token** with "Bypass 2FA for publish" | Generate at npmjs.com → replace `_authToken` in `~/.npmrc` → no OTP prompt at all |
+| **1Password CLI TOTP injection** (requires `brew install --cask 1password-cli` + signed-in `op`) | `bun publish --access public --otp="$(op item get 'npm' --otp)"` |
+Halt on publish error other than "version already exists" (which means this step already ran).
+### 5. Publish to MCP Registry
+Only if `server.json` exists at the repo root (otherwise skip).
+```bash
+bun run publish-mcp
+```
+If `publish-mcp` isn't defined in `package.json`, add it (macOS):
+```json
+"publish-mcp": "mcp-publisher login github -token \"$(security find-generic-password -a \"$USER\" -s mcp-publisher-github-pat -w)\" && mcp-publisher publish"
+```
+Prereq: a GitHub PAT with `read:org` + `read:user` scopes stored in Keychain under the service name `mcp-publisher-github-pat`:
+```bash
+security add-generic-password -a "$USER" -s mcp-publisher-github-pat -w
+# paste PAT at the silent prompt
+```
+Halt on any publisher error other than "cannot publish duplicate version".
+### 6. Publish Docker image
+Only if `Dockerfile` exists at the repo root (otherwise skip).
+Derive:
+- `OWNER/REPO` from `git remote get-url origin` (strip `.git`, handle both `https://github.com/<owner>/<repo>` and `git@github.com:<owner>/<repo>` forms)
+- `VERSION` from `package.json` (step 1)
+```bash
+docker buildx build --platform linux/amd64,linux/arm64 \
+  -t ghcr.io/<OWNER>/<REPO>:<VERSION> \
+  -t ghcr.io/<OWNER>/<REPO>:latest \
+  --push .
+```
+If the project uses a non-GHCR registry or a custom image name, respect the project's convention. Halt on build or push failure.
+### 7. Report the deployed artifacts
+Print clickable URLs for every destination that succeeded:
+- npm: `https://www.npmjs.com/package/<package.json#name>/v/<version>`
+- MCP Registry: `https://registry.modelcontextprotocol.io/v0/servers?search=<package.json#mcpName>`
+- GHCR: `ghcr.io/<OWNER>/<REPO>:<VERSION>`
+Skip any destination that was skipped in its step.
+## Checklist
+- [ ] Working tree clean; HEAD tagged `v<version>`
+- [ ] `bun run devcheck` passes
+- [ ] `bun run rebuild` succeeds
+- [ ] `bun run test:all` (or `test`) passes
+- [ ] `git push` succeeds
+- [ ] `git push --tags` succeeds
+- [ ] `bun publish --access public` succeeds
+- [ ] `bun run publish-mcp` succeeds (if `server.json` present)
+- [ ] Docker buildx multi-arch push succeeds (if `Dockerfile` present)
+- [ ] Deployed artifact URLs reported to the user