npm - gsd-pi - Versions diffs - 2.76.0 → 2.77.0 - Mend

gsd-pi 2.76.0 → 2.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (536) hide show

package/dist/resources/extensions/gsd/worktree-resolver.js CHANGED Viewed

@@ -16,8 +16,30 @@ import { existsSync, unlinkSync } from "node:fs";
 import { randomUUID } from "node:crypto";
 import { join } from "node:path";
 import { debugLog } from "./debug-logger.js";
-import { MergeConflictError } from "./git-service.js";
 import { emitJournalEvent } from "./journal.js";
+// ─── Path Helpers ──────────────────────────────────────────────────────────
+/**
+ * Worktree marker segment — present in any path produced by worktreePath().
+ * Canonical forward-slash spelling of the segment that separates the project
+ * root from a worktree directory (#3729).
+ */
+const WORKTREE_MARKER = "/.gsd/worktrees/";
+/**
+ * Separator-agnostic form of WORKTREE_MARKER. Matches the same segment
+ * whether the path uses `/` or `\` separators, so paths assembled with
+ * `node:path` on Windows are recognised as well as POSIX paths.
+ */
+const WORKTREE_MARKER_PATTERN = /[\\/]\.gsd[\\/]worktrees[\\/]/;
+/**
+ * Resolve the project root from session path state.
+ *
+ * Prefers `originalBasePath` (always the project root when set), but falls
+ * back to `basePath` when `originalBasePath` is falsy (e.g. fresh AutoSession
+ * with default empty string). If the chosen path is inside a worktree
+ * directory (contains the `.gsd/worktrees` segment, with either path
+ * separator), everything from the first occurrence of that segment onward is
+ * dropped to recover the actual project root — preventing double-nested
+ * worktree paths (#3729).
+ *
+ * @param {string} originalBasePath - Project root recorded on the session; may be empty.
+ * @param {string} basePath - Current working base path; used when `originalBasePath` is falsy.
+ * @returns {string} The chosen path with any worktree suffix removed.
+ */
+export function resolveProjectRoot(originalBasePath, basePath) {
+    const candidate = originalBasePath || basePath;
+    // search() returns the index of the first match, mirroring the earlier
+    // indexOf() lookup for forward-slash paths.
+    const markerIdx = candidate.search(WORKTREE_MARKER_PATTERN);
+    return markerIdx === -1 ? candidate : candidate.slice(0, markerIdx);
+}
 // ─── WorktreeResolver ──────────────────────────────────────────────────────
 export class WorktreeResolver {
     s;
@@ -33,11 +55,11 @@ export class WorktreeResolver {
     }
     /** Original project root — always the non-worktree path. */
     get projectRoot() {
-        return this.s.originalBasePath || this.s.basePath;
+        return resolveProjectRoot(this.s.originalBasePath, this.s.basePath);
     }
     /** Path for auto.lock file — same as the old lockBase(). */
     get lockPath() {
-        return this.s.originalBasePath || this.s.basePath;
+        return resolveProjectRoot(this.s.originalBasePath, this.s.basePath);
     }
     // ── Private Helpers ────────────────────────────────────────────────────
     rebuildGitService() {
@@ -99,7 +121,10 @@ export class WorktreeResolver {
             });
             return;
         }
-        const basePath = this.s.originalBasePath || this.s.basePath;
+        // Resolve the project root for worktree operations via shared helper.
+        // Handles the case where originalBasePath is falsy and basePath is itself
+        // a worktree path — prevents double-nested worktree paths (#3729).
+        const basePath = resolveProjectRoot(this.s.originalBasePath, this.s.basePath);
         debugLog("WorktreeResolver", {
             action: "enterMilestone",
             milestoneId,
@@ -429,11 +454,13 @@ export class WorktreeResolver {
                     /* best-effort */
                 }
             }
-            // Re-throw MergeConflictError so the auto loop can detect real code
-            // conflicts and stop instead of retrying forever (#2330).
-            if (err instanceof MergeConflictError) {
-                throw err;
-            }
+            // Restore state before re-throwing so callers always get a consistent
+            // session (#4380).
+            this.restoreToProjectRoot();
+            // Re-throw: MergeConflictError stops the auto loop (#2330); non-conflict
+            // errors (permission denied, filesystem failures) must also propagate so
+            // broken states are diagnosable (#4380).
+            throw err;
         }
         // Success path: restore basePath and rebuild (the failure path above restores state before re-throwing)
         this.restoreToProjectRoot();
@@ -500,6 +527,8 @@ export class WorktreeResolver {
                 error: msg,
             });
             ctx.notify(`Milestone merge failed (branch mode): ${msg}`, "warning");
+            // Re-throw all errors so callers can apply their own recovery logic (#4380).
+            throw err;
         }
     }
     // ── Merge and Enter Next ───────────────────────────────────────────────
@@ -516,7 +545,18 @@ export class WorktreeResolver {
             currentMilestoneId,
             nextMilestoneId,
         });
-        this.mergeAndExit(currentMilestoneId, ctx);
+        try {
+            this.mergeAndExit(currentMilestoneId, ctx);
+        }
+        catch (err) {
+            // mergeAndExit emits a warning and restores state when it fails during
+            // merge/cleanup. But if it throws before recovery runs (e.g., in
+            // validateMilestoneId or emitJournalEvent), basePath won't be restored
+            // to projectRoot — re-throw so we don't enter the next milestone with
+            // the current one unmerged.
+            if (this.s.basePath !== this.projectRoot)
+                throw err;
+        }
         this.enterMilestone(nextMilestoneId, ctx);
     }
 }

package/dist/resources/extensions/search-the-web/command-search-provider.js CHANGED Viewed

@@ -7,7 +7,7 @@
  *
  * All provider logic lives in provider.ts (S01) — this is pure UI wiring.
  */
-import { isAnthropicApi } from '@gsd/pi-ai';
+import { supportsNativeWebSearch } from './native-search.js';
 import { getTavilyApiKey, getBraveApiKey, getOllamaApiKey, getSearchProviderPreference, setSearchProviderPreference, resolveSearchProvider, } from './provider.js';
 const VALID_PREFERENCES = ['tavily', 'brave', 'ollama', 'auto'];
 function keyStatus(provider) {
@@ -72,9 +72,10 @@ export function registerSearchProviderCommand(pi) {
             }
             setSearchProviderPreference(chosen);
             const effective = resolveSearchProvider();
-            // Gate on api (#4478 / ADR-012): covers claude-code, anthropic-vertex, and
-            // other Anthropic-fronting transports — not just the plain `anthropic` provider.
-            const isAnthropic = isAnthropicApi(ctx.model);
+            // Gate on api shape + provider allowlist: the info note must match the
+            // actual runtime behavior in native-search.ts. Claude served via copilot
+            // / minimax / kimi is anthropic-shaped but does NOT run native search.
+            const isAnthropic = supportsNativeWebSearch(ctx.model);
             const nativeNote = isAnthropic ? '\nNote: Native Anthropic web search is also active (automatic, no API key needed).' : '';
             ctx.ui.notify(`Search provider set to ${chosen}. Effective provider: ${effective ?? 'none (no API keys)'}${nativeNote}`, 'info');
         },

package/dist/resources/extensions/search-the-web/native-search.js CHANGED Viewed

@@ -12,6 +12,38 @@ export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"];
 export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "google_search"];
 /** Thinking block types that require signature validation by the API */
 const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
+/**
+ * Providers whose Anthropic-Messages endpoint is known to accept the native
+ * `web_search_20250305` server tool. Anthropic-shaped transports NOT in this
+ * set (github-copilot, minimax, kimi-coding, opencode, vercel-ai-gateway,
+ * etc.) route Claude or Claude-compatible models through the Messages API
+ * but do NOT expose the server-side search tool — injecting it yields a
+ * 400 "unsupported_value" from their endpoints (regression from #4492).
+ *
+ * Keep this allowlist tight — err on the side of custom/Brave search rather
+ * than a runtime 400. Add a provider here only after confirming its endpoint
+ * accepts the tool type.
+ */
+const NATIVE_WEB_SEARCH_PROVIDERS = new Set([
+    "anthropic",
+    "claude-code",
+    "anthropic-vertex",
+]);
+/**
+ * Decide whether the native `web_search_20250305` tool may be used for the
+ * given model. Two conditions must both hold: the model passes the
+ * Anthropic api-shape check (#4478 / ADR-012), and its `provider` is a
+ * string listed in the native-search allowlist (#444 regression guard and
+ * #4492 scope correction). Provider-level discrimination is legitimate per
+ * ADR-012 for credential/behavior differences that api shape can't express.
+ *
+ * @param model - Model descriptor; its `provider` field is read when present.
+ * @returns `true` only when both conditions hold, otherwise `false`.
+ */
+export function supportsNativeWebSearch(model) {
+    // The allowlist is only consulted once the api-shape check has passed.
+    return isAnthropicApi(model)
+        ? typeof model?.provider === "string" && NATIVE_WEB_SEARCH_PROVIDERS.has(model.provider)
+        : false;
+}
 /**
  * Maximum number of native web searches allowed per session (agent unit).
  * The Anthropic API's `max_uses` is per-request — it resets on each API call.
@@ -76,10 +108,11 @@ export function registerNativeSearchHooks(pi) {
     pi.on("model_select", async (event, ctx) => {
         modelSelectFired = true;
         const wasAnthropic = isAnthropicProvider;
-        // Gate on `api` not `provider` (#4478 / ADR-012): covers claude-code OAuth,
-        // anthropic-vertex, and Vercel-gateway-hosted Anthropic — all serve the
-        // Messages API and accept the native web_search tool.
-        isAnthropicProvider = isAnthropicApi(event.model);
+        // Gate on api shape AND provider allowlist: direct Anthropic, claude-code
+        // OAuth, and anthropic-vertex accept `web_search_20250305`; copilot /
+        // minimax / kimi / opencode route Claude-compat models through the same
+        // wire protocol but reject the server-side tool (#4492 regression).
+        isAnthropicProvider = supportsNativeWebSearch(event.model);
         const hasBrave = !!process.env.BRAVE_API_KEY;
         // When Anthropic (and not preferring Brave): disable custom search tools —
         // native web_search is server-side and more reliable.
@@ -120,20 +153,19 @@ export function registerNativeSearchHooks(pi) {
         // modelsAreEqual suppresses model_select AND the SDK doesn't pass model.
         const eventModel = event.model;
         let isAnthropic;
-        if (eventModel?.api) {
-            // Preferred path: gate on wire protocol (#4478 / ADR-012).
-            isAnthropic = isAnthropicApi(eventModel);
-        }
-        else if (eventModel?.provider) {
-            // Fallback for event shapes that carry provider but not api — only plain
-            // `anthropic` maps unambiguously without the api field. Other Anthropic
-            // transports will arrive via the modelSelectFired or model-name branch.
-            isAnthropic = eventModel.provider === "anthropic";
+        if (eventModel?.api || eventModel?.provider) {
+            // Preferred path: gate on api shape + provider allowlist. Both fields
+            // are authoritative when present — do NOT fall back to the model-name
+            // heuristic, which would misclassify copilot-served Claude as Anthropic
+            // (#444 regression) or minimax-served Claude-compat as Anthropic (#4492).
+            isAnthropic = supportsNativeWebSearch(eventModel);
         }
         else if (modelSelectFired) {
             isAnthropic = isAnthropicProvider;
         }
         else {
+            // Last resort: session-restore paths where the SDK doesn't pass model.
+            // The model-name prefix is best-effort and assumes direct Anthropic.
             const modelName = typeof payload.model === "string" ? payload.model : "";
             isAnthropic = modelName.startsWith("claude-");
         }

package/dist/resources/skills/api-design/SKILL.md ADDED Viewed

@@ -0,0 +1,190 @@
+---
+name: api-design
+description: Design or review an HTTP/REST/GraphQL API for versioning, pagination, error shapes, idempotency, auth, and evolvability. Use when asked to "design an API", "shape the endpoints", "design the schema", "add a new endpoint", "review this API", or when building/modifying a public or internal HTTP surface. Complements `design-an-interface` (which is interface-agnostic) by covering HTTP-specific concerns like status codes, cache headers, and breaking-change management.
+---
+<objective>
+Shape an HTTP or GraphQL API so callers get predictable, evolvable, and honest semantics. The deliverable is a concrete endpoint/schema sketch with: URL or operation names, method/verb, request shape, response shape, error shape, auth model, pagination strategy, and versioning stance. Optimize for "clients that exist in 2 years" over "client that's easy to write today".
+</objective>
+<context>
+GSD-2 has `design-an-interface` for general module-interface design; this skill is the HTTP/GraphQL specialization. REST and GraphQL carry baggage — status codes, verbs, nullability, pagination — that a generic interface-design discussion glosses over.
+Invocation points:
+- Adding a new public API endpoint
+- Redesigning an internal API boundary between services
+- Code review of a PR that introduces HTTP handlers
+- A slice whose acceptance criteria include "the API works"
+- A GraphQL schema change
+</context>
+<core_principle>
+**CALLERS OUTLIVE YOUR ASSUMPTIONS.** An API you ship today has to keep working when your internals change, when version two of the mobile app is still in use, and when a third party integrates against it. Design for extension, not just for the current caller.
+**HONEST STATUS CODES.** 200 OK with `{"error": "not found"}` is a lie. 404 says not found. Use the HTTP semantics the protocol offers — HTTP clients, caches, and intermediaries rely on them.
+**PAGINATION IS NON-OPTIONAL.** Any list endpoint that doesn't paginate will eventually get a request for "all records" that kills your database.
+</core_principle>
+<process>
+## Step 1: Gather the contract
+Answer, or ask (one round, 1–3 questions):
+1. **Who are the callers?** Internal service / mobile app / public third-party / same-repo frontend.
+2. **What's the versioning stance?** None / URL-path (`/v1/`) / header-based / GraphQL schema evolution.
+3. **Auth model?** Public / API key / OAuth / session cookie / mTLS / none-but-internal-only.
+4. **Idempotency expectation?** Is a retry safe? Required?
+5. **Consistency model?** Read-your-writes, eventual, serializable?
+## Step 2: Resource and operation naming
+### REST
+- Nouns not verbs in URLs: `POST /users`, not `POST /createUser`.
+- Plural resources: `/users/42`, not `/user/42`.
+- Nested only when the relationship is hierarchical and the child has no independent identity: `/users/42/sessions/3`. Otherwise flat: `/sessions/3?userId=42`.
+- Use subresources for actions that don't fit CRUD: `POST /users/42:deactivate` (colon syntax) or `POST /users/42/actions/deactivate`.
+### GraphQL
+- Queries are nouns; mutations are verbs: `user(id)`, `createUser(input)`, `deactivateUser(id)`.
+- Group related mutations under an input type: `createUser(input: CreateUserInput!)`.
+- Return the affected object plus any derived/computed fields from mutations — lets clients avoid a refetch.
+## Step 3: Methods and status codes
+### REST
+| Method | Intent | Idempotent? | Default success |
+|---|---|---|---|
+| GET | Read | Yes | 200, or 304 if conditional |
+| POST | Create or non-idempotent action | No | 201 with `Location` on create, 200 on action |
+| PUT | Replace (full-object) | Yes | 200 with body, or 204 |
+| PATCH | Partial update | No (usually) | 200 with body |
+| DELETE | Remove | Yes | 204 |
+Errors:
+- 400: caller screwed up the request shape
+- 401: no/invalid auth
+- 403: authed but not allowed
+- 404: resource doesn't exist
+- 409: conflict (version mismatch, unique constraint)
+- 410: gone — use instead of 404 when the resource previously existed and you want to signal that it was removed
+- 422: validation failed
+- 429: rate-limited — include `Retry-After`
+- 500: genuinely unexpected server error
+- 503: service down or overloaded — include `Retry-After`
+Never 200-with-error-body. Never 500 for a 4xx cause.
+### GraphQL
+- Top-level errors (`errors[]`) for transport-level failures. Domain errors (validation, not-found, forbidden) go in the typed return — use a union or result type.
+- Partial results are expected; design the schema so `null` on a field is meaningful, not a signal of generic failure.
+## Step 4: Pagination
+- **Cursor-based by default.** Opaque cursor string, `limit`, return `nextCursor` when more exists. Scales, stable under writes.
+- **Offset-based only when:** dataset is small, user needs jump-to-page semantics (admin tables), and you're willing to accept stability drift.
+- **Never "return everything"** as default. Put a hard upper bound on `limit` (e.g., 200).
+- GraphQL: use Relay-style connections (`edges`, `pageInfo`) if the ecosystem expects it; otherwise a simpler `{items, nextCursor}` is fine.
+## Step 5: Error shape
+Standardize one shape and use it everywhere. Example REST:
+```json
+{
+  "error": {
+    "code": "user_not_found",
+    "message": "No user with id 42",
+    "details": { "userId": 42 },
+    "requestId": "req_abc123"
+  }
+}
+```
+- `code` is machine-readable; stable; documented.
+- `message` is human-readable; can change.
+- `details` carries structured context.
+- `requestId` lets callers report bugs.
+Errors don't leak stack traces, file paths, or internal queries.
+## Step 6: Idempotency, caching, concurrency
+- **Idempotency keys** for POST operations that mustn't double-execute on retry. Caller passes `Idempotency-Key: <uuid>`; server dedupes for a window.
+- **ETags** for GET + conditional updates (`If-Match` on PUT/PATCH).
+- **Cache-Control** on GETs that are safely cacheable.
+- **Optimistic concurrency:** when multiple writers collide, 409 with the current state. Don't silently clobber.
+## Step 7: Versioning and evolution
+- **Additive changes are free:** new optional fields, new endpoints, new optional query params.
+- **Breaking changes need a plan:** path-versioned (`/v2/`), sunset headers on `/v1/`, deprecation window communicated. Or, for GraphQL, `@deprecated` on fields with a migration note.
+- **Document the contract:** OpenAPI/GraphQL SDL. Keep it in the repo. Make it part of the PR that introduces the change.
+## Step 8: Review or write it up
+If this is a review, produce findings in the same shape as `security-review` / `review` — file:line, category, recommendation.
+If this is a new design, produce:
+```markdown
+## <API name>
+### Scope
+<what the API is for, who calls it>
+### Endpoints / Operations
+- `POST /users` — create user. Request: `{email, name}`. Response 201: `{id, email, name, createdAt}` + `Location: /users/<id>`. Errors: 409 email taken, 422 invalid.
+- ...
+### Auth
+<model + where to put the credential>
+### Pagination
+<cursor shape, max limit>
+### Error shape
+<one canonical shape>
+### Idempotency / concurrency
+<rules>
+### Versioning
+<stance + how breaking changes will be handled>
+### OpenAPI / SDL
+<link or inline>
+```
+Append architectural decisions to `.gsd/DECISIONS.md`.
+</process>
+<anti_patterns>
+- **200 OK with `{"error": "..."}`.** Lies to caches, proxies, retry libraries.
+- **Unbounded list endpoints.** `GET /users` without a `limit` cap will bite you.
+- **Offset pagination at scale.** Drifts under writes; slow at high offsets.
+- **Free-form error messages with no code.** Machine callers can't branch on prose.
+- **Breaking changes in-place.** Callers break; versioning exists for a reason.
+- **Ignoring idempotency on retriable POSTs.** Double-charges, duplicate records.
+- **Auth checks at the handler only, not the service layer.** Defense in depth.
+</anti_patterns>
+<success_criteria>
+- [ ] Every endpoint/operation has named request, response, and error shapes.
+- [ ] Status codes match HTTP semantics — no 200-with-error.
+- [ ] List endpoints paginate; max limit is documented.
+- [ ] A single error shape is used everywhere, with a machine-readable code.
+- [ ] Versioning stance is stated — even if the answer is "additive only for now."
+- [ ] OpenAPI/SDL reflects the design and lives in the repo.
+- [ ] Decisions appear in `.gsd/DECISIONS.md`.
+</success_criteria>

package/dist/resources/skills/create-mcp-server/SKILL.md ADDED Viewed

@@ -0,0 +1,121 @@
+---
+name: create-mcp-server
+description: Build, iterate, and evaluate Model Context Protocol (MCP) servers that expose external services as tools an LLM can call. Use when asked to "build an MCP server", "create an MCP tool", "wrap this API as MCP", "expose X to Claude", or when extending GSD with custom tool integrations. Covers research, schema/tool design, error handling, pagination, testing via MCP Inspector, and producing a 10-question eval set that proves the server actually enables real work.
+---
+<objective>
+Produce a high-quality MCP server that an LLM can actually use — not one that is merely spec-compliant. Quality is measured by how well the server enables real-world task completion, which means the tool descriptions, error messages, and pagination must hold up under model reasoning, not just at the wire level.
+</objective>
+<context>
+GSD-2 consumes MCP heavily — see `src/resources/extensions/mcp-client/`, `src/resources/extensions/gsd/mcp-project-config.ts`, `src/resources/extensions/gsd/workflow-mcp.ts`, and `/gsd mcp` commands. Users frequently want to extend GSD with project-specific MCP servers (internal APIs, data sources, domain tools). This skill fills the authoring gap between "MCP exists" and "I have a working server."
+Invocation points:
+- User describes a service or API they want an LLM to reach
+- `/gsd mcp init` scaffolds config but there's a tool integration to build
+- Replacing a hand-rolled extension with a standard MCP server
+</context>
+<core_principle>
+**THE QUALITY METRIC IS TASK COMPLETION, NOT SCHEMA VALIDITY.** A server that lists 30 tools with cryptic names and empty descriptions passes the protocol but fails the point. The tool description is the only thing an LLM has to decide whether to call it — write it like documentation for a stranger under time pressure.
+**DESIGN FOR THE MODEL, NOT THE API.** A raw REST endpoint is rarely the right tool. Group, filter, and pre-shape responses so the model gets what it needs to reason, not a 40KB JSON blob it has to summarize. Fewer, deeper tools beat many, shallow ones.
+</core_principle>
+<process>
+## Step 1: Research and scope
+1. **Study modern MCP design.** Read the latest MCP protocol docs (not training data — fetch them). Read 2–3 reference implementations to see current patterns.
+2. **Pick a framework.** TypeScript is the default — the reference SDK is the most mature. Python is fine for data-heavy or ML adjacencies.
+3. **Analyze the target API.** Map the external service's endpoints, auth, rate limits, pagination, error shapes. Identify what a human workflow on top of it actually looks like — that's the cut line for tool design.
+4. **Produce a brief.** One page: what the server does, who calls it, the 5–10 tools you plan to expose, and the top 3 design trade-offs. Confirm with the user.
+## Step 2: Set up the project
+Skeleton:
+```text
+server/
+  src/
+    index.ts         # MCP entry point — stdio or sse transport
+    client.ts        # API client with auth, retries, typed errors
+    tools/           # one file per tool, or grouped by domain
+    pagination.ts    # shared cursor handling
+    errors.ts        # MCP-friendly error formatting
+  package.json       # @modelcontextprotocol/sdk as dep
+  tsconfig.json
+  README.md          # how to run, env vars, rate-limit notes
+  evals.xml          # 10 eval questions (Step 5)
+```
+Core infrastructure goes first: API client with typed errors, pagination helpers, consistent retry/timeout behavior. Do not inline these per tool.
+## Step 3: Implement tools
+For each tool:
+1. **Name:** verb-noun, lowercase, snake_case. `search_issues`, `get_customer`, `create_deployment`. Not `do_thing` or `api_v2_post`.
+2. **Description (frontmatter):** 2–4 sentences. State what the tool does, when to use it, when NOT to use it, and any required fields or quirks. This is the model's entire interface to the tool — write it carefully.
+3. **Input schema (JSON Schema):** required fields marked, every field has a description, enums enumerated, examples included for free-form strings.
+4. **Output shape:** typed, minimal, decision-ready. If the raw API returns 40 fields and only 6 matter for follow-up calls, return 6.
+5. **Error handling:** never return raw HTTP errors. Translate to human-readable messages: "Rate limit exceeded (retry in 30s)", "Authorization expired", "No record found for ID X". Include the action the caller should take next.
+6. **Pagination:** expose cursors explicitly. Do not leak "page N of M" into the model — leak "more results available, pass `cursor: abc123` to continue."
+## Step 4: Build and test with MCP Inspector
+1. Run the server under MCP Inspector. Verify it registers, every tool lists with its description, inputs schema-validate, outputs shape correctly.
+2. Call every tool at least once manually through the Inspector UI. Check error paths.
+3. Fix any "looks fine in isolation, breaks under the Inspector's framing" issues.
+## Step 5: Produce the eval set
+Write 10 evaluation questions in `evals.xml` that exercise the server end-to-end. Each question should require 2+ tool calls and at least one decision the model has to make based on earlier output. Cover:
+- Happy path (2–3 questions)
+- Error recovery (2 questions — "the first call failed, what next?")
+- Pagination (1 question)
+- Decision under partial information (2–3 questions)
+- Cross-tool composition (1–2 questions)
+Format:
+```xml
+<evals>
+  <eval id="1">
+    <question>...user request...</question>
+    <expected>...concrete observable answer or tool-call sequence...</expected>
+  </eval>
+</evals>
+```
+Run the evals. If the model can't complete them, the server — not the model — needs work. Iterate on descriptions, error messages, and tool granularity.
+## Step 6: Wire into GSD
+Write the project's `.mcp.json` entry using `/gsd mcp init` as a starting point. Document env vars and startup in README.md. If the server is globally useful, suggest the user file it as a durable skill via `spike-wrap-up` or publish it.
+</process>
+<anti_patterns>
+- **One-to-one REST mapping.** If the API has 30 endpoints, you likely want 6 tools.
+- **Empty or auto-generated descriptions.** "Calls the /users endpoint" tells the model nothing it can reason with.
+- **Raw error passthrough.** `{"error": "500"}` is useless. Translate.
+- **Page-based pagination leaked as "page 1 of 5".** Use opaque cursors.
+- **Skipping the Inspector.** If you didn't run it under the Inspector, you didn't test it.
+- **No evals.** Without evals you have no signal on whether real task completion works.
+</anti_patterns>
+<success_criteria>
+- [ ] Every tool has a description that could guide a cold-start model correctly.
+- [ ] Errors are translated to actionable, human-readable messages.
+- [ ] Pagination uses opaque cursors; no leaked page numbers.
+- [ ] `evals.xml` has 10 questions; the model completes ≥8 without handholding.
+- [ ] MCP Inspector test passes cleanly.
+- [ ] README documents env, startup, and rate-limit behavior.
+- [ ] Server is reachable from GSD via `.mcp.json` entry.
+</success_criteria>