claude-mpm 4.15.6__py3-none-any.whl → 4.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of claude-mpm might be problematic. Click here for more details.

Files changed (194)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/BASE_ENGINEER.md +286 -0
  3. claude_mpm/agents/BASE_PM.md +272 -23
  4. claude_mpm/agents/PM_INSTRUCTIONS.md +49 -0
  5. claude_mpm/agents/agent_loader.py +4 -4
  6. claude_mpm/agents/templates/engineer.json +5 -1
  7. claude_mpm/agents/templates/php-engineer.json +10 -4
  8. claude_mpm/agents/templates/python_engineer.json +8 -3
  9. claude_mpm/agents/templates/rust_engineer.json +12 -7
  10. claude_mpm/agents/templates/svelte-engineer.json +225 -0
  11. claude_mpm/cli/commands/__init__.py +2 -0
  12. claude_mpm/cli/commands/mpm_init/__init__.py +73 -0
  13. claude_mpm/cli/commands/mpm_init/core.py +525 -0
  14. claude_mpm/cli/commands/mpm_init/display.py +341 -0
  15. claude_mpm/cli/commands/mpm_init/git_activity.py +427 -0
  16. claude_mpm/cli/commands/mpm_init/modes.py +397 -0
  17. claude_mpm/cli/commands/mpm_init/prompts.py +442 -0
  18. claude_mpm/cli/commands/mpm_init_cli.py +396 -0
  19. claude_mpm/cli/commands/mpm_init_handler.py +67 -1
  20. claude_mpm/cli/commands/skills.py +488 -0
  21. claude_mpm/cli/executor.py +2 -0
  22. claude_mpm/cli/parsers/base_parser.py +7 -0
  23. claude_mpm/cli/parsers/mpm_init_parser.py +42 -0
  24. claude_mpm/cli/parsers/skills_parser.py +137 -0
  25. claude_mpm/cli/startup.py +57 -0
  26. claude_mpm/commands/mpm-auto-configure.md +52 -0
  27. claude_mpm/commands/mpm-help.md +3 -0
  28. claude_mpm/commands/mpm-init.md +112 -6
  29. claude_mpm/commands/mpm-version.md +113 -0
  30. claude_mpm/commands/mpm.md +1 -0
  31. claude_mpm/config/agent_config.py +2 -2
  32. claude_mpm/constants.py +12 -0
  33. claude_mpm/core/config.py +42 -0
  34. claude_mpm/core/factories.py +1 -1
  35. claude_mpm/core/interfaces.py +56 -1
  36. claude_mpm/core/optimized_agent_loader.py +3 -3
  37. claude_mpm/hooks/__init__.py +8 -0
  38. claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
  39. claude_mpm/hooks/session_resume_hook.py +121 -0
  40. claude_mpm/models/resume_log.py +340 -0
  41. claude_mpm/services/agents/auto_config_manager.py +1 -1
  42. claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
  43. claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
  44. claude_mpm/services/agents/deployment/agent_validator.py +17 -1
  45. claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
  46. claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
  47. claude_mpm/services/agents/local_template_manager.py +1 -1
  48. claude_mpm/services/agents/recommender.py +47 -0
  49. claude_mpm/services/cli/resume_service.py +617 -0
  50. claude_mpm/services/cli/session_manager.py +87 -0
  51. claude_mpm/services/cli/session_pause_manager.py +504 -0
  52. claude_mpm/services/cli/session_resume_helper.py +372 -0
  53. claude_mpm/services/core/interfaces.py +56 -1
  54. claude_mpm/services/core/models/agent_config.py +3 -0
  55. claude_mpm/services/core/models/process.py +4 -0
  56. claude_mpm/services/core/path_resolver.py +1 -1
  57. claude_mpm/services/diagnostics/models.py +21 -0
  58. claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
  59. claude_mpm/services/local_ops/__init__.py +2 -0
  60. claude_mpm/services/mcp_config_manager.py +7 -131
  61. claude_mpm/services/mcp_gateway/auto_configure.py +31 -25
  62. claude_mpm/services/mcp_gateway/core/process_pool.py +19 -10
  63. claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +26 -21
  64. claude_mpm/services/session_manager.py +205 -1
  65. claude_mpm/services/unified/deployment_strategies/local.py +1 -1
  66. claude_mpm/services/version_service.py +104 -1
  67. claude_mpm/skills/__init__.py +21 -0
  68. claude_mpm/skills/agent_skills_injector.py +324 -0
  69. claude_mpm/skills/bundled/LICENSE_ATTRIBUTIONS.md +79 -0
  70. claude_mpm/skills/bundled/api-documentation.md +393 -0
  71. claude_mpm/skills/bundled/async-testing.md +571 -0
  72. claude_mpm/skills/bundled/code-review.md +143 -0
  73. claude_mpm/skills/bundled/collaboration/brainstorming/SKILL.md +79 -0
  74. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/SKILL.md +178 -0
  75. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/agent-prompts.md +577 -0
  76. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/coordination-patterns.md +467 -0
  77. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/examples.md +537 -0
  78. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/troubleshooting.md +730 -0
  79. claude_mpm/skills/bundled/collaboration/requesting-code-review/SKILL.md +112 -0
  80. claude_mpm/skills/bundled/collaboration/requesting-code-review/references/code-reviewer-template.md +146 -0
  81. claude_mpm/skills/bundled/collaboration/requesting-code-review/references/review-examples.md +412 -0
  82. claude_mpm/skills/bundled/collaboration/writing-plans/SKILL.md +81 -0
  83. claude_mpm/skills/bundled/collaboration/writing-plans/references/best-practices.md +362 -0
  84. claude_mpm/skills/bundled/collaboration/writing-plans/references/plan-structure-templates.md +312 -0
  85. claude_mpm/skills/bundled/database-migration.md +199 -0
  86. claude_mpm/skills/bundled/debugging/root-cause-tracing/SKILL.md +152 -0
  87. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/advanced-techniques.md +668 -0
  88. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/examples.md +587 -0
  89. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/integration.md +438 -0
  90. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/tracing-techniques.md +391 -0
  91. claude_mpm/skills/bundled/debugging/systematic-debugging/CREATION-LOG.md +119 -0
  92. claude_mpm/skills/bundled/debugging/systematic-debugging/SKILL.md +148 -0
  93. claude_mpm/skills/bundled/debugging/systematic-debugging/references/anti-patterns.md +483 -0
  94. claude_mpm/skills/bundled/debugging/systematic-debugging/references/examples.md +452 -0
  95. claude_mpm/skills/bundled/debugging/systematic-debugging/references/troubleshooting.md +449 -0
  96. claude_mpm/skills/bundled/debugging/systematic-debugging/references/workflow.md +411 -0
  97. claude_mpm/skills/bundled/debugging/systematic-debugging/test-academic.md +14 -0
  98. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-1.md +58 -0
  99. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-2.md +68 -0
  100. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-3.md +69 -0
  101. claude_mpm/skills/bundled/debugging/verification-before-completion/SKILL.md +131 -0
  102. claude_mpm/skills/bundled/debugging/verification-before-completion/references/gate-function.md +325 -0
  103. claude_mpm/skills/bundled/debugging/verification-before-completion/references/integration-and-workflows.md +490 -0
  104. claude_mpm/skills/bundled/debugging/verification-before-completion/references/red-flags-and-failures.md +425 -0
  105. claude_mpm/skills/bundled/debugging/verification-before-completion/references/verification-patterns.md +499 -0
  106. claude_mpm/skills/bundled/docker-containerization.md +194 -0
  107. claude_mpm/skills/bundled/express-local-dev.md +1429 -0
  108. claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
  109. claude_mpm/skills/bundled/git-workflow.md +414 -0
  110. claude_mpm/skills/bundled/imagemagick.md +204 -0
  111. claude_mpm/skills/bundled/json-data-handling.md +223 -0
  112. claude_mpm/skills/bundled/main/artifacts-builder/SKILL.md +86 -0
  113. claude_mpm/skills/bundled/main/internal-comms/SKILL.md +43 -0
  114. claude_mpm/skills/bundled/main/internal-comms/examples/3p-updates.md +47 -0
  115. claude_mpm/skills/bundled/main/internal-comms/examples/company-newsletter.md +65 -0
  116. claude_mpm/skills/bundled/main/internal-comms/examples/faq-answers.md +30 -0
  117. claude_mpm/skills/bundled/main/internal-comms/examples/general-comms.md +16 -0
  118. claude_mpm/skills/bundled/main/mcp-builder/SKILL.md +160 -0
  119. claude_mpm/skills/bundled/main/mcp-builder/reference/design_principles.md +412 -0
  120. claude_mpm/skills/bundled/main/mcp-builder/reference/evaluation.md +602 -0
  121. claude_mpm/skills/bundled/main/mcp-builder/reference/mcp_best_practices.md +915 -0
  122. claude_mpm/skills/bundled/main/mcp-builder/reference/node_mcp_server.md +916 -0
  123. claude_mpm/skills/bundled/main/mcp-builder/reference/python_mcp_server.md +752 -0
  124. claude_mpm/skills/bundled/main/mcp-builder/reference/workflow.md +1237 -0
  125. claude_mpm/skills/bundled/main/mcp-builder/scripts/connections.py +157 -0
  126. claude_mpm/skills/bundled/main/mcp-builder/scripts/evaluation.py +425 -0
  127. claude_mpm/skills/bundled/main/skill-creator/SKILL.md +189 -0
  128. claude_mpm/skills/bundled/main/skill-creator/references/best-practices.md +500 -0
  129. claude_mpm/skills/bundled/main/skill-creator/references/creation-workflow.md +464 -0
  130. claude_mpm/skills/bundled/main/skill-creator/references/examples.md +619 -0
  131. claude_mpm/skills/bundled/main/skill-creator/references/progressive-disclosure.md +437 -0
  132. claude_mpm/skills/bundled/main/skill-creator/references/skill-structure.md +231 -0
  133. claude_mpm/skills/bundled/main/skill-creator/scripts/init_skill.py +303 -0
  134. claude_mpm/skills/bundled/main/skill-creator/scripts/package_skill.py +113 -0
  135. claude_mpm/skills/bundled/main/skill-creator/scripts/quick_validate.py +72 -0
  136. claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
  137. claude_mpm/skills/bundled/pdf.md +141 -0
  138. claude_mpm/skills/bundled/performance-profiling.md +567 -0
  139. claude_mpm/skills/bundled/php/espocrm-development/SKILL.md +170 -0
  140. claude_mpm/skills/bundled/php/espocrm-development/references/architecture.md +602 -0
  141. claude_mpm/skills/bundled/php/espocrm-development/references/common-tasks.md +821 -0
  142. claude_mpm/skills/bundled/php/espocrm-development/references/development-workflow.md +742 -0
  143. claude_mpm/skills/bundled/php/espocrm-development/references/frontend-customization.md +726 -0
  144. claude_mpm/skills/bundled/php/espocrm-development/references/hooks-and-services.md +764 -0
  145. claude_mpm/skills/bundled/php/espocrm-development/references/testing-debugging.md +831 -0
  146. claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
  147. claude_mpm/skills/bundled/rust/desktop-applications/SKILL.md +226 -0
  148. claude_mpm/skills/bundled/rust/desktop-applications/references/architecture-patterns.md +901 -0
  149. claude_mpm/skills/bundled/rust/desktop-applications/references/native-gui-frameworks.md +901 -0
  150. claude_mpm/skills/bundled/rust/desktop-applications/references/platform-integration.md +775 -0
  151. claude_mpm/skills/bundled/rust/desktop-applications/references/state-management.md +937 -0
  152. claude_mpm/skills/bundled/rust/desktop-applications/references/tauri-framework.md +770 -0
  153. claude_mpm/skills/bundled/rust/desktop-applications/references/testing-deployment.md +961 -0
  154. claude_mpm/skills/bundled/security-scanning.md +327 -0
  155. claude_mpm/skills/bundled/systematic-debugging.md +473 -0
  156. claude_mpm/skills/bundled/test-driven-development.md +378 -0
  157. claude_mpm/skills/bundled/testing/condition-based-waiting/SKILL.md +119 -0
  158. claude_mpm/skills/bundled/testing/condition-based-waiting/references/patterns-and-implementation.md +253 -0
  159. claude_mpm/skills/bundled/testing/test-driven-development/SKILL.md +145 -0
  160. claude_mpm/skills/bundled/testing/test-driven-development/references/anti-patterns.md +543 -0
  161. claude_mpm/skills/bundled/testing/test-driven-development/references/examples.md +741 -0
  162. claude_mpm/skills/bundled/testing/test-driven-development/references/integration.md +470 -0
  163. claude_mpm/skills/bundled/testing/test-driven-development/references/philosophy.md +458 -0
  164. claude_mpm/skills/bundled/testing/test-driven-development/references/workflow.md +639 -0
  165. claude_mpm/skills/bundled/testing/testing-anti-patterns/SKILL.md +140 -0
  166. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/completeness-anti-patterns.md +572 -0
  167. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/core-anti-patterns.md +411 -0
  168. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/detection-guide.md +569 -0
  169. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/tdd-connection.md +695 -0
  170. claude_mpm/skills/bundled/testing/webapp-testing/SKILL.md +184 -0
  171. claude_mpm/skills/bundled/testing/webapp-testing/decision-tree.md +459 -0
  172. claude_mpm/skills/bundled/testing/webapp-testing/examples/console_logging.py +35 -0
  173. claude_mpm/skills/bundled/testing/webapp-testing/examples/element_discovery.py +44 -0
  174. claude_mpm/skills/bundled/testing/webapp-testing/examples/static_html_automation.py +34 -0
  175. claude_mpm/skills/bundled/testing/webapp-testing/playwright-patterns.md +479 -0
  176. claude_mpm/skills/bundled/testing/webapp-testing/reconnaissance-pattern.md +687 -0
  177. claude_mpm/skills/bundled/testing/webapp-testing/scripts/with_server.py +129 -0
  178. claude_mpm/skills/bundled/testing/webapp-testing/server-management.md +758 -0
  179. claude_mpm/skills/bundled/testing/webapp-testing/troubleshooting.md +868 -0
  180. claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
  181. claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
  182. claude_mpm/skills/bundled/xlsx.md +157 -0
  183. claude_mpm/skills/registry.py +97 -9
  184. claude_mpm/skills/skills_registry.py +348 -0
  185. claude_mpm/skills/skills_service.py +739 -0
  186. claude_mpm/utils/agent_dependency_loader.py +2 -2
  187. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/METADATA +211 -33
  188. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/RECORD +192 -60
  189. claude_mpm/agents/INSTRUCTIONS_OLD_DEPRECATED.md +0 -602
  190. claude_mpm/cli/commands/mpm_init.py +0 -2008
  191. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/WHEEL +0 -0
  192. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/entry_points.txt +0 -0
  193. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/licenses/LICENSE +0 -0
  194. {claude_mpm-4.15.6.dist-info → claude_mpm-4.21.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1237 @@
1
+ # Complete MCP Server Development Workflow
2
+
3
+ ## Overview
4
+
5
+ This document provides the complete 4-phase workflow for developing high-quality MCP servers. Follow these steps in order, loading additional reference files as indicated.
6
+
7
+ **Time Allocation:**
8
+ - Phase 1 (Research & Planning): 40%
9
+ - Phase 2 (Implementation): 30%
10
+ - Phase 3 (Review & Refine): 15%
11
+ - Phase 4 (Evaluations): 15%
12
+
13
+ ---
14
+
15
+ ## Phase 1: Deep Research and Planning (40% of effort)
16
+
17
+ The most critical phase. Insufficient research leads to poor tool design that must be completely rewritten.
18
+
19
+ ### Step 1.1: Understand Agent-Centric Design Principles
20
+
21
+ **BEFORE writing any code**, understand how to design for AI agents.
22
+
23
+ **Action:**
24
+ - **Read [design_principles.md](./design_principles.md) completely**
25
+ - Study the five core principles:
26
+ 1. Build for workflows, not API endpoints
27
+ 2. Optimize for limited context
28
+ 3. Design actionable error messages
29
+ 4. Follow natural task subdivisions
30
+ 5. Use evaluation-driven development
31
+
32
+ **Key Takeaways to Internalize:**
33
+ - MCP servers serve AI agents, not humans
34
+ - Agents have limited context - every token matters
35
+ - Agents need workflow tools, not API wrappers
36
+ - Error messages must teach correct usage
37
+ - Tool names should reflect tasks, not API structure
38
+
39
+ **Time:** 20-30 minutes of focused reading
40
+
41
+ **Checkpoint:** Can you explain why wrapping API endpoints directly is insufficient?
42
+
43
+ ### Step 1.2: Study MCP Protocol Documentation
44
+
45
+ **Action:**
46
+ - Use WebFetch to load: `https://modelcontextprotocol.io/llms-full.txt`
47
+ - Read the complete MCP specification
48
+ - Understand:
49
+ - Tool registration and invocation
50
+ - Input schema requirements (JSON Schema format)
51
+ - Response format (content array with text/image/resource types)
52
+ - Error handling (isError flag in responses)
53
+ - Tool annotations (readOnlyHint, destructiveHint, etc.)
54
+ - Transport options (stdio, SSE, HTTP)
55
+
56
+ **Key Sections to Focus On:**
57
+ - Tools overview and definition structure
58
+ - Tool implementation patterns
59
+ - Error handling standards
60
+ - Security considerations
61
+ - Best practices
62
+
63
+ **Time:** 30-45 minutes
64
+
65
+ **Checkpoint:** Do you understand how tools are registered and how responses are formatted?
66
+
67
+ ### Step 1.3: Study Framework Documentation
68
+
69
+ Choose your implementation language and load the corresponding SDK documentation.
70
+
71
+ #### For Python (FastMCP):
72
+
73
+ **Action:**
74
+ - Use WebFetch to load: `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md`
75
+ - Also load: [python_mcp_server.md](./python_mcp_server.md)
76
+
77
+ **Focus On:**
78
+ - FastMCP initialization: `mcp = FastMCP("service_mcp")`
79
+ - Tool decorator: `@mcp.tool(name=..., annotations=...)` (pass both as keyword arguments)
80
+ - Pydantic model integration for input validation
81
+ - Context injection for logging/progress
82
+ - Resource registration (if needed)
83
+ - Lifespan management
84
+ - Transport configuration
85
+
86
+ **Time:** 30-40 minutes
87
+
88
+ #### For Node/TypeScript (MCP SDK):
89
+
90
+ **Action:**
91
+ - Use WebFetch to load: `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md`
92
+ - Also load: [node_mcp_server.md](./node_mcp_server.md)
93
+
94
+ **Focus On:**
95
+ - McpServer initialization
96
+ - `server.registerTool` pattern
97
+ - Zod schema integration
98
+ - StdioServerTransport setup
99
+ - Type safety requirements
100
+ - Build configuration
101
+
102
+ **Time:** 30-40 minutes
103
+
104
+ **Checkpoint:** Can you write a skeleton server with one simple tool?
105
+
106
+ ### Step 1.4: Exhaustively Study API Documentation
107
+
108
+ **This is the most important research step.** Incomplete API knowledge leads to missing critical tools.
109
+
110
+ **Action:**
111
+ - Read **ALL** available API documentation for the service you're integrating
112
+ - Use WebSearch and WebFetch to gather comprehensive information
113
+ - Parallelize this step if there are multiple documentation sources
114
+
115
+ **What to Document:**
116
+
117
+ #### Authentication & Authorization
118
+ - Authentication methods (API keys, OAuth, tokens)
119
+ - How to obtain credentials
120
+ - Where credentials are passed (headers, query params)
121
+ - Permission requirements for different operations
122
+ - Rate limiting rules and headers
123
+
124
+ #### Available Endpoints
125
+ - List ALL endpoints and their purposes
126
+ - HTTP methods (GET, POST, PUT, DELETE, PATCH)
127
+ - URL patterns and path parameters
128
+ - Query parameters and their constraints
129
+ - Request body schemas
130
+ - Response schemas and status codes
131
+
132
+ #### Data Models
133
+ - Key resources (users, projects, tasks, messages, etc.)
134
+ - Field names, types, and constraints
135
+ - Required vs optional fields
136
+ - Relationships between resources
137
+ - ID formats and patterns
138
+
139
+ #### Pagination & Filtering
140
+ - Pagination mechanisms (offset/limit, cursor-based, page numbers)
141
+ - Default page sizes and maximum limits
142
+ - Filter/search capabilities
143
+ - Sorting options
144
+
145
+ #### Error Responses
146
+ - Error formats and status codes
147
+ - Common error scenarios
148
+ - Retry strategies
149
+ - Rate limit error handling
150
+
151
+ **Time:** 1-2 hours (varies by API complexity)
152
+
153
+ **Checkpoint:** Can you list the 10 most important endpoints and explain what each does?
154
+
155
+ ### Step 1.5: Create a Comprehensive Implementation Plan
156
+
157
+ Now synthesize all research into a concrete plan.
158
+
159
+ #### Plan Component 1: Tool Selection
160
+
161
+ **Identify High-Value Tools:**
162
+ - Which endpoints enable the most common workflows?
163
+ - What would an agent most frequently need to accomplish?
164
+ - Which operations naturally combine into workflow tools?
165
+
166
+ **Prioritize by Impact:**
167
+ 1. Search/find tools (agents need to discover resources)
168
+ 2. Read/get tools (agents need to retrieve information)
169
+ 3. Create/update tools (agents need to modify state)
170
+ 4. Workflow tools (combinations of the above)
171
+
172
+ **Example Tool List:**
173
+ ```
174
+ High Priority (Implement First):
175
+ - search_users(query, team, status) - Find users by various criteria
176
+ - get_project_status(project_name) - Overview of project metrics
177
+ - list_recent_activity(project, limit) - What's been happening
178
+
179
+ Medium Priority:
180
+ - create_task(project, title, assignee, due_date)
181
+ - update_task_status(task_id, status, note)
182
+ - assign_task(task_id, user)
183
+
184
+ Low Priority (Nice to Have):
185
+ - export_project_data(project, format)
186
+ - generate_report(project, date_range)
187
+ ```
188
+
189
+ #### Plan Component 2: Shared Utilities and Helpers
190
+
191
+ **API Request Infrastructure:**
192
+ ```python
193
+ # Python example
194
+ async def _make_api_request(endpoint, method="GET", **kwargs):
195
+ """Centralized API calling with auth, timeouts, error handling"""
196
+
197
+ def _handle_api_error(error):
198
+ """Convert API errors to actionable error messages"""
199
+
200
+ async def _paginate_results(endpoint, params, max_items):
201
+ """Handle pagination across multiple API calls"""
202
+ ```
203
+
204
+ **Response Formatting:**
205
+ ```python
206
+ def _format_as_markdown(data):
207
+ """Convert JSON data to human-readable markdown"""
208
+
209
+ def _format_as_json(data):
210
+ """Convert to structured JSON with consistent schema"""
211
+
212
+ def _truncate_if_needed(response):
213
+ """Enforce CHARACTER_LIMIT with helpful truncation message"""
214
+ ```
215
+
216
+ **Common Operations:**
217
+ ```python
218
+ async def _resolve_user_id(identifier):
219
+ """Accept name or ID, return ID (for flexible inputs)"""
220
+
221
+ async def _validate_project_exists(project_name):
222
+ """Check project exists, return helpful error if not"""
223
+ ```
224
+
225
+ #### Plan Component 3: Input/Output Design
226
+
227
+ **For Each Tool, Define:**
228
+
229
+ **Input Parameters:**
230
+ - Required vs optional parameters
231
+ - Parameter types and constraints (min/max, patterns, enums)
232
+ - Validation rules (Pydantic models or Zod schemas)
233
+ - Examples of valid inputs
234
+ - Default values
235
+
236
+ **Example Input Design:**
237
+ ```python
238
+ class SearchUsersInput(BaseModel):
239
+ query: str = Field(..., min_length=2, max_length=200,
240
+ description="Search string (e.g., 'john', 'marketing', 'active')")
241
+ team: Optional[str] = Field(None,
242
+ description="Filter by team name")
243
+ status: Optional[str] = Field(None,
244
+ description="Filter by status: 'active', 'inactive', 'pending'")
245
+ limit: int = Field(20, ge=1, le=100,
246
+ description="Max results (1-100)")
247
+ offset: int = Field(0, ge=0,
248
+ description="Skip N results for pagination")
249
+ response_format: ResponseFormat = Field(ResponseFormat.MARKDOWN,
250
+ description="'markdown' or 'json'")
251
+ ```
252
+
253
+ **Output Formats:**
254
+ - Markdown format (default, human-readable)
255
+ - JSON format (optional, machine-readable)
256
+ - Character limit strategy (typically 25,000 chars)
257
+ - Truncation handling
258
+ - Pagination metadata
259
+
260
+ **Example Output Design:**
261
+ ```markdown
262
+ Markdown format:
263
+ # Search Results: "marketing"
264
+
265
+ Found 47 users (showing 20)
266
+
267
+ ## John Doe (U123)
268
+ - Email: john@example.com
269
+ - Team: Marketing
270
+ - Status: Active
271
+
272
+ JSON format:
273
+ {
274
+ "total": 47,
275
+ "count": 20,
276
+ "offset": 0,
277
+ "users": [
278
+ {"id": "U123", "name": "John Doe", "email": "john@example.com", ...}
279
+ ],
280
+ "has_more": true,
281
+ "next_offset": 20
282
+ }
283
+ ```
284
+
285
+ #### Plan Component 4: Error Handling Strategy
286
+
287
+ **For Each Potential Error:**
288
+
289
+ **Authentication Errors (401):**
290
+ ```
291
+ Error: Invalid API credentials.
292
+ Check that EXAMPLE_API_KEY environment variable is set correctly.
293
+ Visit https://example.com/settings/api to generate a new key.
294
+ ```
295
+
296
+ **Authorization Errors (403):**
297
+ ```
298
+ Error: Permission denied accessing project 'website-redesign'.
299
+ You may not have access to this project. Use list_projects() to see
300
+ available projects, or contact your admin to request access.
301
+ ```
302
+
303
+ **Not Found Errors (404):**
304
+ ```
305
+ Error: Project 'webiste-redesign' not found.
306
+ Did you mean 'website-redesign'? Use list_projects() to see exact names.
307
+ ```
308
+
309
+ **Rate Limit Errors (429):**
310
+ ```
311
+ Error: Rate limit exceeded (max 100 requests/minute).
312
+ Wait 60 seconds before retrying, or reduce request frequency.
313
+ ```
314
+
315
+ **Validation Errors:**
316
+ ```
317
+ Error: Date format invalid. Expected YYYY-MM-DD (e.g., '2024-01-15'),
318
+ received '01/15/2024'. Please use ISO date format.
319
+ ```
320
+
321
+ **Truncation Warnings:**
322
+ ```
323
+ Response truncated from 1,247 items to 50 items (25,000 character limit).
324
+ To see more results:
325
+ - Add filters: use team='marketing' or status='active'
326
+ - Use pagination: set offset=50 to see next page
327
+ - Use JSON format: response_format='json' is more compact
328
+ ```
329
+
330
+ **Plan Error Handling Code:**
331
+ - Create consistent error formatter function
332
+ - Map HTTP status codes to actionable messages
333
+ - Include suggested next steps in every error
334
+ - Reference related tools when helpful
335
+
336
+ #### Plan Component 5: Document Loading Strategy
337
+
338
+ **Create a Loading Sequence:**
339
+
340
+ ```
341
+ Phase 1.1: Load design_principles.md
342
+
343
+ Phase 1.2: Load MCP protocol docs
344
+
345
+ Phase 1.3: Load Python/TypeScript SDK docs + language guide
346
+
347
+ Phase 1.4: Fetch API documentation exhaustively
348
+
349
+ Phase 1.5: Create this plan
350
+
351
+ Phase 2.1: Begin implementation
352
+
353
+ Phase 2.4: Load mcp_best_practices.md for validation
354
+
355
+ Phase 3.3: Load language-specific checklist
356
+
357
+ Phase 4: Load evaluation.md
358
+ ```
359
+
360
+ **Time for Entire Planning Phase:** 3-4 hours
361
+
362
+ **Deliverable:** Written implementation plan documenting:
363
+ - 5-15 high-priority tools with descriptions
364
+ - Shared utility functions needed
365
+ - Input/output schemas for each tool
366
+ - Error handling strategy
367
+ - Example tool calls and responses
368
+
369
+ ---
370
+
371
+ ## Phase 2: Implementation (30% of effort)
372
+
373
+ Now execute your plan systematically.
374
+
375
+ ### Step 2.1: Set Up Project Structure
376
+
377
+ #### For Python (FastMCP):
378
+
379
+ **Single File Structure (Simple Servers):**
380
+ ```
381
+ service_mcp.py # All code in one file
382
+ requirements.txt # Dependencies
383
+ README.md # Usage instructions
384
+ evaluation.xml # Your evaluations
385
+ ```
386
+
387
+ **Multi-File Structure (Complex Servers):**
388
+ ```
389
+ service_mcp/
390
+ ├── __init__.py
391
+ ├── server.py # Main FastMCP initialization
392
+ ├── tools/
393
+ │ ├── search_tools.py # Search/find operations
394
+ │ ├── crud_tools.py # Create/read/update/delete
395
+ │ └── workflow_tools.py # Combined workflow operations
396
+ ├── models.py # Pydantic input models
397
+ ├── utils.py # Shared utilities
398
+ ├── constants.py # API_BASE_URL, CHARACTER_LIMIT, etc.
399
+ └── requirements.txt
400
+ ```
401
+
402
+ **Initialize:**
403
+ ```python
404
+ # service_mcp.py or server.py
405
+ from mcp.server.fastmcp import FastMCP
406
+
407
+ mcp = FastMCP("service_mcp")
408
+
409
+ # Constants at module level
410
+ API_BASE_URL = "https://api.example.com/v1"
411
+ CHARACTER_LIMIT = 25000
412
+
413
+ if __name__ == "__main__":
414
+ mcp.run()
415
+ ```
416
+
417
+ **Dependencies (requirements.txt):**
418
+ ```
419
+ mcp>=1.0.0
420
+ pydantic>=2.0.0
421
+ httpx>=0.24.0
422
+ ```
423
+
424
+ #### For Node/TypeScript (MCP SDK):
425
+
426
+ **Project Structure:**
427
+ ```
428
+ service-mcp-server/
429
+ ├── package.json
430
+ ├── tsconfig.json
431
+ ├── README.md
432
+ ├── src/
433
+ │ ├── index.ts # Main entry point
434
+ │ ├── types.ts # TypeScript interfaces
435
+ │ ├── tools/ # Tool implementations
436
+ │ ├── services/ # API clients
437
+ │ ├── schemas/ # Zod schemas
438
+ │ └── constants.ts # Configuration
439
+ └── dist/ # Compiled JavaScript
440
+ ```
441
+
442
+ **Initialize (package.json):**
443
+ ```json
444
+ {
445
+ "name": "service-mcp-server",
446
+ "version": "1.0.0",
447
+ "type": "module",
448
+ "main": "dist/index.js",
449
+ "scripts": {
450
+ "build": "tsc",
451
+ "start": "node dist/index.js"
452
+ },
453
+ "dependencies": {
454
+ "@modelcontextprotocol/sdk": "^1.6.1",
455
+ "axios": "^1.7.9",
456
+ "zod": "^3.23.8"
457
+ }
458
+ }
459
+ ```
460
+
461
+ **Initialize (src/index.ts):**
462
+ ```typescript
463
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
464
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
465
+
466
+ const server = new McpServer({
467
+ name: "service-mcp-server",
468
+ version: "1.0.0"
469
+ });
470
+
471
+ // Constants
472
+ export const API_BASE_URL = "https://api.example.com/v1";
473
+ export const CHARACTER_LIMIT = 25000;
474
+
475
+ async function main() {
476
+ const transport = new StdioServerTransport();
477
+ await server.connect(transport);
478
+ console.error("Service MCP server running via stdio");
479
+ }
480
+
481
+ main().catch(console.error);
482
+ ```
483
+
484
+ ### Step 2.2: Implement Core Infrastructure First
485
+
486
+ **DO NOT start with tools.** Build shared utilities first.
487
+
488
+ #### Shared API Request Function
489
+
490
+ **Python:**
491
+ ```python
492
+ async def _make_api_request(
493
+ endpoint: str,
494
+ method: str = "GET",
495
+ data: Optional[dict] = None,
496
+ params: Optional[dict] = None
497
+ ) -> dict:
498
+ """Centralized API calling with auth and a 30-second timeout; raises on HTTP error status."""
499
+ async with httpx.AsyncClient() as client:
500
+ response = await client.request(
501
+ method,
502
+ f"{API_BASE_URL}/{endpoint}",
503
+ json=data,
504
+ params=params,
505
+ headers={
506
+ "Authorization": f"Bearer {os.getenv('EXAMPLE_API_KEY')}",
507
+ "Content-Type": "application/json"
508
+ },
509
+ timeout=30.0
510
+ )
511
+ response.raise_for_status()
512
+ return response.json()
513
+ ```
514
+
515
+ **TypeScript:**
516
+ ```typescript
517
+ async function makeApiRequest<T>(
518
+ endpoint: string,
519
+ method: "GET" | "POST" | "PUT" | "DELETE" = "GET",
520
+ data?: any,
521
+ params?: any
522
+ ): Promise<T> {
523
+ const response = await axios({
524
+ method,
525
+ url: `${API_BASE_URL}/${endpoint}`,
526
+ data,
527
+ params,
528
+ headers: {
529
+ "Authorization": `Bearer ${process.env.EXAMPLE_API_KEY}`,
530
+ "Content-Type": "application/json"
531
+ },
532
+ timeout: 30000
533
+ });
534
+ return response.data;
535
+ }
536
+ ```
537
+
538
+ #### Error Handler
539
+
540
+ **Python:**
541
+ ```python
542
+ def _handle_api_error(e: Exception) -> str:
543
+ """Convert exceptions to actionable error messages."""
544
+ if isinstance(e, httpx.HTTPStatusError):
545
+ status = e.response.status_code
546
+ if status == 401:
547
+ return "Error: Invalid API credentials. Check EXAMPLE_API_KEY environment variable."
548
+ elif status == 403:
549
+ return "Error: Permission denied. You don't have access to this resource."
550
+ elif status == 404:
551
+ return "Error: Resource not found. Check the ID is correct."
552
+ elif status == 429:
553
+ return "Error: Rate limit exceeded. Wait before making more requests."
554
+ return f"Error: API request failed with status {status}"
555
+ elif isinstance(e, httpx.TimeoutException):
556
+ return "Error: Request timed out. Try again or check network connection."
557
+ return f"Error: Unexpected error: {type(e).__name__}"
558
+ ```
559
+
560
+ #### Response Formatters
561
+
562
+ **Python:**
563
+ ```python
564
+ def _format_as_markdown(data: list, title: str) -> str:
565
+ """Format list of items as readable markdown."""
566
+ lines = [f"# {title}", ""]
567
+ for item in data:
568
+ lines.append(f"## {item['name']} ({item['id']})")
569
+ lines.append(f"- **Status**: {item['status']}")
570
+ lines.append("")
571
+ return "\n".join(lines)
572
+
573
+ def _format_as_json(data: dict) -> str:
574
+ """Format as pretty-printed JSON."""
575
+ return json.dumps(data, indent=2)
576
+
577
+ def _check_character_limit(text: str) -> str:
578
+ """Enforce character limit with truncation message."""
579
+ if len(text) > CHARACTER_LIMIT:
580
+ truncated = text[:CHARACTER_LIMIT]
581
+ truncated += "\n\n[Response truncated at 25,000 character limit]"
582
+ return truncated
583
+ return text
584
+ ```
585
+
586
+ **Time:** 1-2 hours to build solid infrastructure
587
+
588
+ ### Step 2.3: Implement Tools Systematically
589
+
590
+ For each tool in your plan, follow this pattern:
591
+
592
+ #### Step A: Define Input Schema
593
+
594
+ **Python (Pydantic):**
595
+ ```python
596
+ from pydantic import BaseModel, Field, ConfigDict
597
+ from enum import Enum
598
+
599
+ class ResponseFormat(str, Enum):
600
+ MARKDOWN = "markdown"
601
+ JSON = "json"
602
+
603
+ class SearchUsersInput(BaseModel):
604
+ model_config = ConfigDict(
605
+ str_strip_whitespace=True,
606
+ validate_assignment=True,
607
+ extra='forbid'
608
+ )
609
+
610
+ query: str = Field(
611
+ ...,
612
+ description="Search string to match (e.g., 'john', 'team:marketing')",
613
+ min_length=2,
614
+ max_length=200
615
+ )
616
+ limit: int = Field(
617
+ default=20,
618
+ description="Maximum results to return (1-100)",
619
+ ge=1,
620
+ le=100
621
+ )
622
+ offset: int = Field(
623
+ default=0,
624
+ description="Results to skip for pagination",
625
+ ge=0
626
+ )
627
+ response_format: ResponseFormat = Field(
628
+ default=ResponseFormat.MARKDOWN,
629
+ description="Output format: 'markdown' or 'json'"
630
+ )
631
+ ```
632
+
633
+ **TypeScript (Zod):**
634
+ ```typescript
635
+ import { z } from "zod";
636
+
637
+ enum ResponseFormat {
638
+ MARKDOWN = "markdown",
639
+ JSON = "json"
640
+ }
641
+
642
+ const SearchUsersInputSchema = z.object({
643
+ query: z.string()
644
+ .min(2, "Query must be at least 2 characters")
645
+ .max(200, "Query too long")
646
+ .describe("Search string to match"),
647
+ limit: z.number()
648
+ .int()
649
+ .min(1)
650
+ .max(100)
651
+ .default(20)
652
+ .describe("Maximum results (1-100)"),
653
+ offset: z.number()
654
+ .int()
655
+ .min(0)
656
+ .default(0)
657
+ .describe("Results to skip for pagination"),
658
+ response_format: z.nativeEnum(ResponseFormat)
659
+ .default(ResponseFormat.MARKDOWN)
660
+ .describe("Output format")
661
+ }).strict();
662
+
663
+ type SearchUsersInput = z.infer<typeof SearchUsersInputSchema>;
664
+ ```
665
+
666
+ #### Step B: Write Comprehensive Tool Description
667
+
668
+ **Key Elements:**
669
+ 1. One-line summary
670
+ 2. Detailed explanation of functionality
671
+ 3. Parameter documentation with examples
672
+ 4. Return value schema
673
+ 5. Usage examples (when to use, when NOT to use)
674
+ 6. Error handling documentation
675
+
676
+ **Example:**
677
+ ```python
678
+ @mcp.tool(
679
+ name="example_search_users",
680
+ annotations={
681
+ "title": "Search Example Users",
682
+ "readOnlyHint": True,
683
+ "destructiveHint": False,
684
+ "idempotentHint": True,
685
+ "openWorldHint": True
686
+ }
687
+ )
688
+ async def example_search_users(params: SearchUsersInput) -> str:
689
+ """Search for users in the Example system by name, email, or team.
690
+
691
+ This tool searches across all user profiles, supporting partial matches
692
+ and flexible filtering. It does NOT create or modify users - only searches.
693
+
694
+ Args:
695
+ params (SearchUsersInput): Search parameters including:
696
+ - query (str): Search string (e.g., "john", "team:marketing")
697
+ - limit (int): Max results 1-100 (default: 20)
698
+ - offset (int): Pagination offset (default: 0)
699
+ - response_format: 'markdown' or 'json' (default: 'markdown')
700
+
701
+ Returns:
702
+ str: Search results formatted as markdown or JSON
703
+
704
+ JSON format schema:
705
+ {
706
+ "total": int, # Total matches found
707
+ "count": int, # Results in this response
708
+ "offset": int, # Current offset
709
+ "users": [
710
+ {
711
+ "id": str, # User ID (e.g., "U123")
712
+ "name": str, # Full name
713
+ "email": str, # Email address
714
+ "team": str, # Team name (optional)
715
+ "status": str # 'active', 'inactive', 'pending'
716
+ }
717
+ ],
718
+ "has_more": bool, # More results available
719
+ "next_offset": int # Offset for next page
720
+ }
721
+
722
+ Examples:
723
+ - Find marketing team: params with query="team:marketing"
724
+ - Find by name: params with query="john"
725
+ - Get detailed data: params with response_format="json"
726
+
727
+ Don't use when:
728
+ - You need to CREATE a user -> use example_create_user instead
729
+ - You have a user ID and need full details -> use example_get_user
730
+
731
+ Error Handling:
732
+ - "No users found": Returns empty results with suggestion to broaden search
733
+ - "Rate limit exceeded": Advises waiting before retry
734
+ - "Invalid query": Provides example of valid query format
735
+ """
736
+ ```
737
+
738
+ #### Step C: Implement Tool Logic
739
+
740
+ **Pattern:**
741
+ ```python
742
+ async def example_search_users(params: SearchUsersInput) -> str:
743
+ try:
744
+ # 1. Make API request using shared utilities
745
+ data = await _make_api_request(
746
+ "users/search",
747
+ params={"q": params.query, "limit": params.limit, "offset": params.offset}
748
+ )
749
+
750
+ users = data.get("users", [])
751
+ total = data.get("total", 0)
752
+
753
+ # 2. Handle empty results
754
+ if not users:
755
+ return f"No users found matching '{params.query}'. Try a broader search term."
756
+
757
+ # 3. Format response based on requested format
758
+ if params.response_format == ResponseFormat.MARKDOWN:
759
+ result = _format_users_markdown(users, total, params.query)
760
+ else:
761
+ result = _format_users_json(users, total, params.offset)
762
+
763
+ # 4. Enforce character limit
764
+ result = _check_character_limit(result)
765
+
766
+ return result
767
+
768
+ except Exception as e:
769
+ # 5. Convert errors to actionable messages
770
+ return _handle_api_error(e)
771
+ ```
772
+
773
+ #### Step D: Add Tool Annotations
774
+
775
+ Always include these annotations:
776
+
777
+ ```python
778
+ annotations={
779
+ "title": "Human-Readable Tool Name",
780
+ "readOnlyHint": True/False, # Does it only read (never modify) state?
781
+ "destructiveHint": True/False, # Does it delete/destroy data?
782
+ "idempotentHint": True/False, # Same call twice = same result?
783
+ "openWorldHint": True/False # Does it interact externally?
784
+ }
785
+ ```
786
+
787
+ **Examples:**
788
+ - Search tool: readOnly=True, destructive=False, idempotent=True, openWorld=True
789
+ - Create tool: readOnly=False, destructive=False, idempotent=False, openWorld=True
790
+ - Delete tool: readOnly=False, destructive=True, idempotent=True, openWorld=True
791
+
792
+ **Repeat for Each Tool in Your Plan**
793
+
794
+ **Time:** 30-60 minutes per tool (5-10 tools = 3-6 hours)
795
+
796
+ ### Step 2.4: Follow Language-Specific Best Practices
797
+
798
+ Before finalizing implementation:
799
+
800
+ #### For Python:
801
+ - Load [python_mcp_server.md](./python_mcp_server.md)
802
+ - Verify using Pydantic v2 with `model_config`
803
+ - Check all async/await patterns
804
+ - Confirm type hints throughout
805
+ - Review quality checklist
806
+
807
+ #### For TypeScript:
808
+ - Load [node_mcp_server.md](./node_mcp_server.md)
809
+ - Verify Zod schemas use `.strict()`
810
+ - Check TypeScript strict mode enabled
811
+ - Confirm no `any` types
812
+ - Review quality checklist
813
+ - Run `npm run build` to verify
814
+
815
+ **Time:** 1 hour
816
+
817
+ ---
818
+
819
+ ## Phase 3: Review and Refine (15% of effort)
820
+
821
+ ### Step 3.1: Code Quality Review
822
+
823
+ **DRY Principle (Don't Repeat Yourself):**
824
+ - [ ] No duplicated code between tools
825
+ - [ ] Common operations extracted to utility functions
826
+ - [ ] API request logic centralized
827
+ - [ ] Error handling consistent
828
+
829
+ **Composability:**
830
+ - [ ] Shared utilities can be combined flexibly
831
+ - [ ] Tools use shared formatters
832
+ - [ ] Validation logic is reusable
833
+
834
+ **Consistency:**
835
+ - [ ] Similar operations return similar formats
836
+ - [ ] Tool names follow same patterns
837
+ - [ ] Error messages have consistent structure
838
+ - [ ] Pagination handled identically across tools
839
+
840
+ **Error Handling:**
841
+ - [ ] All external API calls wrapped in try/except (Python) or try/catch (TypeScript)
842
+ - [ ] Every error returns actionable message
843
+ - [ ] Timeout scenarios handled
844
+ - [ ] Authentication errors caught
845
+
846
+ **Type Safety:**
847
+ - [ ] Python: Type hints on all functions
848
+ - [ ] TypeScript: No `any` types
849
+ - [ ] Input validation via Pydantic/Zod
850
+ - [ ] Output types documented
851
+
852
+ **Documentation:**
853
+ - [ ] Every tool has comprehensive docstring
854
+ - [ ] Return schemas fully documented
855
+ - [ ] Usage examples provided
856
+ - [ ] Error scenarios explained
857
+
858
+ **Time:** 1-2 hours
859
+
860
+ ### Step 3.2: Test and Build
861
+
862
+ **IMPORTANT:** MCP servers are long-running processes that wait for input indefinitely. Never run them directly in the foreground of your working session:
863
+ - ❌ `python server.py` - WILL HANG FOREVER
864
+ - ❌ `node dist/index.js` - WILL HANG FOREVER
865
+
866
+ **Safe Testing Options:**
867
+
868
+ #### Option 1: Use Evaluation Harness (Recommended)
869
+ ```bash
870
+ python scripts/evaluation.py \
871
+ -t stdio \
872
+ -c python \
873
+ -a your_server.py \
874
+ evaluation.xml
875
+ ```
876
+ The harness manages the server process for you.
877
+
878
+ #### Option 2: Run in tmux
879
+ ```bash
880
+ # Terminal 1
881
+ tmux new -s mcp-server
882
+ python your_server.py
883
+ # Detach with Ctrl+B then D
884
+
885
+ # Terminal 2
886
+ # Test with evaluation harness or manual testing
887
+ ```
888
+
889
+ #### For Python:
890
+
891
+ **Verify Syntax:**
892
+ ```bash
893
+ python -m py_compile your_server.py
894
+ ```
895
+
896
+ **Check Imports:**
897
+ Read through the file to verify:
898
+ - All imports are valid
899
+ - Pydantic models defined before use
900
+ - No circular dependencies
901
+
902
+ **Test Pattern:**
903
+ ```bash
904
+ # In tmux or use evaluation harness
905
+ timeout 5s python your_server.py
906
+ # Should time out after 5s (proving it's listening)
907
+ ```
908
+
909
+ #### For TypeScript:
910
+
911
+ **Build:**
912
+ ```bash
913
+ npm run build
914
+ ```
915
+ **MUST complete without errors.**
916
+
917
+ **Verify Output:**
918
+ ```bash
919
+ ls dist/index.js # Must exist
920
+ ```
921
+
922
+ **Test Pattern:**
923
+ ```bash
924
+ # In tmux or use evaluation harness
925
+ timeout 5s node dist/index.js
926
+ # Should time out after 5s (proving it's listening)
927
+ ```
928
+
929
+ **Time:** 30 minutes
930
+
931
+ ### Step 3.3: Use Quality Checklist
932
+
933
+ #### Python Checklist:
934
+ Load the "Quality Checklist" section from [python_mcp_server.md](./python_mcp_server.md)
935
+
936
+ Key items:
937
+ - [ ] Server name format: `{service}_mcp`
938
+ - [ ] All tools use `@mcp.tool` decorator
939
+ - [ ] Pydantic models for all inputs
940
+ - [ ] Annotations on all tools
941
+ - [ ] Async/await throughout
942
+ - [ ] CHARACTER_LIMIT enforced
943
+ - [ ] Pagination implemented
944
+ - [ ] Shared utilities used
945
+
946
+ #### TypeScript Checklist:
947
+ Load the "Quality Checklist" section from [node_mcp_server.md](./node_mcp_server.md)
948
+
949
+ Key items:
950
+ - [ ] Server name format: `{service}-mcp-server`
951
+ - [ ] `npm run build` succeeds
952
+ - [ ] dist/index.js exists
953
+ - [ ] Zod schemas with `.strict()`
954
+ - [ ] TypeScript strict mode
955
+ - [ ] No `any` types
956
+ - [ ] CHARACTER_LIMIT enforced
957
+ - [ ] Pagination implemented
958
+
959
+ **Time:** 30 minutes
960
+
961
+ ---
962
+
963
+ ## Phase 4: Create Evaluations (15% of effort)
964
+
965
+ ### Step 4.1: Load Evaluation Guide
966
+
967
+ **Action:**
968
+ - Load [evaluation.md](./evaluation.md) completely
969
+ - Understand evaluation purpose and requirements
970
+
971
+ **Key Points:**
972
+ - Evaluations test if agents can answer realistic questions
973
+ - Questions must be read-only, independent, complex
974
+ - Answers must be single verifiable values
975
+ - Create 10 questions that require multiple tool calls
976
+
977
+ ### Step 4.2: Understand Evaluation Requirements
978
+
979
+ **Each Question Must Be:**
980
+ - Independent (not dependent on other questions)
981
+ - Read-only (no state modifications required)
982
+ - Complex (requiring multiple tool calls, potentially dozens)
983
+ - Realistic (based on actual use cases)
984
+ - Verifiable (single clear answer via string comparison)
985
+ - Stable (answer won't change over time)
986
+
987
+ **Each Answer Must Be:**
988
+ - Single value (not a list or complex object)
989
+ - Verifiable via direct string comparison
990
+ - Human-readable when possible (names over IDs)
991
+ - Stable over time (based on historical data)
992
+
993
+ ### Step 4.3: Create Evaluation Process
994
+
995
+ **Step 1: Tool Inspection**
996
+ ```python
997
+ # List your implemented tools
998
+ tools = [
999
+ "example_search_users",
1000
+ "example_get_project_status",
1001
+ "example_list_recent_activity",
1002
+ # ... etc
1003
+ ]
1004
+
1005
+ # Understand capabilities
1006
+ for tool in tools:
1007
+ print(f"{tool}: {tool_description}")
1008
+ ```
1009
+
1010
+ **Step 2: Content Exploration**
1011
+ - Use READ-ONLY tools to explore available data
1012
+ - Identify specific users, projects, tasks for questions
1013
+ - Find historical data that won't change
1014
+ - Use `limit=10` to avoid overwhelming context
1015
+
1016
+ **Step 3: Generate Questions**
1017
+ Create 10 questions that:
1018
+ - Require understanding your API's data
1019
+ - Test multiple tools working together
1020
+ - Challenge agents with realistic complexity
1021
+ - Have stable, verifiable answers
1022
+
1023
+ **Example Questions:**
1024
+ ```xml
1025
+ <qa_pair>
1026
+ <question>Find the project that was completed in Q3 2023 and had the highest number of tasks marked as 'critical' priority. What was the project manager's email address?</question>
1027
+ <answer>sarah.johnson@example.com</answer>
1028
+ </qa_pair>
1029
+
1030
+ <qa_pair>
1031
+ <question>Among all users in the Engineering team who joined before January 2024, which user has closed the most bug reports? Provide their full name.</question>
1032
+ <answer>Michael Chen</answer>
1033
+ </qa_pair>
1034
+ ```
1035
+
1036
+ **Step 4: Verify Answers**
1037
+ - Solve each question yourself using the MCP server tools
1038
+ - Verify the answer is stable (won't change)
1039
+ - Confirm answer can be found with available tools
1040
+ - Adjust questions if too easy or impossible
1041
+
1042
+ ### Step 4.4: Create evaluation.xml File
1043
+
1044
+ ```xml
1045
+ <evaluation>
1046
+ <qa_pair>
1047
+ <question>Your first question here</question>
1048
+ <answer>verifiable answer</answer>
1049
+ </qa_pair>
1050
+ <qa_pair>
1051
+ <question>Your second question here</question>
1052
+ <answer>verifiable answer</answer>
1053
+ </qa_pair>
1054
+ <!-- 8 more qa_pairs -->
1055
+ </evaluation>
1056
+ ```
1057
+
1058
+ ### Step 4.5: Run Evaluation
1059
+
1060
+ ```bash
1061
+ # Install dependencies
1062
+ pip install anthropic mcp
1063
+
1064
+ # Set API key
1065
+ export ANTHROPIC_API_KEY=your_key
1066
+
1067
+ # Run evaluation
1068
+ python scripts/evaluation.py \
1069
+ -t stdio \
1070
+ -c python \
1071
+ -a your_server.py \
1072
+ -e EXAMPLE_API_KEY=your_api_key \
1073
+ -o report.md \
1074
+ evaluation.xml
1075
+ ```
1076
+
1077
+ **Review Results:**
1078
+ - Which questions passed/failed?
1079
+ - What was the agent's feedback on your tools?
1080
+ - Where did agents struggle?
1081
+ - What improvements are suggested?
1082
+
1083
+ ### Step 4.6: Iterate Based on Results
1084
+
1085
+ **If Accuracy < 80%:**
1086
+ - Review failed questions
1087
+ - Read agent feedback carefully
1088
+ - Identify patterns in failures
1089
+ - Improve tools based on feedback
1090
+ - Re-run evaluations
1091
+
1092
+ **Common Improvements:**
1093
+ - Add better search/filter capabilities
1094
+ - Improve error messages with examples
1095
+ - Reduce response verbosity
1096
+ - Add missing workflow tools
1097
+ - Improve tool descriptions
1098
+
1099
+ **Time:** 2-3 hours
1100
+
1101
+ ---
1102
+
1103
+ ## Workflow Decision Tree
1104
+
1105
+ ```
1106
+ START: Building MCP Server
1107
+
1108
+ Have you read design_principles.md?
1109
+ No → Read design_principles.md first
1110
+ Yes → Continue
1111
+
1112
+ Have you loaded MCP protocol docs?
1113
+ No → Load https://modelcontextprotocol.io/llms-full.txt
1114
+ Yes → Continue
1115
+
1116
+ Have you loaded SDK docs for your language?
1117
+ No → Load Python or TypeScript SDK + guide
1118
+ Yes → Continue
1119
+
1120
+ Have you studied ALL API documentation?
1121
+ No → Exhaustively research API (1-2 hours)
1122
+ Yes → Continue
1123
+
1124
+ Have you created implementation plan?
1125
+ No → Document tools, utilities, I/O, errors
1126
+ Yes → Begin Phase 2
1127
+
1128
+ Have you built shared utilities?
1129
+ No → Build API client, error handler, formatters
1130
+ Yes → Continue
1131
+
1132
+ Have you implemented tools with validation?
1133
+ No → Implement each tool systematically
1134
+ Yes → Continue
1135
+
1136
+ Does `python -m py_compile server.py` or `npm run build` work?
1137
+ No → Fix syntax/build errors
1138
+ Yes → Continue
1139
+
1140
+ Have you reviewed code quality?
1141
+ No → Check DRY, composability, consistency
1142
+ Yes → Continue
1143
+
1144
+ Have you created 10 evaluation questions?
1145
+ No → Load evaluation.md and create evaluations
1146
+ Yes → Continue
1147
+
1148
+ Does evaluation show 80%+ accuracy?
1149
+ No → Iterate on tools based on feedback
1150
+ Yes → SUCCESS - Server is ready!
1151
+ ```
1152
+
1153
+ ---
1154
+
1155
+ ## Common Pitfalls and Solutions
1156
+
1157
+ ### Pitfall 1: Starting Implementation Too Early
1158
+ **Symptom:** Building tools without understanding agent needs
1159
+ **Solution:** Complete Phase 1 research thoroughly (40% of time)
1160
+
1161
+ ### Pitfall 2: API Wrapper Mentality
1162
+ **Symptom:** One tool per API endpoint
1163
+ **Solution:** Review design_principles.md - build workflow tools
1164
+
1165
+ ### Pitfall 3: Verbose Responses
1166
+ **Symptom:** Agents run out of context
1167
+ **Solution:** Default to concise, offer detailed option, enforce CHARACTER_LIMIT
1168
+
1169
+ ### Pitfall 4: Generic Error Messages
1170
+ **Symptom:** Agents get stuck on errors
1171
+ **Solution:** Every error must include specific next steps
1172
+
1173
+ ### Pitfall 5: Skipping Evaluations
1174
+ **Symptom:** Tools seem good but agents fail in practice
1175
+ **Solution:** Create evaluations in Phase 4, iterate based on results
1176
+
1177
+ ### Pitfall 6: Running Server Directly
1178
+ **Symptom:** `python server.py` hangs forever
1179
+ **Solution:** Use evaluation harness or tmux, never run directly
1180
+
1181
+ ### Pitfall 7: Incomplete API Research
1182
+ **Symptom:** Missing important tools
1183
+ **Solution:** Exhaustively study API docs in Phase 1.4
1184
+
1185
+ ### Pitfall 8: Duplicated Code
1186
+ **Symptom:** Similar logic across multiple tools
1187
+ **Solution:** Extract shared utilities in Phase 2.2
1188
+
1189
+ ---
1190
+
1191
+ ## Time Estimates by Phase
1192
+
1193
+ **Small Server (5-8 tools):**
1194
+ - Phase 1: 3-4 hours
1195
+ - Phase 2: 3-4 hours
1196
+ - Phase 3: 1-2 hours
1197
+ - Phase 4: 2-3 hours
1198
+ - **Total: 9-13 hours**
1199
+
1200
+ **Medium Server (10-15 tools):**
1201
+ - Phase 1: 4-5 hours
1202
+ - Phase 2: 6-8 hours
1203
+ - Phase 3: 2-3 hours
1204
+ - Phase 4: 3-4 hours
1205
+ - **Total: 15-20 hours**
1206
+
1207
+ **Large Server (20+ tools):**
1208
+ - Phase 1: 5-6 hours
1209
+ - Phase 2: 10-12 hours
1210
+ - Phase 3: 3-4 hours
1211
+ - Phase 4: 4-5 hours
1212
+ - **Total: 22-27 hours**
1213
+
1214
+ ---
1215
+
1216
+ ## Success Criteria
1217
+
1218
+ Your MCP server is ready when:
1219
+
1220
+ - [ ] All reference documentation has been loaded and studied
1221
+ - [ ] Implementation plan documents tools, I/O, and error handling
1222
+ - [ ] Shared utilities are implemented and reused across tools
1223
+ - [ ] All tools have comprehensive descriptions and examples
1224
+ - [ ] Input validation uses Pydantic (Python) or Zod (TypeScript)
1225
+ - [ ] Error messages are actionable with specific guidance
1226
+ - [ ] CHARACTER_LIMIT is enforced with truncation messages
1227
+ - [ ] Pagination is implemented where applicable
1228
+ - [ ] Code follows language-specific best practices
1229
+ - [ ] Build/syntax check succeeds
1230
+ - [ ] Quality checklist is complete
1231
+ - [ ] 10 evaluation questions created
1232
+ - [ ] Evaluation shows 80%+ agent success rate
1233
+ - [ ] Agent feedback is positive and specific
1234
+
1235
+ ---
1236
+
1237
+ **Next:** Return to [SKILL.md](../SKILL.md) for navigation to other reference files.