mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,552 @@
1
+ # MCP Adapter for GEPA
2
+
3
+ The MCP Adapter enables optimization of [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) tool usage through GEPA's reflective mutation approach.
4
+
5
+ ## Overview
6
+
7
+ This adapter optimizes:
8
+ - **Tool descriptions**: Improve how tools are described to the model
9
+ - **System prompts**: Optimize guidance for when and how to use tools
10
+ - **Tool usage patterns**: Learn better tool invocation strategies
11
+ - **Tool selection**: Choose the right tool from multiple available options
12
+
13
+ ## Multi-Tool Support
14
+
15
+ The MCP adapter supports both single-tool and multi-tool scenarios:
16
+
17
+ ### Single Tool
18
+ ```python
19
+ adapter = MCPAdapter(
20
+ tool_names="read_file", # Single tool as string
21
+ task_model="gpt-4o-mini", # Change as per your model choice
22
+ metric_fn=my_metric,
23
+ )
24
+ ```
25
+
26
+ ### Multiple Tools (New Feature)
27
+ ```python
28
+ adapter = MCPAdapter(
29
+ tool_names=["read_file", "write_file", "list_files"], # Multiple tools as list
30
+ task_model="gpt-4o-mini", # Change as per your model choice
31
+ metric_fn=my_metric,
32
+ )
33
+ ```
34
+
35
+ ## Installation
36
+
37
+ Install the MCP Python SDK:
38
+
39
+ ```bash
40
+ pip install mcp
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ### Option 1: Local Models (Ollama)
46
+
47
+ ```python
48
+ import gepa
49
+ from gepa.adapters.mcp_adapter import MCPAdapter
50
+ from mcp import StdioServerParameters
51
+
52
+ # Configure MCP server
53
+ server_params = StdioServerParameters(
54
+ command="npx",
55
+ args=["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
56
+ )
57
+
58
+ # Create dataset
59
+ dataset = [
60
+ {
61
+ "user_query": "What's in the file notes.txt?",
62
+ "tool_arguments": {"path": "/tmp/notes.txt"},
63
+ "reference_answer": "Meeting at 3pm",
64
+ "additional_context": {},
65
+ },
66
+ # ... more examples
67
+ ]
68
+
69
+ # Create adapter with LOCAL Ollama models
70
+ adapter = MCPAdapter(
71
+ server_params=server_params,
72
+ tool_names=["read_file", "write_file", "list_files"], # Multiple tools for selection
73
+ task_model="ollama/llama3.2:1b", # Local model via Ollama, replace with your model
74
+ metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
75
+ )
76
+
77
+ # Optimize with local models - no API costs!
78
+ result = gepa.optimize(
79
+ seed_candidate={"tool_description": "Read the contents of a file"},
80
+ trainset=dataset[:20],
81
+ valset=dataset[20:],
82
+ adapter=adapter,
83
+ reflection_lm="ollama/llama3.1:8b", # Larger local model for reflection; replace with your model choice
84
+ max_metric_calls=150,
85
+ )
86
+
87
+ print("Optimized tool description:", result.best_candidate["tool_description"])
88
+ # Total cost: $0.00 - runs 100% locally!
89
+ ```
90
+
91
+ **Setup for Ollama:**
92
+ ```bash
93
+ # Install Ollama: https://ollama.com
94
+
95
+ # Pull models
96
+ ollama pull llama3.1:8b
97
+ ollama pull llama3.2:1b
98
+ ```
99
+
100
+ ### Option 2: OpenAI API
101
+
102
+ ```python
103
+ # Same as above, but use OpenAI models
104
+ adapter = MCPAdapter(
105
+ server_params=server_params,
106
+ tool_names=["read_file", "write_file", "list_files"], # Multiple tools for selection
107
+ task_model="openai/gpt-4o-mini", # OpenAI API, replace with your model choice
108
+ metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
109
+ )
110
+
111
+ result = gepa.optimize(
112
+ seed_candidate={"tool_description": "Read the contents of a file"},
113
+ trainset=dataset[:20],
114
+ valset=dataset[20:],
115
+ adapter=adapter,
116
+ reflection_lm="openai/gpt-5", # OpenAI for reflection, replace with your model choice
117
+ max_metric_calls=150,
118
+ )
119
+ ```
120
+
121
+ **Setup for OpenAI:**
122
+ ```bash
123
+ export OPENAI_API_KEY=your-key-here
124
+ ```
125
+
126
+ ### Option 3: Remote MCP Servers (Trusted/Self-Hosted Servers)
127
+
128
+ Connect to thousands of public MCP servers via SSE or StreamableHTTP:
129
+
130
+ ```python
131
+ # Remote SSE server
132
+ adapter = MCPAdapter(
133
+ tool_names=["search_web", "analyze_data", "summarize_text"], # Multiple tools for selection
134
+ task_model="openai/gpt-4o-mini",
135
+ metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
136
+ remote_url="https://mcp-server.com/sse",
137
+ remote_transport="sse",
138
+ )
139
+
140
+ # Remote HTTP server with authentication
141
+ adapter = MCPAdapter(
142
+ tool_names=["analyze_data", "visualize_data", "export_data"], # Multiple tools for selection
143
+ task_model="openai/gpt-4o-mini",
144
+ metric_fn=my_metric,
145
+ remote_url="https://mcp-server.com/mcp",
146
+ remote_transport="streamable_http",
147
+ remote_headers={"Authorization": "Bearer YOUR_TOKEN"},
148
+ remote_timeout=30,
149
+ )
150
+
151
+ result = gepa.optimize(
152
+ seed_candidate={"tool_description": "Search web for information"},
153
+ trainset=dataset[:20],
154
+ valset=dataset[20:],
155
+ adapter=adapter,
156
+ reflection_lm="openai/gpt-4o",
157
+ max_metric_calls=150,
158
+ )
159
+ ```
160
+
161
+ **Benefits:**
162
+ - Access thousands of public MCP servers that you trust
163
+ - No local server setup required
164
+ - Use hosted/managed MCP tools
165
+
166
+ ## Architecture
167
+
168
+ ### Two-Pass Workflow
169
+
170
+ The adapter uses a two-pass workflow for better tool integration:
171
+
172
+ 1. **First Pass**: Model receives user query and decides whether to call the tool
173
+ - Input: User query + system prompt with tool info
174
+ - Output: Tool call decision + arguments OR direct answer
175
+
176
+ 2. **Second Pass**: Model receives tool response and generates final answer
177
+ - Input: Original query + tool response
178
+ - Output: Final answer incorporating tool results
179
+
180
+ This workflow ensures the model can effectively utilize tool outputs.
181
+
182
+ ### Implementation Approach
183
+
184
+ The adapter uses `asyncio.run()` to bridge GEPA's synchronous API with MCP's async SDK:
185
+
186
+ ```python
187
+ def evaluate(self, batch, candidate, capture_traces):
188
+ # Run async evaluation in new event loop
189
+ return asyncio.run(self._evaluate_async(batch, candidate, capture_traces))
190
+ ```
191
+
192
+ Each evaluation creates a fresh MCP session, avoiding state management complexity.
193
+
194
+ **Performance Note**: Subprocess startup adds ~100-500ms per evaluation. For a typical optimization run with 150 metric calls, expect ~15-75 seconds of MCP overhead.
195
+
196
+ ## Component Optimization
197
+
198
+ ### Tool Description
199
+
200
+ Optimizes the description field of MCP tools, improving how the model understands when and how to use each tool.
201
+
202
+ ```python
203
+ # Single tool optimization
204
+ seed_candidate = {
205
+ "tool_description": "Search through documentation files"
206
+ }
207
+
208
+ # Multi-tool optimization
209
+ seed_candidate = {
210
+ "tool_description_read_file": "Read file contents from the filesystem",
211
+ "tool_description_write_file": "Write content to a file on the filesystem",
212
+ "tool_description_list_files": "List files and directories in a given path"
213
+ }
214
+
215
+ # GEPA will optimize these to something like:
216
+ # "tool_description_read_file": "Read file contents. Use when user asks to view, show, or display file contents. Returns the full text content of the specified file."
217
+ # "tool_description_write_file": "Write content to files. Use when user asks to create, save, or update file contents. Requires file path and content parameters."
218
+ # "tool_description_list_files": "List directory contents. Use when user asks to see what files are available, browse directories, or find files. Returns a list of files and folders."
219
+ ```
220
+
221
+ ### System Prompt
222
+
223
+ Optimizes the overall system prompt to provide better guidance on tool usage strategy.
224
+
225
+ ```python
226
+ seed_candidate = {
227
+ "tool_description": "Read file contents",
228
+ "system_prompt": "You are a helpful assistant with file access."
229
+ }
230
+
231
+ # GEPA optimizes both components jointly
232
+ ```
233
+
234
+ ## Dataset Format
235
+
236
+ The `MCPDataInst` TypedDict defines the expected dataset format:
237
+
238
+ ```python
239
+ {
240
+ "user_query": str, # User's question/request
241
+ "tool_arguments": dict, # Expected tool arguments
242
+ "reference_answer": str | None, # Reference answer for scoring
243
+ "additional_context": dict, # Additional context
244
+ }
245
+ ```
246
+
247
+ Example:
248
+
249
+ ```python
250
+ {
251
+ "user_query": "Show me the config file",
252
+ "tool_arguments": {"path": "/app/config.json"},
253
+ "reference_answer": '{"debug": true}',
254
+ "additional_context": {"file_location": "/app"},
255
+ }
256
+ ```
257
+
258
+ ## Metric Functions
259
+
260
+ The metric function scores model outputs. Higher scores are better.
261
+
262
+ ### Simple Exact Match
263
+
264
+ ```python
265
+ def exact_match(item, output):
266
+ return 1.0 if item["reference_answer"] in output else 0.0
267
+ ```
268
+
269
+ ### Fuzzy Matching
270
+
271
+ ```python
272
+ from difflib import SequenceMatcher
273
+
274
+ def fuzzy_match(item, output):
275
+ ratio = SequenceMatcher(None, item["reference_answer"], output).ratio()
276
+ return ratio # 0.0 to 1.0
277
+ ```
278
+
279
+ ### LLM-as-Judge
280
+
281
+ ```python
282
+ import litellm
283
+
284
+ def llm_judge(item, output):
285
+ messages = [{
286
+ "role": "user",
287
+ "content": f"Rate this answer (0-1):\nQuestion: {item['user_query']}\n"
288
+ f"Reference: {item['reference_answer']}\nAnswer: {output}"
289
+ }]
290
+ response = litellm.completion(model="openai/gpt-4o", messages=messages)
291
+ return float(response.choices[0].message.content)
292
+ ```
293
+
294
+ ## MCP Server Examples
295
+
296
+ ### Local Servers
297
+
298
+ #### Filesystem Server (stdio)
299
+
300
+ ```python
301
+ from mcp import StdioServerParameters
302
+
303
+ server_params = StdioServerParameters(
304
+ command="npx",
305
+ args=["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
306
+ )
307
+
308
+ adapter = MCPAdapter(
309
+ server_params=server_params,
310
+ tool_name="read_file",
311
+ task_model="openai/gpt-4o-mini",
312
+ metric_fn=exact_match,
313
+ )
314
+ ```
315
+
316
+ ### Custom Python Server
317
+
318
+ ```python
319
+ # Create custom MCP server: my_server.py
320
+ from mcp.server.fastmcp import FastMCP
321
+
322
+ mcp = FastMCP("MyServer")
323
+
324
+ @mcp.tool()
325
+ def search_docs(query: str) -> str:
326
+ """Search documentation."""
327
+ # Your search logic
328
+ return f"Results for: {query}"
329
+
330
+ if __name__ == "__main__":
331
+ mcp.run()
332
+ ```
333
+
334
+ ```python
335
+ # Use in GEPA
336
+ server_params = StdioServerParameters(
337
+ command="python",
338
+ args=["my_server.py"],
339
+ )
340
+
341
+ adapter = MCPAdapter(
342
+ server_params=server_params,
343
+ tool_name="search_docs",
344
+ task_model="openai/gpt-4o-mini",
345
+ metric_fn=custom_metric,
346
+ )
347
+ ```
348
+
349
+ ### Remote Servers
350
+
351
+ #### Public SSE Server
352
+
353
+ ```python
354
+ adapter = MCPAdapter(
355
+ tool_name="search_web",
356
+ task_model="openai/gpt-4o-mini",
357
+ metric_fn=my_metric,
358
+ remote_url="https://public-mcp.example.com/sse",
359
+ remote_transport="sse",
360
+ )
361
+ ```
362
+
363
+ #### Authenticated HTTP Server
364
+
365
+ ```python
366
+ adapter = MCPAdapter(
367
+ tool_name="company_data",
368
+ task_model="openai/gpt-4o-mini",
369
+ metric_fn=my_metric,
370
+ remote_url="https://internal-mcp.company.com/mcp",
371
+ remote_transport="streamable_http",
372
+ remote_headers={
373
+ "Authorization": "Bearer YOUR_API_TOKEN",
374
+ "X-Custom-Header": "value",
375
+ },
376
+ remote_timeout=60,
377
+ )
378
+ ```
379
+
380
+ **Available transports:**
381
+ - `"sse"` - Server-Sent Events (good for streaming)
382
+ - `"streamable_http"` - HTTP with session management (better for production)
383
+
384
+ **See also:** The [remote_server.py example](../../examples/mcp_tool_optimization/remote_server.py) for a complete command-line tool.
385
+
386
+ ## Advanced Configuration
387
+
388
+ ### Custom Model Functions
389
+
390
+ Instead of litellm model strings, you can provide a custom callable:
391
+
392
+ ```python
393
+ def my_model(messages):
394
+ # Your custom model logic
395
+ return "response"
396
+
397
+ adapter = MCPAdapter(
398
+ server_params=server_params,
399
+ tool_name="my_tool",
400
+ task_model=my_model, # Custom callable
401
+ metric_fn=my_metric,
402
+ )
403
+ ```
404
+
405
+ ### Disable Two-Pass Workflow
406
+
407
+ For simpler scenarios, disable the two-pass workflow:
408
+
409
+ ```python
410
+ adapter = MCPAdapter(
411
+ server_params=server_params,
412
+ tool_name="my_tool",
413
+ task_model="openai/gpt-4o-mini",
414
+ metric_fn=my_metric,
415
+ enable_two_pass=False, # Single-pass only
416
+ )
417
+ ```
418
+
419
+ ### Remote Server Configuration
420
+
421
+ ```python
422
+ adapter = MCPAdapter(
423
+ tool_name="my_tool",
424
+ task_model="openai/gpt-4o-mini",
425
+ metric_fn=my_metric,
426
+
427
+ # Remote server settings
428
+ remote_url="https://mcp.example.com/sse",
429
+ remote_transport="sse", # or "streamable_http"
430
+ remote_headers={
431
+ "Authorization": "Bearer TOKEN",
432
+ "User-Agent": "GEPA/1.0",
433
+ },
434
+ remote_timeout=30, # seconds
435
+
436
+ # Other settings
437
+ enable_two_pass=True,
438
+ failure_score=0.0,
439
+ )
440
+ ```
441
+
442
+ **Important:** You must provide EITHER `server_params` (local) OR `remote_url` (remote), not both.
443
+
444
+ ### Error Handling
445
+
446
+ Configure failure scores for robustness:
447
+
448
+ ```python
449
+ adapter = MCPAdapter(
450
+ server_params=server_params,
451
+ tool_name="my_tool",
452
+ task_model="openai/gpt-4o-mini",
453
+ metric_fn=my_metric,
454
+ failure_score=0.0, # Score for failed executions
455
+ )
456
+ ```
457
+
458
+ ## Reflective Dataset
459
+
460
+ The adapter generates reflective datasets for each component showing:
461
+
462
+ - Successful and failed tool calls
463
+ - Cases where tools should/shouldn't be called
464
+ - How well tool responses were utilized
465
+
466
+ Example reflective entry for `tool_description` (successful case):
467
+
468
+ ```python
469
+ {
470
+ "Inputs": {
471
+ "user_query": "What's in config.json?",
472
+ "tool_description": "Read file contents",
473
+ },
474
+ "Generated Outputs": {
475
+ "tool_called": True,
476
+ "selected_tool": "read_file",
477
+ "tool_arguments": {"path": "config.json"},
478
+ "final_answer": "The config file contains database settings: host=localhost, port=5432, user=admin",
479
+ },
480
+ "Feedback": "Good! The tool 'read_file' was used appropriately and produced a correct answer. Tool called: True, Score: 0.85"
481
+ }
482
+ ```
483
+
484
+ Example reflective entry for a failed case (tool not called):
485
+
486
+ ```python
487
+ {
488
+ "Inputs": {
489
+ "user_query": "What's in config.json?",
490
+ "tool_description": "Read file contents",
491
+ },
492
+ "Generated Outputs": {
493
+ "tool_called": False,
494
+ "tool_arguments": None,
495
+ "final_answer": "I don't have access to file contents.",
496
+ },
497
+ "Feedback": "The response was incorrect (score: 0.20). The tool was not called. Consider whether calling the tool would help answer this query."
498
+ }
499
+ ```
500
+
501
+ Example reflective entry for a failed case (tool called but wrong answer):
502
+
503
+ ```python
504
+ {
505
+ "Inputs": {
506
+ "user_query": "What's in config.json?",
507
+ "tool_description": "Read file contents",
508
+ },
509
+ "Generated Outputs": {
510
+ "tool_called": True,
511
+ "selected_tool": "read_file",
512
+ "tool_arguments": {"path": "config.json"},
513
+ "final_answer": "The file contains some configuration data.",
514
+ },
515
+ "Feedback": "The response was incorrect (score: 0.30). The tool 'read_file' was called with arguments {'path': 'config.json'}, but the final answer was still incorrect. Consider whether a different tool from ['read_file', 'write_file', 'list_files'] would be more appropriate, or if the tool description needs to be clearer."
516
+ }
517
+ ```
518
+
519
+ Example reflective entry for multi-tool selection (wrong tool chosen):
520
+
521
+ ```python
522
+ {
523
+ "Inputs": {
524
+ "user_query": "What files are in the docs folder?",
525
+ "tool_description": "List files and directories in a given path",
526
+ },
527
+ "Generated Outputs": {
528
+ "tool_called": True,
529
+ "selected_tool": "read_file", # Wrong tool selected
530
+ "tool_arguments": {"path": "docs"},
531
+ "final_answer": "Error: docs is not a file",
532
+ },
533
+ "Feedback": "The response was incorrect (score: 0.20). The tool 'read_file' was called with arguments {'path': 'docs'}, but the final answer was still incorrect. Consider whether a different tool from ['read_file', 'write_file', 'list_files'] would be more appropriate, or if the tool description needs to be clearer."
534
+ }
535
+ ```
536
+
537
+ ## Performance Notes
538
+
539
+ ### Subprocess Overhead
540
+
541
+ Each `evaluate()` call spawns a new MCP server process:
542
+ - Startup time: ~100-500ms
543
+ - Total overhead for 150 evals: ~15-75 seconds
544
+
545
+ This is an early-development MVP, so this overhead is expected: MCP is async while GEPA is still sync. The plan is to add the following features later:
546
+ - Session pooling (reuse processes)
547
+ - Background event loop (persistent session)
548
+ - Async GEPA core (native async support)
549
+
550
+ ## License
551
+
552
+ Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
@@ -0,0 +1,37 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ """
5
+ MCP Adapter for GEPA.
6
+
7
+ This adapter enables optimization of MCP tool descriptions and system prompts
8
+ using GEPA's iterative refinement approach.
9
+
10
+ Exports:
11
+ MCPAdapter: Main adapter class
12
+ MCPDataInst: Dataset item type
13
+ MCPTrajectory: Execution trace type
14
+ MCPOutput: Output type
15
+ """
16
+
17
+ from typing import TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ from .mcp_adapter import MCPAdapter, MCPDataInst, MCPOutput, MCPTrajectory
21
+
22
+ __all__ = [
23
+ "MCPAdapter",
24
+ "MCPDataInst",
25
+ "MCPOutput",
26
+ "MCPTrajectory",
27
+ ]
28
+
29
+
30
def __getattr__(name: str):
    """Lazily import adapter symbols so the MCP SDK is only required on use.

    PEP 562 module-level ``__getattr__``: called only for attribute names not
    already bound at module scope. Deferring the ``.mcp_adapter`` import to
    first access lets ``import`` of this package succeed even when the
    optional ``mcp`` dependency is not installed; the ImportError surfaces
    only when one of the adapter symbols is actually used.

    Raises:
        AttributeError: if *name* is not one of the public lazy exports.
    """
    # Use __all__ as the single source of truth for the lazily exported
    # names instead of duplicating the set here (the two lists could drift).
    if name in __all__:
        from . import mcp_adapter

        # getattr on the imported submodule is clearer and less fragile than
        # the previous ``locals()[name]`` lookup after a from-import.
        return getattr(mcp_adapter, name)

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")