mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,4 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ """MCP adapter examples."""
@@ -0,0 +1,455 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MCP Tool Optimization with GEPA
4
+
5
+ This example demonstrates how to use GEPA to optimize MCP tool descriptions
6
+ and system prompts. It shows both local (stdio) and remote (SSE) server support.
7
+
8
+ What you'll learn:
9
+ - Setting up MCPAdapter with local or remote servers
10
+ - Defining evaluation datasets
11
+ - Running optimization to improve tool descriptions
12
+ - Multi-tool support
13
+
14
+ MODEL CONFIGURATION:
15
+ Defaults to Ollama models (no API key needed). Requires Ollama installed: https://ollama.com
16
+ Pull models: ollama pull llama3.1:8b && ollama pull qwen3:8b
17
+
18
+ To use OpenAI models: --task-model gpt-4o-mini (requires OPENAI_API_KEY)
19
+
20
+ Requirements:
21
+ pip install gepa mcp litellm
22
+
23
+ Usage Examples:
24
+ # Run with default Ollama models (no flags needed)
25
+ python mcp_optimization_example.py
26
+
27
+ # Use OpenAI models (requires OPENAI_API_KEY)
28
+ python mcp_optimization_example.py --task-model gpt-4o-mini
29
+
30
+ # Remote MCP server
31
+ python mcp_optimization_example.py --mode remote --url YOUR_URL
32
+ """
33
+
34
+ import logging
35
+ import sys
36
+ import tempfile
37
+ from pathlib import Path
38
+
39
+ from mcp import StdioServerParameters
40
+
41
+ import mantisdk.algorithm.gepa.lib as gepa
42
+ from mantisdk.algorithm.gepa.lib.adapters.mcp_adapter import MCPAdapter
43
+
# litellm is optional: tone it down when it is installed, carry on when it is not.
try:
    import litellm
except ImportError:
    pass
else:
    litellm.set_verbose = False
    litellm.drop_params = True
    # Raise the LiteLLM logger threshold so its INFO messages are dropped.
    logging.getLogger("LiteLLM").setLevel(logging.WARNING)

# Keep only warnings and above from the HTTP client libraries.
for _noisy in ("httpx", "httpcore", "urllib3"):
    logging.getLogger(_noisy).setLevel(logging.WARNING)

# Root logging setup for this example's own output, then the module logger.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
62
+
63
+
64
+ # ============================================================================
65
+ # Simple MCP Server (for local testing)
66
+ # ============================================================================
67
+
# Source code of a small MCP server, written to disk by create_test_server().
# This is a regular (non-raw) triple-quoted string, so every backslash meant
# for the generated file must be doubled here: "\\n" below becomes "\n" in the
# server script, i.e. list_files joins names with a real newline.
SIMPLE_MCP_SERVER = '''"""Simple MCP server with file operations."""
import asyncio
from pathlib import Path
from mcp.server.fastmcp import FastMCP

# Create MCP server
mcp = FastMCP("File Server")

# Base directory for file operations
BASE_DIR = Path("/tmp/mcp_test")
BASE_DIR.mkdir(exist_ok=True)


@mcp.tool()
def read_file(path: str) -> str:
    """Read contents of a file.

    Args:
        path: Relative path to the file
    """
    try:
        file_path = BASE_DIR / path
        if not file_path.exists():
            return f"Error: File {path} not found"
        return file_path.read_text()
    except Exception as e:
        return f"Error reading file: {e}"


@mcp.tool()
def write_file(path: str, content: str) -> str:
    """Write content to a file.

    Args:
        path: Relative path to the file
        content: Content to write
    """
    try:
        file_path = BASE_DIR / path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content)
        return f"Successfully wrote to {path}"
    except Exception as e:
        return f"Error writing file: {e}"


@mcp.tool()
def list_files() -> str:
    """List all files in the base directory."""
    try:
        files = [str(p.relative_to(BASE_DIR)) for p in BASE_DIR.rglob("*") if p.is_file()]
        if not files:
            return "No files found"
        return "\\n".join(files)
    except Exception as e:
        return f"Error listing files: {e}"


if __name__ == "__main__":
    # Run the server
    mcp.run()
'''
130
+
131
+
def create_test_server():
    """Write the bundled MCP server source to a fresh temp directory.

    Returns:
        Path to the ``server.py`` file that now contains ``SIMPLE_MCP_SERVER``.
    """
    workdir = tempfile.mkdtemp(prefix="gepa_mcp_")
    script_path = Path(workdir, "server.py")
    script_path.write_text(SIMPLE_MCP_SERVER)
    return script_path
138
+
139
+
def create_test_files():
    """Populate ``/tmp/mcp_test`` with the two text fixtures the examples read."""
    fixtures = {
        "notes.txt": "Meeting at 3pm in Room B\nDiscuss Q4 goals",
        "data.txt": "Revenue: $50000\nExpenses: $30000\nProfit: $20000",
    }

    root = Path("/tmp/mcp_test")
    root.mkdir(exist_ok=True)

    for filename, text in fixtures.items():
        (root / filename).write_text(text)
147
+
148
+
149
+ # ============================================================================
150
+ # Dataset & Metric Definition
151
+ # ============================================================================
152
+
def create_dataset():
    """Build the three-example evaluation set for the file-reading examples.

    Returns:
        A list of dicts, each with the keys ``user_query``, ``tool_arguments``,
        ``reference_answer`` and ``additional_context`` (always an empty dict).
    """
    # (user query, file the tool should read, text expected in the answer)
    cases = (
        ("What's in the notes.txt file?", "notes.txt", "3pm"),
        ("Read the content of data.txt", "data.txt", "50000"),
        ("Show me what's in notes.txt", "notes.txt", "Room B"),
    )
    return [
        {
            "user_query": query,
            "tool_arguments": {"path": filename},
            "reference_answer": expected,
            "additional_context": {},
        }
        for query, filename, expected in cases
    ]
175
+
176
+
def metric_fn(data_inst, output: str) -> float:
    """Score an output by case-insensitive substring match against the reference.

    In practice, you'd use more sophisticated metrics based on your use case.

    Args:
        data_inst: Mapping for one dataset example; its ``reference_answer``
            entry (if present) is the text that must appear in ``output``.
        output: Text produced for the example. ``None`` or an empty string is
            tolerated and scores 0.0 instead of raising.

    Returns:
        1.0 when a non-empty reference answer occurs in ``output`` (ignoring
        case), otherwise 0.0.
    """
    reference = data_inst.get("reference_answer", "")
    # A missing reference or output can never match; return before calling
    # string methods on a value that may be None.
    if not reference or not output:
        return 0.0
    # str() lets a non-string reference (e.g. a numeric answer) be compared as text.
    return 1.0 if str(reference).lower() in str(output).lower() else 0.0
185
+
186
+
187
+ # ============================================================================
188
+ # Local Server Example
189
+ # ============================================================================
190
+
def run_local_example(task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Optimize the ``read_file`` tool description against a local stdio MCP server.

    Writes the test server script and fixture files, builds an ``MCPAdapter``
    for the ``read_file`` tool, runs ``gepa.optimize`` with the same dataset as
    train and validation set (``max_metric_calls=10``), and logs the outcome.

    Args:
        task_model: Model identifier handed to the adapter as ``task_model``.
        reflection_model: Model identifier handed to ``gepa.optimize`` as ``reflection_lm``.

    Returns:
        Whatever ``gepa.optimize`` returns.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    for banner_line in (heavy_rule, "GEPA MCP Tool Optimization", heavy_rule):
        logger.info(banner_line)

    server_file = create_test_server()
    create_test_files()

    for config_line in (
        f"MCP Server: Local stdio server ({server_file.name})",
        "Tools: read_file",
        f"Task Model: {task_model}",
        f"Reflection Model: {reflection_model}",
    ):
        logger.info(config_line)

    # Command line used to start the generated server script.
    stdio_params = StdioServerParameters(command="python", args=[str(server_file)])
    adapter = MCPAdapter(
        tool_names="read_file",
        task_model=task_model,
        metric_fn=metric_fn,
        server_params=stdio_params,
        base_system_prompt="You are a helpful file assistant.",
        enable_two_pass=True,
    )

    dataset = create_dataset()
    seed_candidate = {"tool_description": "Read file contents from disk."}

    for intro_line in (
        "",
        "Seed Prompt (Initial Tool Description):",
        f" {seed_candidate['tool_description']}",
        "",
        f"Dataset: {len(dataset)} examples",
        "",
        "Starting GEPA optimization...",
        light_rule,
    ):
        logger.info(intro_line)

    result = gepa.optimize(
        seed_candidate=seed_candidate,
        trainset=dataset,
        valset=dataset,
        adapter=adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    for closing_line in (light_rule, "Optimization Complete", heavy_rule):
        logger.info(closing_line)

    # Fall back to 0.0 when no validation scores were recorded.
    scores = result.val_aggregate_scores
    best_score = scores[result.best_idx] if scores else 0.0
    best_candidate = result.candidates[result.best_idx]

    for report_line in (
        f"Best Score: {best_score:.2f}",
        "",
        "Optimized Tool Description:",
        f" {best_candidate.get('tool_description', 'N/A')}",
        heavy_rule,
    ):
        logger.info(report_line)

    return result
252
+
253
+
254
+ # ============================================================================
255
+ # Remote Server Example
256
+ # ============================================================================
257
+
def run_remote_example(url: str, task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Optimize the ``search`` tool description against a remote SSE MCP server.

    Builds an ``MCPAdapter`` pointed at ``url`` with ``remote_transport="sse"``,
    runs ``gepa.optimize`` on a single-example dataset used as both train and
    validation set (``max_metric_calls=10``), and logs the outcome.

    Args:
        url: Address of the remote MCP server, passed to the adapter as ``remote_url``.
        task_model: Model identifier handed to the adapter as ``task_model``.
        reflection_model: Model identifier handed to ``gepa.optimize`` as ``reflection_lm``.

    Returns:
        Whatever ``gepa.optimize`` returns.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    for banner_line in (
        heavy_rule,
        "GEPA MCP Tool Optimization",
        heavy_rule,
        f"MCP Server: Remote SSE server ({url})",
        "Tools: search",
        f"Task Model: {task_model}",
        f"Reflection Model: {reflection_model}",
    ):
        logger.info(banner_line)

    adapter = MCPAdapter(
        tool_names="search",
        task_model=task_model,
        metric_fn=metric_fn,
        remote_url=url,
        remote_transport="sse",
        remote_headers={},
    )

    example = {
        "user_query": "Search for information about Python",
        "tool_arguments": {"query": "Python"},
        "reference_answer": "programming",
        "additional_context": {},
    }
    dataset = [example]
    seed_candidate = {"tool_description": "Search for information."}

    for intro_line in (
        "",
        "Seed Prompt (Initial Tool Description):",
        f" {seed_candidate['tool_description']}",
        "",
        f"Dataset: {len(dataset)} examples",
        "",
        "Starting GEPA optimization...",
        light_rule,
    ):
        logger.info(intro_line)

    result = gepa.optimize(
        seed_candidate=seed_candidate,
        trainset=dataset,
        valset=dataset,
        adapter=adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    for closing_line in (light_rule, "Optimization Complete", heavy_rule):
        logger.info(closing_line)

    # Fall back to 0.0 when no validation scores were recorded.
    scores = result.val_aggregate_scores
    best_score = scores[result.best_idx] if scores else 0.0
    best_candidate = result.candidates[result.best_idx]

    for report_line in (
        f"Best Score: {best_score:.2f}",
        "",
        "Optimized Tool Description:",
        f" {best_candidate.get('tool_description', 'N/A')}",
        heavy_rule,
    ):
        logger.info(report_line)

    return result
321
+
322
+
323
+ # ============================================================================
324
+ # Multi-Tool Example
325
+ # ============================================================================
326
+
def run_multitool_example(task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Optimize the descriptions of several tools on the local stdio MCP server.

    Writes the test server script and fixture files, builds an ``MCPAdapter``
    for ``read_file``, ``write_file`` and ``list_files``, runs ``gepa.optimize``
    with the same two-example dataset as train and validation set
    (``max_metric_calls=10``), and logs the best score and the resulting
    per-tool descriptions.

    Args:
        task_model: Model identifier handed to the adapter as ``task_model``.
        reflection_model: Model identifier handed to ``gepa.optimize`` as ``reflection_lm``.

    Returns:
        Whatever ``gepa.optimize`` returns.
    """
    logger.info("=" * 60)
    logger.info("GEPA MCP Tool Optimization (Multi-Tool)")
    logger.info("=" * 60)

    server_file = create_test_server()
    create_test_files()

    logger.info(f"MCP Server: Local stdio server ({server_file.name})")
    logger.info("Tools: read_file, write_file, list_files")
    logger.info(f"Task Model: {task_model}")
    logger.info(f"Reflection Model: {reflection_model}")

    adapter = MCPAdapter(
        tool_names=["read_file", "write_file", "list_files"],
        task_model=task_model,
        metric_fn=metric_fn,
        server_params=StdioServerParameters(
            command="python",
            args=[str(server_file)],
        ),
    )

    dataset = [
        {
            "user_query": "What files are available?",
            "tool_arguments": {},
            "reference_answer": "notes.txt",
            "additional_context": {},
        },
        {
            "user_query": "Read notes.txt",
            "tool_arguments": {"path": "notes.txt"},
            "reference_answer": "3pm",
            "additional_context": {},
        },
    ]

    # One candidate entry per tool, keyed "tool_description_<tool name>".
    seed_candidate = {
        "tool_description_read_file": "Read a file.",
        "tool_description_write_file": "Write a file.",
        "tool_description_list_files": "List files.",
    }

    logger.info("")
    logger.info("Seed Prompts (Initial Tool Descriptions):")
    for tool_name in adapter.tool_names:
        key = f"tool_description_{tool_name}"
        logger.info(f" {tool_name}: {seed_candidate.get(key, 'N/A')}")
    logger.info("")
    logger.info(f"Dataset: {len(dataset)} examples")
    logger.info("")
    logger.info("Starting GEPA optimization...")
    logger.info("-" * 60)

    result = gepa.optimize(
        seed_candidate=seed_candidate,
        trainset=dataset,
        valset=dataset,
        adapter=adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    logger.info("-" * 60)
    logger.info("Optimization Complete")
    logger.info("=" * 60)
    # Report the best score like the single-tool examples do; fall back to 0.0
    # when no validation scores were recorded.
    best_score = result.val_aggregate_scores[result.best_idx] if result.val_aggregate_scores else 0.0
    best_candidate = result.candidates[result.best_idx]
    logger.info(f"Best Score: {best_score:.2f}")
    logger.info("")
    logger.info("Optimized Tool Descriptions:")
    for tool_name in adapter.tool_names:
        key = f"tool_description_{tool_name}"
        logger.info(f" {tool_name}: {best_candidate.get(key, 'N/A')}")
    logger.info("=" * 60)

    return result
403
+
404
+
405
+ # ============================================================================
406
+ # Main
407
+ # ============================================================================
408
+
if __name__ == "__main__":
    import argparse

    # Command-line interface: pick an example mode and, optionally, the models.
    cli = argparse.ArgumentParser(description="MCP Tool Optimization Example")
    cli.add_argument("--mode", choices=["local", "remote", "multitool"], default="local", help="Example mode to run")
    cli.add_argument("--url", type=str, help="Remote MCP server URL (for remote mode)")
    cli.add_argument(
        "--task-model",
        type=str,
        default="ollama/llama3.1:8b",
        help='Model for task execution (default: "ollama/llama3.1:8b")',
    )
    cli.add_argument(
        "--reflection-model",
        type=str,
        default="ollama/qwen3:8b",
        help='Model for reflection (default: "ollama/qwen3:8b")',
    )
    options = cli.parse_args()

    try:
        if options.mode == "remote":
            # Remote mode has no server to fall back on, so a URL is mandatory.
            if not options.url:
                logger.error("Remote mode requires --url argument")
                sys.exit(1)
            run_remote_example(options.url, options.task_model, options.reflection_model)
        elif options.mode == "multitool":
            run_multitool_example(options.task_model, options.reflection_model)
        elif options.mode == "local":
            run_local_example(options.task_model, options.reflection_model)
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the run: exit with status 0.
        logger.info("\nInterrupted by user")
        sys.exit(0)
    except Exception as e:
        # Log the traceback and exit with a failure status.
        logger.exception(f"Error: {e}")
        sys.exit(1)