mantisdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +22 -0
- mantisdk/adapter/__init__.py +15 -0
- mantisdk/adapter/base.py +94 -0
- mantisdk/adapter/messages.py +270 -0
- mantisdk/adapter/triplet.py +1028 -0
- mantisdk/algorithm/__init__.py +39 -0
- mantisdk/algorithm/apo/__init__.py +5 -0
- mantisdk/algorithm/apo/apo.py +889 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
- mantisdk/algorithm/base.py +162 -0
- mantisdk/algorithm/decorator.py +264 -0
- mantisdk/algorithm/fast.py +250 -0
- mantisdk/algorithm/gepa/__init__.py +59 -0
- mantisdk/algorithm/gepa/adapter.py +459 -0
- mantisdk/algorithm/gepa/gepa.py +364 -0
- mantisdk/algorithm/gepa/lib/__init__.py +18 -0
- mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
- mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
- mantisdk/algorithm/gepa/lib/api.py +375 -0
- mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
- mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
- mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
- mantisdk/algorithm/gepa/lib/core/result.py +233 -0
- mantisdk/algorithm/gepa/lib/core/state.py +636 -0
- mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
- mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
- mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
- mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
- mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
- mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
- mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
- mantisdk/algorithm/gepa/lib/py.typed +0 -0
- mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
- mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
- mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
- mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
- mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
- mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
- mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
- mantisdk/algorithm/gepa/tracing.py +105 -0
- mantisdk/algorithm/utils.py +177 -0
- mantisdk/algorithm/verl/__init__.py +5 -0
- mantisdk/algorithm/verl/interface.py +202 -0
- mantisdk/cli/__init__.py +56 -0
- mantisdk/cli/prometheus.py +115 -0
- mantisdk/cli/store.py +131 -0
- mantisdk/cli/vllm.py +29 -0
- mantisdk/client.py +408 -0
- mantisdk/config.py +348 -0
- mantisdk/emitter/__init__.py +43 -0
- mantisdk/emitter/annotation.py +370 -0
- mantisdk/emitter/exception.py +54 -0
- mantisdk/emitter/message.py +61 -0
- mantisdk/emitter/object.py +117 -0
- mantisdk/emitter/reward.py +320 -0
- mantisdk/env_var.py +156 -0
- mantisdk/execution/__init__.py +15 -0
- mantisdk/execution/base.py +64 -0
- mantisdk/execution/client_server.py +443 -0
- mantisdk/execution/events.py +69 -0
- mantisdk/execution/inter_process.py +16 -0
- mantisdk/execution/shared_memory.py +282 -0
- mantisdk/instrumentation/__init__.py +119 -0
- mantisdk/instrumentation/agentops.py +314 -0
- mantisdk/instrumentation/agentops_langchain.py +45 -0
- mantisdk/instrumentation/litellm.py +83 -0
- mantisdk/instrumentation/vllm.py +81 -0
- mantisdk/instrumentation/weave.py +500 -0
- mantisdk/litagent/__init__.py +11 -0
- mantisdk/litagent/decorator.py +536 -0
- mantisdk/litagent/litagent.py +252 -0
- mantisdk/llm_proxy.py +1890 -0
- mantisdk/logging.py +370 -0
- mantisdk/reward.py +7 -0
- mantisdk/runner/__init__.py +11 -0
- mantisdk/runner/agent.py +845 -0
- mantisdk/runner/base.py +182 -0
- mantisdk/runner/legacy.py +309 -0
- mantisdk/semconv.py +170 -0
- mantisdk/server.py +401 -0
- mantisdk/store/__init__.py +23 -0
- mantisdk/store/base.py +897 -0
- mantisdk/store/client_server.py +2092 -0
- mantisdk/store/collection/__init__.py +30 -0
- mantisdk/store/collection/base.py +587 -0
- mantisdk/store/collection/memory.py +970 -0
- mantisdk/store/collection/mongo.py +1412 -0
- mantisdk/store/collection_based.py +1823 -0
- mantisdk/store/insight.py +648 -0
- mantisdk/store/listener.py +58 -0
- mantisdk/store/memory.py +396 -0
- mantisdk/store/mongo.py +165 -0
- mantisdk/store/sqlite.py +3 -0
- mantisdk/store/threading.py +357 -0
- mantisdk/store/utils.py +142 -0
- mantisdk/tracer/__init__.py +16 -0
- mantisdk/tracer/agentops.py +242 -0
- mantisdk/tracer/base.py +287 -0
- mantisdk/tracer/dummy.py +106 -0
- mantisdk/tracer/otel.py +555 -0
- mantisdk/tracer/weave.py +677 -0
- mantisdk/trainer/__init__.py +6 -0
- mantisdk/trainer/init_utils.py +263 -0
- mantisdk/trainer/legacy.py +367 -0
- mantisdk/trainer/registry.py +12 -0
- mantisdk/trainer/trainer.py +618 -0
- mantisdk/types/__init__.py +6 -0
- mantisdk/types/core.py +553 -0
- mantisdk/types/resources.py +204 -0
- mantisdk/types/tracer.py +515 -0
- mantisdk/types/tracing.py +218 -0
- mantisdk/utils/__init__.py +1 -0
- mantisdk/utils/id.py +18 -0
- mantisdk/utils/metrics.py +1025 -0
- mantisdk/utils/otel.py +578 -0
- mantisdk/utils/otlp.py +536 -0
- mantisdk/utils/server_launcher.py +1045 -0
- mantisdk/utils/system_snapshot.py +81 -0
- mantisdk/verl/__init__.py +8 -0
- mantisdk/verl/__main__.py +6 -0
- mantisdk/verl/async_server.py +46 -0
- mantisdk/verl/config.yaml +27 -0
- mantisdk/verl/daemon.py +1154 -0
- mantisdk/verl/dataset.py +44 -0
- mantisdk/verl/entrypoint.py +248 -0
- mantisdk/verl/trainer.py +549 -0
- mantisdk-0.1.0.dist-info/METADATA +119 -0
- mantisdk-0.1.0.dist-info/RECORD +190 -0
- mantisdk-0.1.0.dist-info/WHEEL +4 -0
- mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
- mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
# MCP Adapter for GEPA
|
|
2
|
+
|
|
3
|
+
The MCP Adapter enables optimization of [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) tool usage through GEPA's reflective mutation approach.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This adapter optimizes:
|
|
8
|
+
- **Tool descriptions**: Improve how tools are described to the model
|
|
9
|
+
- **System prompts**: Optimize guidance for when and how to use tools
|
|
10
|
+
- **Tool usage patterns**: Learn better tool invocation strategies
|
|
11
|
+
- **Tool selection**: Choose the right tool from multiple available options
|
|
12
|
+
|
|
13
|
+
## Multi-Tool Support
|
|
14
|
+
|
|
15
|
+
The MCP adapter supports both single-tool and multi-tool scenarios:
|
|
16
|
+
|
|
17
|
+
### Single Tool
|
|
18
|
+
```python
|
|
19
|
+
adapter = MCPAdapter(
|
|
20
|
+
tool_names="read_file", # Single tool as string
|
|
21
|
+
    task_model="gpt-4o-mini",  # Change as per your model choice
|
|
22
|
+
metric_fn=my_metric,
|
|
23
|
+
)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Multiple Tools (New Feature)
|
|
27
|
+
```python
|
|
28
|
+
adapter = MCPAdapter(
|
|
29
|
+
tool_names=["read_file", "write_file", "list_files"], # Multiple tools as list
|
|
30
|
+
    task_model="gpt-4o-mini",  # Change as per your model choice
|
|
31
|
+
metric_fn=my_metric,
|
|
32
|
+
)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
Install the MCP Python SDK:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install mcp
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
### Option 1: Local Models (Ollama)
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import gepa
|
|
49
|
+
from gepa.adapters.mcp_adapter import MCPAdapter
|
|
50
|
+
from mcp import StdioServerParameters
|
|
51
|
+
|
|
52
|
+
# Configure MCP server
|
|
53
|
+
server_params = StdioServerParameters(
|
|
54
|
+
command="npx",
|
|
55
|
+
args=["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Create dataset
|
|
59
|
+
dataset = [
|
|
60
|
+
{
|
|
61
|
+
"user_query": "What's in the file notes.txt?",
|
|
62
|
+
"tool_arguments": {"path": "/tmp/notes.txt"},
|
|
63
|
+
"reference_answer": "Meeting at 3pm",
|
|
64
|
+
"additional_context": {},
|
|
65
|
+
},
|
|
66
|
+
# ... more examples
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
# Create adapter with LOCAL Ollama models
|
|
70
|
+
adapter = MCPAdapter(
|
|
71
|
+
server_params=server_params,
|
|
72
|
+
tool_names=["read_file", "write_file", "list_files"], # Multiple tools for selection
|
|
73
|
+
task_model="ollama/llama3.2:1b", # Local model via Ollama, replace with your model
|
|
74
|
+
metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Optimize with local models - no API costs!
|
|
78
|
+
result = gepa.optimize(
|
|
79
|
+
seed_candidate={"tool_description": "Read the contents of a file"},
|
|
80
|
+
trainset=dataset[:20],
|
|
81
|
+
valset=dataset[20:],
|
|
82
|
+
adapter=adapter,
|
|
83
|
+
    reflection_lm="ollama/llama3.1:8b",  # Larger local model for reflection; replace with your choice
|
|
84
|
+
max_metric_calls=150,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
print("Optimized tool description:", result.best_candidate["tool_description"])
|
|
88
|
+
# Total cost: $0.00 - runs 100% locally!
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Setup for Ollama:**
|
|
92
|
+
```bash
|
|
93
|
+
# Install Ollama: https://ollama.com
|
|
94
|
+
|
|
95
|
+
# Pull models
|
|
96
|
+
ollama pull llama3.1:8b
|
|
97
|
+
ollama pull llama3.2:1b
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Option 2: OpenAI API
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
# Same as above, but use OpenAI models
|
|
104
|
+
adapter = MCPAdapter(
|
|
105
|
+
server_params=server_params,
|
|
106
|
+
tool_names=["read_file", "write_file", "list_files"], # Multiple tools for selection
|
|
107
|
+
task_model="openai/gpt-4o-mini", # OpenAI API, replace with your model choice
|
|
108
|
+
metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
result = gepa.optimize(
|
|
112
|
+
seed_candidate={"tool_description": "Read the contents of a file"},
|
|
113
|
+
trainset=dataset[:20],
|
|
114
|
+
valset=dataset[20:],
|
|
115
|
+
adapter=adapter,
|
|
116
|
+
    reflection_lm="openai/gpt-5",  # OpenAI for reflection; replace with your model choice
|
|
117
|
+
max_metric_calls=150,
|
|
118
|
+
)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Setup for OpenAI:**
|
|
122
|
+
```bash
|
|
123
|
+
export OPENAI_API_KEY=your-key-here
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Option 3: Remote MCP Servers (Trusted/Self-Hosted Servers)
|
|
127
|
+
|
|
128
|
+
Connect to thousands of public MCP servers via SSE or StreamableHTTP:
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
# Remote SSE server
|
|
132
|
+
adapter = MCPAdapter(
|
|
133
|
+
tool_names=["search_web", "analyze_data", "summarize_text"], # Multiple tools for selection
|
|
134
|
+
task_model="openai/gpt-4o-mini",
|
|
135
|
+
metric_fn=lambda item, output: 1.0 if item["reference_answer"] in output else 0.0,
|
|
136
|
+
remote_url="https://mcp-server.com/sse",
|
|
137
|
+
remote_transport="sse",
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
# Remote HTTP server with authentication
|
|
141
|
+
adapter = MCPAdapter(
|
|
142
|
+
tool_names=["analyze_data", "visualize_data", "export_data"], # Multiple tools for selection
|
|
143
|
+
task_model="openai/gpt-4o-mini",
|
|
144
|
+
metric_fn=my_metric,
|
|
145
|
+
remote_url="https://mcp-server.com/mcp",
|
|
146
|
+
remote_transport="streamable_http",
|
|
147
|
+
remote_headers={"Authorization": "Bearer YOUR_TOKEN"},
|
|
148
|
+
remote_timeout=30,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
result = gepa.optimize(
|
|
152
|
+
seed_candidate={"tool_description": "Search web for information"},
|
|
153
|
+
trainset=dataset[:20],
|
|
154
|
+
valset=dataset[20:],
|
|
155
|
+
adapter=adapter,
|
|
156
|
+
reflection_lm="openai/gpt-4o",
|
|
157
|
+
max_metric_calls=150,
|
|
158
|
+
)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Benefits:**
|
|
162
|
+
- Access thousands of public MCP servers that you trust
|
|
163
|
+
- No local server setup required
|
|
164
|
+
- Use hosted/managed MCP tools
|
|
165
|
+
|
|
166
|
+
## Architecture
|
|
167
|
+
|
|
168
|
+
### Two-Pass Workflow
|
|
169
|
+
|
|
170
|
+
The adapter uses a two-pass workflow for better tool integration:
|
|
171
|
+
|
|
172
|
+
1. **First Pass**: Model receives user query and decides whether to call the tool
|
|
173
|
+
- Input: User query + system prompt with tool info
|
|
174
|
+
- Output: Tool call decision + arguments OR direct answer
|
|
175
|
+
|
|
176
|
+
2. **Second Pass**: Model receives tool response and generates final answer
|
|
177
|
+
- Input: Original query + tool response
|
|
178
|
+
- Output: Final answer incorporating tool results
|
|
179
|
+
|
|
180
|
+
This workflow ensures the model can effectively utilize tool outputs.
|
|
181
|
+
|
|
182
|
+
### Implementation Approach
|
|
183
|
+
|
|
184
|
+
The adapter uses `asyncio.run()` to bridge GEPA's synchronous API with MCP's async SDK:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
def evaluate(self, batch, candidate, capture_traces):
|
|
188
|
+
# Run async evaluation in new event loop
|
|
189
|
+
return asyncio.run(self._evaluate_async(batch, candidate, capture_traces))
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Each evaluation creates a fresh MCP session, avoiding state management complexity.
|
|
193
|
+
|
|
194
|
+
**Performance Note**: Subprocess startup adds ~100-500ms per evaluation. For a typical optimization run with 150 metric calls, expect ~15-75 seconds of MCP overhead.
|
|
195
|
+
|
|
196
|
+
## Component Optimization
|
|
197
|
+
|
|
198
|
+
### Tool Description
|
|
199
|
+
|
|
200
|
+
Optimizes the description field of MCP tools, improving how the model understands when and how to use each tool.
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
# Single tool optimization
|
|
204
|
+
seed_candidate = {
|
|
205
|
+
"tool_description": "Search through documentation files"
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
# Multi-tool optimization
|
|
209
|
+
seed_candidate = {
|
|
210
|
+
"tool_description_read_file": "Read file contents from the filesystem",
|
|
211
|
+
"tool_description_write_file": "Write content to a file on the filesystem",
|
|
212
|
+
"tool_description_list_files": "List files and directories in a given path"
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
# GEPA will optimize these to something like:
|
|
216
|
+
# "tool_description_read_file": "Read file contents. Use when user asks to view, show, or display file contents. Returns the full text content of the specified file."
|
|
217
|
+
# "tool_description_write_file": "Write content to files. Use when user asks to create, save, or update file contents. Requires file path and content parameters."
|
|
218
|
+
# "tool_description_list_files": "List directory contents. Use when user asks to see what files are available, browse directories, or find files. Returns a list of files and folders."
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### System Prompt
|
|
222
|
+
|
|
223
|
+
Optimizes the overall system prompt to provide better guidance on tool usage strategy.
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
seed_candidate = {
|
|
227
|
+
"tool_description": "Read file contents",
|
|
228
|
+
"system_prompt": "You are a helpful assistant with file access."
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
# GEPA optimizes both components jointly
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Dataset Format
|
|
235
|
+
|
|
236
|
+
The `MCPDataInst` TypedDict defines the expected dataset format:
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
{
|
|
240
|
+
"user_query": str, # User's question/request
|
|
241
|
+
"tool_arguments": dict, # Expected tool arguments
|
|
242
|
+
"reference_answer": str | None, # Reference answer for scoring
|
|
243
|
+
"additional_context": dict, # Additional context
|
|
244
|
+
}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Example:
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
{
|
|
251
|
+
"user_query": "Show me the config file",
|
|
252
|
+
"tool_arguments": {"path": "/app/config.json"},
|
|
253
|
+
"reference_answer": '{"debug": true}',
|
|
254
|
+
"additional_context": {"file_location": "/app"},
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Metric Functions
|
|
259
|
+
|
|
260
|
+
The metric function scores model outputs. Higher scores are better.
|
|
261
|
+
|
|
262
|
+
### Simple Exact Match
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
def exact_match(item, output):
|
|
266
|
+
return 1.0 if item["reference_answer"] in output else 0.0
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Fuzzy Matching
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
from difflib import SequenceMatcher
|
|
273
|
+
|
|
274
|
+
def fuzzy_match(item, output):
|
|
275
|
+
ratio = SequenceMatcher(None, item["reference_answer"], output).ratio()
|
|
276
|
+
return ratio # 0.0 to 1.0
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### LLM-as-Judge
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
import litellm
|
|
283
|
+
|
|
284
|
+
def llm_judge(item, output):
|
|
285
|
+
messages = [{
|
|
286
|
+
"role": "user",
|
|
287
|
+
"content": f"Rate this answer (0-1):\nQuestion: {item['user_query']}\n"
|
|
288
|
+
f"Reference: {item['reference_answer']}\nAnswer: {output}"
|
|
289
|
+
}]
|
|
290
|
+
response = litellm.completion(model="openai/gpt-4o", messages=messages)
|
|
291
|
+
return float(response.choices[0].message.content)
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
## MCP Server Examples
|
|
295
|
+
|
|
296
|
+
### Local Servers
|
|
297
|
+
|
|
298
|
+
#### Filesystem Server (stdio)
|
|
299
|
+
|
|
300
|
+
```python
|
|
301
|
+
from mcp import StdioServerParameters
|
|
302
|
+
|
|
303
|
+
server_params = StdioServerParameters(
|
|
304
|
+
command="npx",
|
|
305
|
+
args=["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
adapter = MCPAdapter(
|
|
309
|
+
server_params=server_params,
|
|
310
|
+
tool_name="read_file",
|
|
311
|
+
task_model="openai/gpt-4o-mini",
|
|
312
|
+
metric_fn=exact_match,
|
|
313
|
+
)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Custom Python Server
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
# Create custom MCP server: my_server.py
|
|
320
|
+
from mcp.server.fastmcp import FastMCP
|
|
321
|
+
|
|
322
|
+
mcp = FastMCP("MyServer")
|
|
323
|
+
|
|
324
|
+
@mcp.tool()
|
|
325
|
+
def search_docs(query: str) -> str:
|
|
326
|
+
"""Search documentation."""
|
|
327
|
+
# Your search logic
|
|
328
|
+
return f"Results for: {query}"
|
|
329
|
+
|
|
330
|
+
if __name__ == "__main__":
|
|
331
|
+
mcp.run()
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
# Use in GEPA
|
|
336
|
+
server_params = StdioServerParameters(
|
|
337
|
+
command="python",
|
|
338
|
+
args=["my_server.py"],
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
adapter = MCPAdapter(
|
|
342
|
+
server_params=server_params,
|
|
343
|
+
tool_name="search_docs",
|
|
344
|
+
task_model="openai/gpt-4o-mini",
|
|
345
|
+
metric_fn=custom_metric,
|
|
346
|
+
)
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Remote Servers
|
|
350
|
+
|
|
351
|
+
#### Public SSE Server
|
|
352
|
+
|
|
353
|
+
```python
|
|
354
|
+
adapter = MCPAdapter(
|
|
355
|
+
tool_name="search_web",
|
|
356
|
+
task_model="openai/gpt-4o-mini",
|
|
357
|
+
metric_fn=my_metric,
|
|
358
|
+
remote_url="https://public-mcp.example.com/sse",
|
|
359
|
+
remote_transport="sse",
|
|
360
|
+
)
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
#### Authenticated HTTP Server
|
|
364
|
+
|
|
365
|
+
```python
|
|
366
|
+
adapter = MCPAdapter(
|
|
367
|
+
tool_name="company_data",
|
|
368
|
+
task_model="openai/gpt-4o-mini",
|
|
369
|
+
metric_fn=my_metric,
|
|
370
|
+
remote_url="https://internal-mcp.company.com/mcp",
|
|
371
|
+
remote_transport="streamable_http",
|
|
372
|
+
remote_headers={
|
|
373
|
+
"Authorization": "Bearer YOUR_API_TOKEN",
|
|
374
|
+
"X-Custom-Header": "value",
|
|
375
|
+
},
|
|
376
|
+
remote_timeout=60,
|
|
377
|
+
)
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Available transports:**
|
|
381
|
+
- `"sse"` - Server-Sent Events (good for streaming)
|
|
382
|
+
- `"streamable_http"` - HTTP with session management (better for production)
|
|
383
|
+
|
|
384
|
+
**See also:** The [remote_server.py example](../../examples/mcp_tool_optimization/remote_server.py) for a complete command-line tool.
|
|
385
|
+
|
|
386
|
+
## Advanced Configuration
|
|
387
|
+
|
|
388
|
+
### Custom Model Functions
|
|
389
|
+
|
|
390
|
+
Instead of litellm model strings, you can provide a custom callable:
|
|
391
|
+
|
|
392
|
+
```python
|
|
393
|
+
def my_model(messages):
|
|
394
|
+
# Your custom model logic
|
|
395
|
+
return "response"
|
|
396
|
+
|
|
397
|
+
adapter = MCPAdapter(
|
|
398
|
+
server_params=server_params,
|
|
399
|
+
tool_name="my_tool",
|
|
400
|
+
task_model=my_model, # Custom callable
|
|
401
|
+
metric_fn=my_metric,
|
|
402
|
+
)
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
### Disable Two-Pass Workflow
|
|
406
|
+
|
|
407
|
+
For simpler scenarios, disable the two-pass workflow:
|
|
408
|
+
|
|
409
|
+
```python
|
|
410
|
+
adapter = MCPAdapter(
|
|
411
|
+
server_params=server_params,
|
|
412
|
+
tool_name="my_tool",
|
|
413
|
+
task_model="openai/gpt-4o-mini",
|
|
414
|
+
metric_fn=my_metric,
|
|
415
|
+
enable_two_pass=False, # Single-pass only
|
|
416
|
+
)
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
### Remote Server Configuration
|
|
420
|
+
|
|
421
|
+
```python
|
|
422
|
+
adapter = MCPAdapter(
|
|
423
|
+
tool_name="my_tool",
|
|
424
|
+
task_model="openai/gpt-4o-mini",
|
|
425
|
+
metric_fn=my_metric,
|
|
426
|
+
|
|
427
|
+
# Remote server settings
|
|
428
|
+
remote_url="https://mcp.example.com/sse",
|
|
429
|
+
remote_transport="sse", # or "streamable_http"
|
|
430
|
+
remote_headers={
|
|
431
|
+
"Authorization": "Bearer TOKEN",
|
|
432
|
+
"User-Agent": "GEPA/1.0",
|
|
433
|
+
},
|
|
434
|
+
remote_timeout=30, # seconds
|
|
435
|
+
|
|
436
|
+
# Other settings
|
|
437
|
+
enable_two_pass=True,
|
|
438
|
+
failure_score=0.0,
|
|
439
|
+
)
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
**Important:** You must provide EITHER `server_params` (local) OR `remote_url` (remote), not both.
|
|
443
|
+
|
|
444
|
+
### Error Handling
|
|
445
|
+
|
|
446
|
+
Configure failure scores for robustness:
|
|
447
|
+
|
|
448
|
+
```python
|
|
449
|
+
adapter = MCPAdapter(
|
|
450
|
+
server_params=server_params,
|
|
451
|
+
tool_name="my_tool",
|
|
452
|
+
task_model="openai/gpt-4o-mini",
|
|
453
|
+
metric_fn=my_metric,
|
|
454
|
+
failure_score=0.0, # Score for failed executions
|
|
455
|
+
)
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
## Reflective Dataset
|
|
459
|
+
|
|
460
|
+
The adapter generates reflective datasets for each component showing:
|
|
461
|
+
|
|
462
|
+
- Successful and failed tool calls
|
|
463
|
+
- Cases where tools should/shouldn't be called
|
|
464
|
+
- How well tool responses were utilized
|
|
465
|
+
|
|
466
|
+
Example reflective entry for `tool_description` (successful case):
|
|
467
|
+
|
|
468
|
+
```python
|
|
469
|
+
{
|
|
470
|
+
"Inputs": {
|
|
471
|
+
"user_query": "What's in config.json?",
|
|
472
|
+
"tool_description": "Read file contents",
|
|
473
|
+
},
|
|
474
|
+
"Generated Outputs": {
|
|
475
|
+
"tool_called": True,
|
|
476
|
+
"selected_tool": "read_file",
|
|
477
|
+
"tool_arguments": {"path": "config.json"},
|
|
478
|
+
"final_answer": "The config file contains database settings: host=localhost, port=5432, user=admin",
|
|
479
|
+
},
|
|
480
|
+
"Feedback": "Good! The tool 'read_file' was used appropriately and produced a correct answer. Tool called: True, Score: 0.85"
|
|
481
|
+
}
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
Example reflective entry for a failed case (tool not called):
|
|
485
|
+
|
|
486
|
+
```python
|
|
487
|
+
{
|
|
488
|
+
"Inputs": {
|
|
489
|
+
"user_query": "What's in config.json?",
|
|
490
|
+
"tool_description": "Read file contents",
|
|
491
|
+
},
|
|
492
|
+
"Generated Outputs": {
|
|
493
|
+
"tool_called": False,
|
|
494
|
+
"tool_arguments": None,
|
|
495
|
+
"final_answer": "I don't have access to file contents.",
|
|
496
|
+
},
|
|
497
|
+
"Feedback": "The response was incorrect (score: 0.20). The tool was not called. Consider whether calling the tool would help answer this query."
|
|
498
|
+
}
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
Example reflective entry for a failed case (tool called but wrong answer):
|
|
502
|
+
|
|
503
|
+
```python
|
|
504
|
+
{
|
|
505
|
+
"Inputs": {
|
|
506
|
+
"user_query": "What's in config.json?",
|
|
507
|
+
"tool_description": "Read file contents",
|
|
508
|
+
},
|
|
509
|
+
"Generated Outputs": {
|
|
510
|
+
"tool_called": True,
|
|
511
|
+
"selected_tool": "read_file",
|
|
512
|
+
"tool_arguments": {"path": "config.json"},
|
|
513
|
+
"final_answer": "The file contains some configuration data.",
|
|
514
|
+
},
|
|
515
|
+
"Feedback": "The response was incorrect (score: 0.30). The tool 'read_file' was called with arguments {'path': 'config.json'}, but the final answer was still incorrect. Consider whether a different tool from ['read_file', 'write_file', 'list_files'] would be more appropriate, or if the tool description needs to be clearer."
|
|
516
|
+
}
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
Example reflective entry for multi-tool selection (wrong tool chosen):
|
|
520
|
+
|
|
521
|
+
```python
|
|
522
|
+
{
|
|
523
|
+
"Inputs": {
|
|
524
|
+
"user_query": "What files are in the docs folder?",
|
|
525
|
+
"tool_description": "List files and directories in a given path",
|
|
526
|
+
},
|
|
527
|
+
"Generated Outputs": {
|
|
528
|
+
"tool_called": True,
|
|
529
|
+
"selected_tool": "read_file", # Wrong tool selected
|
|
530
|
+
"tool_arguments": {"path": "docs"},
|
|
531
|
+
"final_answer": "Error: docs is not a file",
|
|
532
|
+
},
|
|
533
|
+
"Feedback": "The response was incorrect (score: 0.20). The tool 'read_file' was called with arguments {'path': 'docs'}, but the final answer was still incorrect. Consider whether a different tool from ['read_file', 'write_file', 'list_files'] would be more appropriate, or if the tool description needs to be clearer."
|
|
534
|
+
}
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
## Performance Notes
|
|
538
|
+
|
|
539
|
+
### Subprocess Overhead
|
|
540
|
+
|
|
541
|
+
Each `evaluate()` call spawns a new MCP server process:
|
|
542
|
+
- Startup time: ~100-500ms
|
|
543
|
+
- Total overhead for 150 evals: ~15-75 seconds
|
|
544
|
+
|
|
545
|
+
This is an early-development MVP, and the overhead is expected since MCP is async while GEPA is still sync. The plan is to add the following features later:
|
|
546
|
+
- Session pooling (reuse processes)
|
|
547
|
+
- Background event loop (persistent session)
|
|
548
|
+
- Async GEPA core (native async support)
|
|
549
|
+
|
|
550
|
+
## License
|
|
551
|
+
|
|
552
|
+
Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
|
|
2
|
+
# https://github.com/gepa-ai/gepa
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
MCP Adapter for GEPA.
|
|
6
|
+
|
|
7
|
+
This adapter enables optimization of MCP tool descriptions and system prompts
|
|
8
|
+
using GEPA's iterative refinement approach.
|
|
9
|
+
|
|
10
|
+
Exports:
|
|
11
|
+
MCPAdapter: Main adapter class
|
|
12
|
+
MCPDataInst: Dataset item type
|
|
13
|
+
MCPTrajectory: Execution trace type
|
|
14
|
+
MCPOutput: Output type
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from .mcp_adapter import MCPAdapter, MCPDataInst, MCPOutput, MCPTrajectory
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"MCPAdapter",
|
|
24
|
+
"MCPDataInst",
|
|
25
|
+
"MCPOutput",
|
|
26
|
+
"MCPTrajectory",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def __getattr__(name: str):
|
|
31
|
+
"""Lazy import to handle missing MCP SDK gracefully."""
|
|
32
|
+
if name in {"MCPAdapter", "MCPDataInst", "MCPOutput", "MCPTrajectory"}:
|
|
33
|
+
from .mcp_adapter import MCPAdapter, MCPDataInst, MCPOutput, MCPTrajectory
|
|
34
|
+
|
|
35
|
+
return locals()[name]
|
|
36
|
+
|
|
37
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|