mantisdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +22 -0
- mantisdk/adapter/__init__.py +15 -0
- mantisdk/adapter/base.py +94 -0
- mantisdk/adapter/messages.py +270 -0
- mantisdk/adapter/triplet.py +1028 -0
- mantisdk/algorithm/__init__.py +39 -0
- mantisdk/algorithm/apo/__init__.py +5 -0
- mantisdk/algorithm/apo/apo.py +889 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
- mantisdk/algorithm/base.py +162 -0
- mantisdk/algorithm/decorator.py +264 -0
- mantisdk/algorithm/fast.py +250 -0
- mantisdk/algorithm/gepa/__init__.py +59 -0
- mantisdk/algorithm/gepa/adapter.py +459 -0
- mantisdk/algorithm/gepa/gepa.py +364 -0
- mantisdk/algorithm/gepa/lib/__init__.py +18 -0
- mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
- mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
- mantisdk/algorithm/gepa/lib/api.py +375 -0
- mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
- mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
- mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
- mantisdk/algorithm/gepa/lib/core/result.py +233 -0
- mantisdk/algorithm/gepa/lib/core/state.py +636 -0
- mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
- mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
- mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
- mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
- mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
- mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
- mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
- mantisdk/algorithm/gepa/lib/py.typed +0 -0
- mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
- mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
- mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
- mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
- mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
- mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
- mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
- mantisdk/algorithm/gepa/tracing.py +105 -0
- mantisdk/algorithm/utils.py +177 -0
- mantisdk/algorithm/verl/__init__.py +5 -0
- mantisdk/algorithm/verl/interface.py +202 -0
- mantisdk/cli/__init__.py +56 -0
- mantisdk/cli/prometheus.py +115 -0
- mantisdk/cli/store.py +131 -0
- mantisdk/cli/vllm.py +29 -0
- mantisdk/client.py +408 -0
- mantisdk/config.py +348 -0
- mantisdk/emitter/__init__.py +43 -0
- mantisdk/emitter/annotation.py +370 -0
- mantisdk/emitter/exception.py +54 -0
- mantisdk/emitter/message.py +61 -0
- mantisdk/emitter/object.py +117 -0
- mantisdk/emitter/reward.py +320 -0
- mantisdk/env_var.py +156 -0
- mantisdk/execution/__init__.py +15 -0
- mantisdk/execution/base.py +64 -0
- mantisdk/execution/client_server.py +443 -0
- mantisdk/execution/events.py +69 -0
- mantisdk/execution/inter_process.py +16 -0
- mantisdk/execution/shared_memory.py +282 -0
- mantisdk/instrumentation/__init__.py +119 -0
- mantisdk/instrumentation/agentops.py +314 -0
- mantisdk/instrumentation/agentops_langchain.py +45 -0
- mantisdk/instrumentation/litellm.py +83 -0
- mantisdk/instrumentation/vllm.py +81 -0
- mantisdk/instrumentation/weave.py +500 -0
- mantisdk/litagent/__init__.py +11 -0
- mantisdk/litagent/decorator.py +536 -0
- mantisdk/litagent/litagent.py +252 -0
- mantisdk/llm_proxy.py +1890 -0
- mantisdk/logging.py +370 -0
- mantisdk/reward.py +7 -0
- mantisdk/runner/__init__.py +11 -0
- mantisdk/runner/agent.py +845 -0
- mantisdk/runner/base.py +182 -0
- mantisdk/runner/legacy.py +309 -0
- mantisdk/semconv.py +170 -0
- mantisdk/server.py +401 -0
- mantisdk/store/__init__.py +23 -0
- mantisdk/store/base.py +897 -0
- mantisdk/store/client_server.py +2092 -0
- mantisdk/store/collection/__init__.py +30 -0
- mantisdk/store/collection/base.py +587 -0
- mantisdk/store/collection/memory.py +970 -0
- mantisdk/store/collection/mongo.py +1412 -0
- mantisdk/store/collection_based.py +1823 -0
- mantisdk/store/insight.py +648 -0
- mantisdk/store/listener.py +58 -0
- mantisdk/store/memory.py +396 -0
- mantisdk/store/mongo.py +165 -0
- mantisdk/store/sqlite.py +3 -0
- mantisdk/store/threading.py +357 -0
- mantisdk/store/utils.py +142 -0
- mantisdk/tracer/__init__.py +16 -0
- mantisdk/tracer/agentops.py +242 -0
- mantisdk/tracer/base.py +287 -0
- mantisdk/tracer/dummy.py +106 -0
- mantisdk/tracer/otel.py +555 -0
- mantisdk/tracer/weave.py +677 -0
- mantisdk/trainer/__init__.py +6 -0
- mantisdk/trainer/init_utils.py +263 -0
- mantisdk/trainer/legacy.py +367 -0
- mantisdk/trainer/registry.py +12 -0
- mantisdk/trainer/trainer.py +618 -0
- mantisdk/types/__init__.py +6 -0
- mantisdk/types/core.py +553 -0
- mantisdk/types/resources.py +204 -0
- mantisdk/types/tracer.py +515 -0
- mantisdk/types/tracing.py +218 -0
- mantisdk/utils/__init__.py +1 -0
- mantisdk/utils/id.py +18 -0
- mantisdk/utils/metrics.py +1025 -0
- mantisdk/utils/otel.py +578 -0
- mantisdk/utils/otlp.py +536 -0
- mantisdk/utils/server_launcher.py +1045 -0
- mantisdk/utils/system_snapshot.py +81 -0
- mantisdk/verl/__init__.py +8 -0
- mantisdk/verl/__main__.py +6 -0
- mantisdk/verl/async_server.py +46 -0
- mantisdk/verl/config.yaml +27 -0
- mantisdk/verl/daemon.py +1154 -0
- mantisdk/verl/dataset.py +44 -0
- mantisdk/verl/entrypoint.py +248 -0
- mantisdk/verl/trainer.py +549 -0
- mantisdk-0.1.0.dist-info/METADATA +119 -0
- mantisdk-0.1.0.dist-info/RECORD +190 -0
- mantisdk-0.1.0.dist-info/WHEEL +4 -0
- mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
- mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
MCP Tool Optimization with GEPA
|
|
4
|
+
|
|
5
|
+
This example demonstrates how to use GEPA to optimize MCP tool descriptions
|
|
6
|
+
and system prompts. It shows both local (stdio) and remote (SSE) server support.
|
|
7
|
+
|
|
8
|
+
What you'll learn:
|
|
9
|
+
- Setting up MCPAdapter with local or remote servers
|
|
10
|
+
- Defining evaluation datasets
|
|
11
|
+
- Running optimization to improve tool descriptions
|
|
12
|
+
- Multi-tool support
|
|
13
|
+
|
|
14
|
+
MODEL CONFIGURATION:
|
|
15
|
+
Defaults to Ollama models (no API key needed). Requires Ollama installed: https://ollama.com
|
|
16
|
+
Pull models: ollama pull llama3.1:8b && ollama pull qwen3:8b
|
|
17
|
+
|
|
18
|
+
To use OpenAI models: --task-model gpt-4o-mini (requires OPENAI_API_KEY)
|
|
19
|
+
|
|
20
|
+
Requirements:
|
|
21
|
+
pip install gepa mcp litellm
|
|
22
|
+
|
|
23
|
+
Usage Examples:
|
|
24
|
+
# Run with default Ollama models (no flags needed)
|
|
25
|
+
python mcp_optimization_example.py
|
|
26
|
+
|
|
27
|
+
# Use OpenAI models (requires OPENAI_API_KEY)
|
|
28
|
+
python mcp_optimization_example.py --task-model gpt-4o-mini
|
|
29
|
+
|
|
30
|
+
# Remote MCP server
|
|
31
|
+
python mcp_optimization_example.py --mode remote --url YOUR_URL
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
import logging
|
|
35
|
+
import sys
|
|
36
|
+
import tempfile
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
|
|
39
|
+
from mcp import StdioServerParameters
|
|
40
|
+
|
|
41
|
+
import mantisdk.algorithm.gepa.lib as gepa
|
|
42
|
+
from mantisdk.algorithm.gepa.lib.adapters.mcp_adapter import MCPAdapter
|
|
43
|
+
|
|
44
|
+
# Suppress verbose output from dependencies
try:
    import litellm
    # Quiet LiteLLM if it happens to be installed; the example still imports
    # fine without it thanks to the ImportError guard below.
    litellm.set_verbose = False
    # drop_params=True makes litellm silently drop provider-unsupported
    # parameters instead of raising — NOTE(review): confirm against the
    # installed litellm version's semantics.
    litellm.drop_params = True
    # Set LiteLLM logger to WARNING to suppress INFO messages
    logging.getLogger("LiteLLM").setLevel(logging.WARNING)
except ImportError:
    pass

# Suppress HTTP request logging
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

# Configure logging for GEPA output
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ============================================================================
# Simple MCP Server (for local testing)
# ============================================================================

# Source code for a minimal FastMCP server. create_test_server() writes this
# string to a temp file, and the MCPAdapter launches it as a stdio subprocess.
# It exposes three tools (read_file, write_file, list_files) rooted at
# /tmp/mcp_test. NOTE(review): the embedded `import asyncio` appears unused.
SIMPLE_MCP_SERVER = '''"""Simple MCP server with file operations."""
import asyncio
from pathlib import Path
from mcp.server.fastmcp import FastMCP

# Create MCP server
mcp = FastMCP("File Server")

# Base directory for file operations
BASE_DIR = Path("/tmp/mcp_test")
BASE_DIR.mkdir(exist_ok=True)


@mcp.tool()
def read_file(path: str) -> str:
    """Read contents of a file.

    Args:
        path: Relative path to the file
    """
    try:
        file_path = BASE_DIR / path
        if not file_path.exists():
            return f"Error: File {path} not found"
        return file_path.read_text()
    except Exception as e:
        return f"Error reading file: {e}"


@mcp.tool()
def write_file(path: str, content: str) -> str:
    """Write content to a file.

    Args:
        path: Relative path to the file
        content: Content to write
    """
    try:
        file_path = BASE_DIR / path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content)
        return f"Successfully wrote to {path}"
    except Exception as e:
        return f"Error writing file: {e}"


@mcp.tool()
def list_files() -> str:
    """List all files in the base directory."""
    try:
        files = [str(p.relative_to(BASE_DIR)) for p in BASE_DIR.rglob("*") if p.is_file()]
        if not files:
            return "No files found"
        return "\\n".join(files)
    except Exception as e:
        return f"Error listing files: {e}"


if __name__ == "__main__":
    # Run the server
    mcp.run()
'''
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def create_test_server():
    """Write the embedded MCP server source to a fresh temp file.

    Returns the path of the generated ``server.py`` so callers can launch
    it as a stdio subprocess.
    """
    workdir = Path(tempfile.mkdtemp(prefix="gepa_mcp_"))
    script = workdir / "server.py"
    script.write_text(SIMPLE_MCP_SERVER)
    return script
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def create_test_files():
    """Populate /tmp/mcp_test with the fixture files the dataset refers to."""
    base_dir = Path("/tmp/mcp_test")
    base_dir.mkdir(exist_ok=True)

    # Fixture name -> contents; the reference answers in create_dataset()
    # are substrings of these texts.
    fixtures = {
        "notes.txt": "Meeting at 3pm in Room B\nDiscuss Q4 goals",
        "data.txt": "Revenue: $50000\nExpenses: $30000\nProfit: $20000",
    }
    for name, text in fixtures.items():
        (base_dir / name).write_text(text)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ============================================================================
|
|
150
|
+
# Dataset & Metric Definition
|
|
151
|
+
# ============================================================================
|
|
152
|
+
|
|
153
|
+
def create_dataset():
    """Build the evaluation dataset for the single-tool file-reading example.

    Each instance pairs a user query with the tool arguments the model is
    expected to produce and a substring that must appear in a good answer.
    """
    cases = [
        ("What's in the notes.txt file?", "notes.txt", "3pm"),
        ("Read the content of data.txt", "data.txt", "50000"),
        ("Show me what's in notes.txt", "notes.txt", "Room B"),
    ]
    return [
        {
            "user_query": query,
            "tool_arguments": {"path": path},
            "reference_answer": answer,
            "additional_context": {},
        }
        for query, path, answer in cases
    ]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def metric_fn(data_inst, output: str) -> float:
    """Score an output 1.0 when the reference answer appears in it, else 0.0.

    Matching is case-insensitive; an empty/missing reference always scores 0.0.
    In practice, you'd use more sophisticated metrics based on your use case.
    """
    expected = data_inst.get("reference_answer", "")
    if not expected:
        return 0.0
    return float(expected.lower() in output.lower())
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ============================================================================
|
|
188
|
+
# Local Server Example
|
|
189
|
+
# ============================================================================
|
|
190
|
+
|
|
191
|
+
def run_local_example(task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Optimize a single tool description against a local stdio MCP server.

    Spins up the embedded file server, runs GEPA over the file-reading
    dataset, logs a before/after report, and returns the GEPA result.
    """
    banner = "=" * 60
    rule = "-" * 60

    logger.info(banner)
    logger.info("GEPA MCP Tool Optimization")
    logger.info(banner)

    script_path = create_test_server()
    create_test_files()

    logger.info("MCP Server: Local stdio server (%s)", script_path.name)
    logger.info("Tools: read_file")
    logger.info("Task Model: %s", task_model)
    logger.info("Reflection Model: %s", reflection_model)

    file_adapter = MCPAdapter(
        tool_names="read_file",
        task_model=task_model,
        metric_fn=metric_fn,
        server_params=StdioServerParameters(command="python", args=[str(script_path)]),
        base_system_prompt="You are a helpful file assistant.",
        enable_two_pass=True,
    )

    eval_data = create_dataset()
    seed = {"tool_description": "Read file contents from disk."}

    logger.info("")
    logger.info("Seed Prompt (Initial Tool Description):")
    logger.info("  %s", seed["tool_description"])
    logger.info("")
    logger.info("Dataset: %d examples", len(eval_data))
    logger.info("")
    logger.info("Starting GEPA optimization...")
    logger.info(rule)

    outcome = gepa.optimize(
        seed_candidate=seed,
        trainset=eval_data,
        valset=eval_data,
        adapter=file_adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    logger.info(rule)
    logger.info("Optimization Complete")
    logger.info(banner)
    scores = outcome.val_aggregate_scores
    top_score = scores[outcome.best_idx] if scores else 0.0
    winner = outcome.candidates[outcome.best_idx]
    logger.info("Best Score: %.2f", top_score)
    logger.info("")
    logger.info("Optimized Tool Description:")
    logger.info("  %s", winner.get("tool_description", "N/A"))
    logger.info(banner)

    return outcome
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ============================================================================
|
|
255
|
+
# Remote Server Example
|
|
256
|
+
# ============================================================================
|
|
257
|
+
|
|
258
|
+
def run_remote_example(url: str, task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Optimize a tool description against a remote SSE MCP server.

    Connects the MCPAdapter to ``url`` over SSE, runs GEPA on a one-example
    search dataset, logs a report, and returns the GEPA result.
    """
    banner = "=" * 60
    rule = "-" * 60

    logger.info(banner)
    logger.info("GEPA MCP Tool Optimization")
    logger.info(banner)

    logger.info("MCP Server: Remote SSE server (%s)", url)
    logger.info("Tools: search")
    logger.info("Task Model: %s", task_model)
    logger.info("Reflection Model: %s", reflection_model)

    remote_adapter = MCPAdapter(
        tool_names="search",
        task_model=task_model,
        metric_fn=metric_fn,
        remote_url=url,
        remote_transport="sse",
        remote_headers={},
    )

    eval_data = [
        {
            "user_query": "Search for information about Python",
            "tool_arguments": {"query": "Python"},
            "reference_answer": "programming",
            "additional_context": {},
        },
    ]

    seed = {"tool_description": "Search for information."}

    logger.info("")
    logger.info("Seed Prompt (Initial Tool Description):")
    logger.info("  %s", seed["tool_description"])
    logger.info("")
    logger.info("Dataset: %d examples", len(eval_data))
    logger.info("")
    logger.info("Starting GEPA optimization...")
    logger.info(rule)

    outcome = gepa.optimize(
        seed_candidate=seed,
        trainset=eval_data,
        valset=eval_data,
        adapter=remote_adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    logger.info(rule)
    logger.info("Optimization Complete")
    logger.info(banner)
    scores = outcome.val_aggregate_scores
    top_score = scores[outcome.best_idx] if scores else 0.0
    winner = outcome.candidates[outcome.best_idx]
    logger.info("Best Score: %.2f", top_score)
    logger.info("")
    logger.info("Optimized Tool Description:")
    logger.info("  %s", winner.get("tool_description", "N/A"))
    logger.info(banner)

    return outcome
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ============================================================================
|
|
324
|
+
# Multi-Tool Example
|
|
325
|
+
# ============================================================================
|
|
326
|
+
|
|
327
|
+
def run_multitool_example(task_model: str = "ollama/llama3.1:8b", reflection_model: str = "ollama/qwen3:8b"):
    """Run optimization with multiple tools.

    Each tool gets its own ``tool_description_<name>`` component in the
    candidate, so GEPA can evolve every description independently.

    Args:
        task_model: LiteLLM model id used to execute the task.
        reflection_model: LiteLLM model id used for reflective mutation.

    Returns:
        The GEPA optimization result.
    """
    logger.info("=" * 60)
    logger.info("GEPA MCP Tool Optimization (Multi-Tool)")
    logger.info("=" * 60)

    server_file = create_test_server()
    create_test_files()

    logger.info(f"MCP Server: Local stdio server ({server_file.name})")
    logger.info("Tools: read_file, write_file, list_files")
    logger.info(f"Task Model: {task_model}")
    logger.info(f"Reflection Model: {reflection_model}")

    adapter = MCPAdapter(
        tool_names=["read_file", "write_file", "list_files"],
        task_model=task_model,
        metric_fn=metric_fn,
        server_params=StdioServerParameters(
            command="python",
            args=[str(server_file)],
        ),
    )

    dataset = [
        {
            "user_query": "What files are available?",
            "tool_arguments": {},
            "reference_answer": "notes.txt",
            "additional_context": {},
        },
        {
            "user_query": "Read notes.txt",
            "tool_arguments": {"path": "notes.txt"},
            "reference_answer": "3pm",
            "additional_context": {},
        },
    ]

    # One description component per tool; keys follow the adapter's
    # "tool_description_<tool_name>" convention used below.
    seed_candidate = {
        "tool_description_read_file": "Read a file.",
        "tool_description_write_file": "Write a file.",
        "tool_description_list_files": "List files.",
    }

    logger.info("")
    logger.info("Seed Prompts (Initial Tool Descriptions):")
    for tool_name in adapter.tool_names:
        key = f"tool_description_{tool_name}"
        logger.info(f"  {tool_name}: {seed_candidate.get(key, 'N/A')}")
    logger.info("")
    logger.info(f"Dataset: {len(dataset)} examples")
    logger.info("")
    logger.info("Starting GEPA optimization...")
    logger.info("-" * 60)

    result = gepa.optimize(
        seed_candidate=seed_candidate,
        trainset=dataset,
        valset=dataset,
        adapter=adapter,
        reflection_lm=reflection_model,
        max_metric_calls=10,
    )

    logger.info("-" * 60)
    logger.info("Optimization Complete")
    logger.info("=" * 60)
    # Report the best validation score, matching run_local_example and
    # run_remote_example (this report was previously omitted here).
    best_score = result.val_aggregate_scores[result.best_idx] if result.val_aggregate_scores else 0.0
    best_candidate = result.candidates[result.best_idx]
    logger.info(f"Best Score: {best_score:.2f}")
    logger.info("")
    logger.info("Optimized Tool Descriptions:")
    for tool_name in adapter.tool_names:
        key = f"tool_description_{tool_name}"
        logger.info(f"  {tool_name}: {best_candidate.get(key, 'N/A')}")
    logger.info("=" * 60)

    return result
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# ============================================================================
|
|
406
|
+
# Main
|
|
407
|
+
# ============================================================================
|
|
408
|
+
|
|
409
|
+
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="MCP Tool Optimization Example")
    parser.add_argument(
        "--mode",
        choices=["local", "remote", "multitool"],
        default="local",
        help="Example mode to run",
    )
    parser.add_argument(
        "--url",
        type=str,
        help="Remote MCP server URL (for remote mode)",
    )
    parser.add_argument(
        "--task-model",
        type=str,
        default="ollama/llama3.1:8b",
        help='Model for task execution (default: "ollama/llama3.1:8b")',
    )
    parser.add_argument(
        "--reflection-model",
        type=str,
        default="ollama/qwen3:8b",
        help='Model for reflection (default: "ollama/qwen3:8b")',
    )

    args = parser.parse_args()

    try:
        if args.mode == "local":
            run_local_example(args.task_model, args.reflection_model)
        elif args.mode == "remote":
            # --url is only meaningful (and required) in remote mode.
            if not args.url:
                logger.error("Remote mode requires --url argument")
                sys.exit(1)
            run_remote_example(args.url, args.task_model, args.reflection_model)
        elif args.mode == "multitool":
            run_multitool_example(args.task_model, args.reflection_model)

    except KeyboardInterrupt:
        logger.info("\nInterrupted by user")
        sys.exit(0)
    except Exception as e:
        # logger.exception already appends the traceback; use lazy %-args
        # instead of an eager f-string in the logging call (lint G004).
        logger.exception("Error: %s", e)
        sys.exit(1)
|