mantisdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +22 -0
- mantisdk/adapter/__init__.py +15 -0
- mantisdk/adapter/base.py +94 -0
- mantisdk/adapter/messages.py +270 -0
- mantisdk/adapter/triplet.py +1028 -0
- mantisdk/algorithm/__init__.py +39 -0
- mantisdk/algorithm/apo/__init__.py +5 -0
- mantisdk/algorithm/apo/apo.py +889 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
- mantisdk/algorithm/base.py +162 -0
- mantisdk/algorithm/decorator.py +264 -0
- mantisdk/algorithm/fast.py +250 -0
- mantisdk/algorithm/gepa/__init__.py +59 -0
- mantisdk/algorithm/gepa/adapter.py +459 -0
- mantisdk/algorithm/gepa/gepa.py +364 -0
- mantisdk/algorithm/gepa/lib/__init__.py +18 -0
- mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
- mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
- mantisdk/algorithm/gepa/lib/api.py +375 -0
- mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
- mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
- mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
- mantisdk/algorithm/gepa/lib/core/result.py +233 -0
- mantisdk/algorithm/gepa/lib/core/state.py +636 -0
- mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
- mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
- mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
- mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
- mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
- mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
- mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
- mantisdk/algorithm/gepa/lib/py.typed +0 -0
- mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
- mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
- mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
- mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
- mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
- mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
- mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
- mantisdk/algorithm/gepa/tracing.py +105 -0
- mantisdk/algorithm/utils.py +177 -0
- mantisdk/algorithm/verl/__init__.py +5 -0
- mantisdk/algorithm/verl/interface.py +202 -0
- mantisdk/cli/__init__.py +56 -0
- mantisdk/cli/prometheus.py +115 -0
- mantisdk/cli/store.py +131 -0
- mantisdk/cli/vllm.py +29 -0
- mantisdk/client.py +408 -0
- mantisdk/config.py +348 -0
- mantisdk/emitter/__init__.py +43 -0
- mantisdk/emitter/annotation.py +370 -0
- mantisdk/emitter/exception.py +54 -0
- mantisdk/emitter/message.py +61 -0
- mantisdk/emitter/object.py +117 -0
- mantisdk/emitter/reward.py +320 -0
- mantisdk/env_var.py +156 -0
- mantisdk/execution/__init__.py +15 -0
- mantisdk/execution/base.py +64 -0
- mantisdk/execution/client_server.py +443 -0
- mantisdk/execution/events.py +69 -0
- mantisdk/execution/inter_process.py +16 -0
- mantisdk/execution/shared_memory.py +282 -0
- mantisdk/instrumentation/__init__.py +119 -0
- mantisdk/instrumentation/agentops.py +314 -0
- mantisdk/instrumentation/agentops_langchain.py +45 -0
- mantisdk/instrumentation/litellm.py +83 -0
- mantisdk/instrumentation/vllm.py +81 -0
- mantisdk/instrumentation/weave.py +500 -0
- mantisdk/litagent/__init__.py +11 -0
- mantisdk/litagent/decorator.py +536 -0
- mantisdk/litagent/litagent.py +252 -0
- mantisdk/llm_proxy.py +1890 -0
- mantisdk/logging.py +370 -0
- mantisdk/reward.py +7 -0
- mantisdk/runner/__init__.py +11 -0
- mantisdk/runner/agent.py +845 -0
- mantisdk/runner/base.py +182 -0
- mantisdk/runner/legacy.py +309 -0
- mantisdk/semconv.py +170 -0
- mantisdk/server.py +401 -0
- mantisdk/store/__init__.py +23 -0
- mantisdk/store/base.py +897 -0
- mantisdk/store/client_server.py +2092 -0
- mantisdk/store/collection/__init__.py +30 -0
- mantisdk/store/collection/base.py +587 -0
- mantisdk/store/collection/memory.py +970 -0
- mantisdk/store/collection/mongo.py +1412 -0
- mantisdk/store/collection_based.py +1823 -0
- mantisdk/store/insight.py +648 -0
- mantisdk/store/listener.py +58 -0
- mantisdk/store/memory.py +396 -0
- mantisdk/store/mongo.py +165 -0
- mantisdk/store/sqlite.py +3 -0
- mantisdk/store/threading.py +357 -0
- mantisdk/store/utils.py +142 -0
- mantisdk/tracer/__init__.py +16 -0
- mantisdk/tracer/agentops.py +242 -0
- mantisdk/tracer/base.py +287 -0
- mantisdk/tracer/dummy.py +106 -0
- mantisdk/tracer/otel.py +555 -0
- mantisdk/tracer/weave.py +677 -0
- mantisdk/trainer/__init__.py +6 -0
- mantisdk/trainer/init_utils.py +263 -0
- mantisdk/trainer/legacy.py +367 -0
- mantisdk/trainer/registry.py +12 -0
- mantisdk/trainer/trainer.py +618 -0
- mantisdk/types/__init__.py +6 -0
- mantisdk/types/core.py +553 -0
- mantisdk/types/resources.py +204 -0
- mantisdk/types/tracer.py +515 -0
- mantisdk/types/tracing.py +218 -0
- mantisdk/utils/__init__.py +1 -0
- mantisdk/utils/id.py +18 -0
- mantisdk/utils/metrics.py +1025 -0
- mantisdk/utils/otel.py +578 -0
- mantisdk/utils/otlp.py +536 -0
- mantisdk/utils/server_launcher.py +1045 -0
- mantisdk/utils/system_snapshot.py +81 -0
- mantisdk/verl/__init__.py +8 -0
- mantisdk/verl/__main__.py +6 -0
- mantisdk/verl/async_server.py +46 -0
- mantisdk/verl/config.yaml +27 -0
- mantisdk/verl/daemon.py +1154 -0
- mantisdk/verl/dataset.py +44 -0
- mantisdk/verl/entrypoint.py +248 -0
- mantisdk/verl/trainer.py +549 -0
- mantisdk-0.1.0.dist-info/METADATA +119 -0
- mantisdk-0.1.0.dist-info/RECORD +190 -0
- mantisdk-0.1.0.dist-info/WHEEL +4 -0
- mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
- mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any, Dict, Generic, List, Optional, TypeVar
|
|
8
|
+
|
|
9
|
+
import openai
|
|
10
|
+
from mantisdk.algorithm.gepa.lib.api import optimize
|
|
11
|
+
|
|
12
|
+
from mantisdk.adapter import TraceAdapter
|
|
13
|
+
from mantisdk.adapter.messages import TraceToMessages
|
|
14
|
+
from mantisdk.algorithm.base import Algorithm
|
|
15
|
+
from mantisdk.algorithm.gepa.adapter import (
|
|
16
|
+
MantisdkDataInst,
|
|
17
|
+
MantisdkGEPAAdapter,
|
|
18
|
+
)
|
|
19
|
+
from mantisdk.algorithm.gepa.tracing import GEPATracingContext
|
|
20
|
+
from mantisdk.algorithm.utils import with_llm_proxy, with_store
|
|
21
|
+
from mantisdk.types import Dataset, PromptTemplate, TracingConfig
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
# Type of a single task item in the user-supplied datasets; GEPA is generic
# over whatever task representation the caller uses.
T_task = TypeVar("T_task")


# Default reflection prompt used when the caller does not supply
# `reflection_prompt_template`. The `<curr_instructions>` and
# `<inputs_outputs_feedback>` markers are placeholders for the current
# candidate prompt and the observed failure examples.
# NOTE(review): the substitution of these markers appears to happen inside
# the GEPA library's proposer, not in this module — confirm against
# `mantisdk.algorithm.gepa.lib`.
TEMPLATE_AWARE_REFLECTION_PROMPT = """You are an expert at improving LLM prompts based on observed failures.

## Current Prompt Template
```
<curr_instructions>
```

## Observed Failures
The following examples show where the current prompt gave INCORRECT outputs. Study the pattern of failures carefully:
```
<inputs_outputs_feedback>
```

## Your Task
Write an IMPROVED prompt template that fixes the observed failures.

**CRITICAL REQUIREMENTS**:
1. **Analyze the failure pattern**: Look at WHAT the prompt got wrong. Is it too strict? Too lenient? Missing context? Misunderstanding the task?
2. **Make a CONCEPTUAL FIX**: Don't just tweak wording - fundamentally change the approach if the current strategy is flawed
3. **Preserve placeholders**: Keep any {variable_name} placeholders exactly as they appear (e.g., {session}, {input}, {output}, or {{session}}, {{input}}, {{output}})
4. **Be specific**: Add concrete criteria, examples, or decision rules based on what the failures reveal
5. Read the inputs carefully and identify the input format and infer detailed task description about the task I wish to solve with the assistant
6. Read all the assistant responses and the corresponding feedback. Identify all niche and domain specific factual information about the task and include it in the instruction, as a lot of it may not be available to the assistant in the future. The assistant may have utilized a generalizable strategy to solve the task, if so, include that in the instruction as well.

**Think step by step**:
- What is the PATTERN in the failures?
- WHY is the current prompt failing on these cases?
- What SPECIFIC change would fix this pattern?

Output your improved template within ``` blocks."""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GEPA(Algorithm, Generic[T_task]):
    """GEPA (Genetic-Pareto) algorithm for Mantisdk.

    This algorithm optimizes prompt templates (and potentially other text resources)
    using an evolutionary approach with LLM-based reflection.

    GEPA maintains a population of candidate prompts and evolves them by:
    1. Evaluating candidates on training data
    2. Using an LLM to reflect on failures and propose improvements
    3. Selecting the best candidates based on validation performance

    The class is generic over ``T_task``, the type of a single item in the
    training/validation datasets passed to :meth:`run`.
    """

    # Algorithm-owned tracing configuration.
    # Defines environment and tags for all traces generated by GEPA.
    # Note: session_id will be overridden per-run in the run() method
    # (see _get_tracing_config_with_session). This is a class-level constant
    # shared by all GEPA instances.
    TRACING_CONFIG = TracingConfig(
        environment="mantisdk-gepa",
        algorithm_name="gepa",
    )
|
|
79
|
+
|
|
80
|
+
def _get_tracing_config_with_session(self, session_id: str) -> TracingConfig:
|
|
81
|
+
"""Create a TracingConfig with a specific session_id for this run."""
|
|
82
|
+
return TracingConfig(
|
|
83
|
+
environment="mantisdk-gepa",
|
|
84
|
+
algorithm_name="gepa",
|
|
85
|
+
session_id=session_id,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def __init__(
|
|
89
|
+
self,
|
|
90
|
+
*,
|
|
91
|
+
max_metric_calls: int = 100,
|
|
92
|
+
reflection_minibatch_size: int = 5,
|
|
93
|
+
population_size: int = 4,
|
|
94
|
+
adapter: Optional[TraceAdapter] = None,
|
|
95
|
+
rollout_batch_timeout: float = 600.0,
|
|
96
|
+
reflection_prompt_template: Optional[str] = None,
|
|
97
|
+
reflection_metadata: Optional[Dict[str, Any]] = None,
|
|
98
|
+
target_model_config: Optional[Dict[str, Any]] = None,
|
|
99
|
+
**gepa_kwargs: Any,
|
|
100
|
+
) -> None:
|
|
101
|
+
"""Initialize the GEPA algorithm.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
max_metric_calls: Maximum number of evaluations (budget).
|
|
105
|
+
reflection_minibatch_size: Batch size for reflection.
|
|
106
|
+
population_size: Size of the population in evolutionary search.
|
|
107
|
+
adapter: TraceAdapter to convert spans to messages. Defaults to TraceToMessages.
|
|
108
|
+
rollout_batch_timeout: Timeout for waiting for rollouts.
|
|
109
|
+
reflection_prompt_template: Custom prompt template for reflection.
|
|
110
|
+
reflection_metadata: Metadata for reflection LLM traces (Langfuse).
|
|
111
|
+
target_model_config: Direct model configuration (apiKey, baseUrl) to bypass proxy if needed.
|
|
112
|
+
**gepa_kwargs: Additional arguments passed to gepa.optimize.
|
|
113
|
+
"""
|
|
114
|
+
super().__init__()
|
|
115
|
+
self.max_metric_calls = max_metric_calls
|
|
116
|
+
self.reflection_minibatch_size = reflection_minibatch_size
|
|
117
|
+
self.population_size = population_size
|
|
118
|
+
self.adapter = adapter or TraceToMessages()
|
|
119
|
+
self.rollout_batch_timeout = rollout_batch_timeout
|
|
120
|
+
self.reflection_metadata = reflection_metadata
|
|
121
|
+
self.target_model_config = target_model_config
|
|
122
|
+
|
|
123
|
+
# Set default reflection prompt template if not provided
|
|
124
|
+
if reflection_prompt_template is not None:
|
|
125
|
+
gepa_kwargs["reflection_prompt_template"] = reflection_prompt_template
|
|
126
|
+
elif "reflection_prompt_template" not in gepa_kwargs:
|
|
127
|
+
gepa_kwargs["reflection_prompt_template"] = TEMPLATE_AWARE_REFLECTION_PROMPT
|
|
128
|
+
|
|
129
|
+
self.gepa_kwargs = gepa_kwargs
|
|
130
|
+
self._best_candidate: Optional[Dict[str, str]] = None
|
|
131
|
+
self._best_score: float = 0.0
|
|
132
|
+
self._full_result: Optional[Any] = None # Store full GEPAResult for history access
|
|
133
|
+
|
|
134
|
+
def get_best_prompt(self) -> Optional[PromptTemplate]:
|
|
135
|
+
"""Get the best prompt found during optimization."""
|
|
136
|
+
if self._best_candidate:
|
|
137
|
+
# Return the first PromptTemplate value
|
|
138
|
+
for key, value in self._best_candidate.items():
|
|
139
|
+
return PromptTemplate(template=value, engine="f-string")
|
|
140
|
+
return None
|
|
141
|
+
|
|
142
|
+
    @with_store
    @with_llm_proxy(required=True)
    async def run(
        self,
        llm_proxy,  # injected by decorator
        store,  # injected by decorator
        train_dataset: Optional[Dataset[T_task]] = None,
        val_dataset: Optional[Dataset[T_task]] = None,
    ) -> None:
        """Run the GEPA optimization loop.

        Args:
            llm_proxy: LLM proxy injected by the @with_llm_proxy decorator;
                used both as a resource for rollouts and for reflection calls.
            store: Store injected by the @with_store decorator.
            train_dataset: Dataset used for optimization (training).
            val_dataset: Dataset used for validation. When omitted, GEPA is
                given no valset (see the `optimize` call below).

        Raises:
            ValueError: If no train_dataset is given, if no initial resources
                are set, or if none of them is a PromptTemplate.
        """
        if train_dataset is None:
            raise ValueError("train_dataset is required for GEPA optimization.")

        # NOTE(review): this re-fetches the store and shadows the decorator-
        # injected `store` parameter — confirm both resolve to the same object.
        store = self.get_store()
        assert store is not None
        assert llm_proxy is not None

        # Captured so the synchronous GEPA adapter can schedule coroutines
        # back onto this event loop.
        loop = asyncio.get_running_loop()

        # Get initial resources to find the target resource to optimize
        initial_resources = self.get_initial_resources()
        if not initial_resources:
            raise ValueError("Initial resources must be set before running GEPA.")

        # Find the target resource to optimize: every PromptTemplate is added
        # to the seed candidate; the last one found becomes the named target.
        target_resource_name = None
        initial_candidate: Dict[str, str] = {}

        for name, res in initial_resources.items():
            if isinstance(res, PromptTemplate):
                target_resource_name = name
                initial_candidate[name] = res.template

        if not target_resource_name:
            raise ValueError("No PromptTemplate found in initial resources to optimize.")

        logger.info(f"GEPA will optimize resource: {target_resource_name}")
        logger.info(f"Initial prompt: {initial_candidate[target_resource_name][:100]}...")

        # Setup Reflection LLM via Proxy
        llm_resource = llm_proxy.as_resource()

        # Create tracing context for detailed execution tracking
        # This generates a unique session_id for grouping all traces in this run
        tracing_context = GEPATracingContext()

        # Create a TracingConfig with this run's session_id
        run_tracing_config = self._get_tracing_config_with_session(tracing_context.session_id)

        logger.info(f"GEPA session started: {tracing_context.session_id}")

        # Create the bridge adapter with tracing config and context
        gepa_adapter = MantisdkGEPAAdapter(
            store=store,
            loop=loop,
            resource_name=target_resource_name,
            adapter=self.adapter,
            llm_proxy_resource=llm_resource,
            rollout_batch_timeout=self.rollout_batch_timeout,
            tracing_config=run_tracing_config,
            tracing_context=tracing_context,
        )

        # Import here (inside run) to avoid circular imports:
        # `mantisdk.algorithm.gepa.__init__` re-exports `GEPA` from this module.
        from mantisdk.algorithm import gepa as gepa_module

        @gepa_module.reflection
        def reflection_lm(prompt: str) -> str:
            """GEPA reflection LM callable - takes a string prompt, returns string response.

            Routes through the LLMProxy for consistent tracing and OpenTelemetry export.
            The proxy handles API keys and model routing - we just need to call it.

            IMPORTANT: The LLMProxy must be configured with callbacks: ["opentelemetry"]
            (not "return_token_ids") when calling OpenAI providers.

            The reflection phase is tagged distinctly from validation-eval to enable
            proper filtering in the Mantis UI. Tags include:
            - "gepa" (algorithm)
            - "reflection" (phase)
            - "llm-reflection" (explicit marker)
            - "gen-N" (generation number)

            On any failure this returns the seed prompt wrapped in ``` fences
            rather than raising, so the optimization loop keeps going.
            """
            # Local import: litellm is only needed by this closure and is
            # deferred until the first reflection call.
            import litellm

            # Track phase transition: entering reflection, then starting new generation
            tracing_context.set_phase("reflection")

            logger.info(f"[reflection_lm] === REFLECTION CALLED (gen-{tracing_context.generation}) ===")
            logger.info(f"[reflection_lm] Session: {tracing_context.session_id}")
            logger.info(f"[reflection_lm] Model: {llm_resource.model}")
            logger.info(f"[reflection_lm] Proxy Endpoint: {llm_resource.endpoint}")
            logger.info(f"[reflection_lm] Prompt length: {len(prompt)} chars")
            logger.debug(f"[reflection_lm] Prompt preview: {prompt[:500]}...")

            try:
                # Route through the LLMProxy for tracing
                # The proxy already has the API key and model config from gepa_runner.py
                logger.info(f"[reflection_lm] Calling via proxy: model={llm_resource.model} endpoint={llm_resource.endpoint}")

                # Include detailed tracing metadata for reflection traces
                # Uses run_tracing_config which includes the session_id
                # Note: call-type tagging ("reflection-call") is handled by @gepa_module.reflection decorator
                import json
                gepa_tags = [
                    f"gen-{tracing_context.generation}",
                ]
                tracing_metadata = run_tracing_config.to_detailed_metadata(
                    phase="reflection",
                    extra_tags=gepa_tags,
                )

                # Pass metadata via headers for proxy-side tracing context
                # Note: call-type is handled automatically by the @gepa_module.reflection decorator
                extra_headers = {
                    "x-mantis-session-id": tracing_metadata.get("session_id", ""),
                    "x-mantis-environment": tracing_metadata.get("environment", ""),
                    "x-mantis-tags": json.dumps(tracing_metadata.get("tags", [])),
                }

                response = litellm.completion(
                    model=llm_resource.model,  # Model name as registered in proxy
                    messages=[{"role": "user", "content": prompt}],
                    api_base=llm_resource.endpoint,  # Proxy URL
                    api_key="dummy",  # Proxy handles the real API key
                    temperature=0.7,
                    metadata=tracing_metadata,  # Pass environment/tags/session_id for OTEL
                    extra_headers=extra_headers,  # Pass to proxy for server-side span attributes
                )
                content = response.choices[0].message.content or ""
                logger.info(f"[reflection_lm] SUCCESS - Response length: {len(content)} chars")
                logger.info(f"[reflection_lm] Response preview: {content[:300]}...")

                # After reflection completes, increment generation for next eval round
                tracing_context.next_generation()
                tracing_context.set_phase("train-eval")

                return content
            except Exception as e:
                logger.error(f"[reflection_lm] FAILED: {e}", exc_info=True)
                # Return fallback instead of crashing - allows optimization to continue
                logger.warning(f"[reflection_lm] Returning fallback (seed prompt)")

                # Still increment generation even on failure
                tracing_context.next_generation()
                tracing_context.set_phase("train-eval")

                # Fenced so it parses the same way as a genuine LLM response.
                return f"```\n{initial_candidate.get(target_resource_name, '')}\n```"

        # Prepare Training Data: dict items keep their own "id" (falling back
        # to the enumeration index), anything else is wrapped under "task".
        gepa_train_data: List[MantisdkDataInst] = []
        for i, item in enumerate(train_dataset):
            item_id = str(i)
            if isinstance(item, dict):
                gepa_train_data.append({"input": item, "id": item.get("id", item_id)})
            else:
                gepa_train_data.append({"input": {"task": item}, "id": item_id})

        # Prepare Validation Data (use train if not provided)
        # NOTE(review): despite the comment above, when val_dataset is None
        # gepa_val_data stays None and is passed through as-is — confirm that
        # the gepa library itself falls back to the trainset in that case.
        gepa_val_data: Optional[List[MantisdkDataInst]] = None
        if val_dataset is not None:
            gepa_val_data = []
            for i, item in enumerate(val_dataset):
                # "val-" prefix keeps validation ids distinct from training ids.
                item_id = f"val-{i}"
                if isinstance(item, dict):
                    gepa_val_data.append({"input": item, "id": item.get("id", item_id)})
                else:
                    gepa_val_data.append({"input": {"task": item}, "id": item_id})

        logger.info(f"Starting GEPA optimization with {len(gepa_train_data)} training samples")
        if gepa_val_data:
            logger.info(f"Using {len(gepa_val_data)} validation samples")

        # Run Optimization in Executor (GEPA is synchronous)
        def run_optimization():
            # Thin wrapper so the blocking optimize() call can be handed to
            # run_in_executor below without blocking the event loop.
            return optimize(
                trainset=gepa_train_data,
                valset=gepa_val_data,
                adapter=gepa_adapter,
                seed_candidate=initial_candidate,
                reflection_lm=reflection_lm,
                max_metric_calls=self.max_metric_calls,
                reflection_minibatch_size=self.reflection_minibatch_size,
                **self.gepa_kwargs,
            )

        try:
            result = await loop.run_in_executor(None, run_optimization)

            logger.info(f"GEPA optimization finished. Total metric calls: {result.total_metric_calls}")
            logger.info(f"Best candidate index: {result.best_idx}, score: {result.val_aggregate_scores[result.best_idx]}")
            logger.info(f"Best candidate: {result.best_candidate}")

            self._best_candidate = result.best_candidate
            self._best_score = result.val_aggregate_scores[result.best_idx]
            self._full_result = result  # Store full result for candidate history

            # Update the store with the best candidate, preserving each
            # original resource's template engine where one is known.
            final_resources = {}
            for key, value in result.best_candidate.items():
                original = initial_resources.get(key)
                engine = original.engine if isinstance(original, PromptTemplate) else "f-string"
                final_resources[key] = PromptTemplate(template=value, engine=engine)

            await store.update_resources("gepa-final", final_resources)
            logger.info("Updated store with best candidate resources (version: 'gepa-final').")

            # Send full GEPA result to Insight for experiment tracking
            # (feature-detected, since not every store implements it).
            if hasattr(store, 'complete_job'):
                summary = result.to_dict()
                # Add session_id for trace filtering in the UI
                summary["session_id"] = tracing_context.session_id
                store.complete_job(summary)

        except Exception as e:
            logger.error(f"GEPA optimization failed: {e}")
            raise
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
|
|
2
|
+
# https://github.com/gepa-ai/gepa
|
|
3
|
+
|
|
4
|
+
from .adapters import default_adapter
|
|
5
|
+
from .api import optimize
|
|
6
|
+
from .core.adapter import EvaluationBatch, GEPAAdapter
|
|
7
|
+
from .core.result import GEPAResult
|
|
8
|
+
from .examples import aime
|
|
9
|
+
from .utils.stop_condition import (
|
|
10
|
+
CompositeStopper,
|
|
11
|
+
FileStopper,
|
|
12
|
+
MaxMetricCallsStopper,
|
|
13
|
+
NoImprovementStopper,
|
|
14
|
+
ScoreThresholdStopper,
|
|
15
|
+
SignalStopper,
|
|
16
|
+
StopperProtocol,
|
|
17
|
+
TimeoutStopCondition,
|
|
18
|
+
)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# GEPA Adapters
|
|
2
|
+
|
|
3
|
+
> GEPA 🤝 Any Framework
|
|
4
|
+
|
|
5
|
+
This directory provides the interface to allow GEPA to plug into systems and frameworks of your choice! GEPA can interface with any system consisting of text components, by implementing `GEPAAdapter` in [../core/adapter.py](../core/adapter.py).
|
|
6
|
+
|
|
7
|
+
Currently, GEPA has the following adapters:
|
|
8
|
+
- [DSPy Adapter](./dspy_adapter/): This adapter integrates GEPA into [DSPy](https://dspy.ai/), to allow it to optimize any DSPy module's signature instructions.
|
|
9
|
+
- [Default Adapter](./default_adapter/): This adapter integrates GEPA into a single-turn LLM environment, where the task is specified as a user message, and an answer string must be present in the assistant response. GEPA optimizes the system prompt.
|
|
10
|
+
- [AnyMaths Adapter](./anymaths_adapter/): This adapter integrates GEPA with litellm and ollama to solve single-turn mathematical problems.
|
|
11
|
+
|
|
12
|
+
If there are any frameworks you would like GEPA integrated into, please create an issue or PR!
|
|
File without changes
|