mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,364 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import Any, Dict, Generic, List, Optional, TypeVar
8
+
9
+ import openai
10
+ from mantisdk.algorithm.gepa.lib.api import optimize
11
+
12
+ from mantisdk.adapter import TraceAdapter
13
+ from mantisdk.adapter.messages import TraceToMessages
14
+ from mantisdk.algorithm.base import Algorithm
15
+ from mantisdk.algorithm.gepa.adapter import (
16
+ MantisdkDataInst,
17
+ MantisdkGEPAAdapter,
18
+ )
19
+ from mantisdk.algorithm.gepa.tracing import GEPATracingContext
20
+ from mantisdk.algorithm.utils import with_llm_proxy, with_store
21
+ from mantisdk.types import Dataset, PromptTemplate, TracingConfig
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ T_task = TypeVar("T_task")
26
+
27
+
28
+ TEMPLATE_AWARE_REFLECTION_PROMPT = """You are an expert at improving LLM prompts based on observed failures.
29
+
30
+ ## Current Prompt Template
31
+ ```
32
+ <curr_instructions>
33
+ ```
34
+
35
+ ## Observed Failures
36
+ The following examples show where the current prompt gave INCORRECT outputs. Study the pattern of failures carefully:
37
+ ```
38
+ <inputs_outputs_feedback>
39
+ ```
40
+
41
+ ## Your Task
42
+ Write an IMPROVED prompt template that fixes the observed failures.
43
+
44
+ **CRITICAL REQUIREMENTS**:
45
+ 1. **Analyze the failure pattern**: Look at WHAT the prompt got wrong. Is it too strict? Too lenient? Missing context? Misunderstanding the task?
46
+ 2. **Make a CONCEPTUAL FIX**: Don't just tweak wording - fundamentally change the approach if the current strategy is flawed
47
+ 3. **Preserve placeholders**: Keep any {variable_name} placeholders exactly as they appear (e.g., {session}, {input}, {output}, or {{session}}, {{input}}, {{output}})
48
+ 4. **Be specific**: Add concrete criteria, examples, or decision rules based on what the failures reveal
49
+ 5. Read the inputs carefully and identify the input format and infer detailed task description about the task I wish to solve with the assistant
50
+ 6. Read all the assistant responses and the corresponding feedback. Identify all niche and domain specific factual information about the task and include it in the instruction, as a lot of it may not be available to the assistant in the future. The assistant may have utilized a generalizable strategy to solve the task, if so, include that in the instruction as well.
51
+
52
+ **Think step by step**:
53
+ - What is the PATTERN in the failures?
54
+ - WHY is the current prompt failing on these cases?
55
+ - What SPECIFIC change would fix this pattern?
56
+
57
+ Output your improved template within ``` blocks."""
58
+
59
+
60
class GEPA(Algorithm, Generic[T_task]):
    """GEPA (Genetic-Pareto) algorithm for Mantisdk.

    This algorithm optimizes prompt templates (and potentially other text resources)
    using an evolutionary approach with LLM-based reflection.

    GEPA maintains a population of candidate prompts and evolves them by:
    1. Evaluating candidates on training data
    2. Using an LLM to reflect on failures and propose improvements
    3. Selecting the best candidates based on validation performance
    """

    # Algorithm-owned tracing configuration.
    # Defines environment and tags for all traces generated by GEPA.
    # run() does not use this object directly: it builds a per-run config that
    # additionally carries the session_id (see _get_tracing_config_with_session).
    TRACING_CONFIG = TracingConfig(
        environment="mantisdk-gepa",
        algorithm_name="gepa",
    )

    def _get_tracing_config_with_session(self, session_id: str) -> TracingConfig:
        """Create a TracingConfig with a specific session_id for this run.

        Args:
            session_id: Identifier used to group all traces of one run.

        Returns:
            A new TracingConfig with the GEPA environment/algorithm name and
            the given session_id.
        """
        return TracingConfig(
            environment="mantisdk-gepa",
            algorithm_name="gepa",
            session_id=session_id,
        )

    def __init__(
        self,
        *,
        max_metric_calls: int = 100,
        reflection_minibatch_size: int = 5,
        population_size: int = 4,
        adapter: Optional[TraceAdapter] = None,
        rollout_batch_timeout: float = 600.0,
        reflection_prompt_template: Optional[str] = None,
        reflection_metadata: Optional[Dict[str, Any]] = None,
        target_model_config: Optional[Dict[str, Any]] = None,
        **gepa_kwargs: Any,
    ) -> None:
        """Initialize the GEPA algorithm.

        Args:
            max_metric_calls: Maximum number of evaluations (budget).
            reflection_minibatch_size: Batch size for reflection.
            population_size: Size of the population in evolutionary search.
                Stored on the instance; this class does not forward it to
                ``optimize``.
            adapter: TraceAdapter to convert spans to messages. Defaults to TraceToMessages.
            rollout_batch_timeout: Timeout for waiting for rollouts.
            reflection_prompt_template: Custom prompt template for reflection.
                When omitted, TEMPLATE_AWARE_REFLECTION_PROMPT is used.
            reflection_metadata: Metadata for reflection LLM traces (Langfuse).
                Stored on the instance; not read elsewhere in this class.
            target_model_config: Direct model configuration (apiKey, baseUrl) to bypass
                proxy if needed. Stored on the instance; not read elsewhere in this class.
            **gepa_kwargs: Additional arguments passed to gepa.optimize.
        """
        super().__init__()
        self.max_metric_calls = max_metric_calls
        self.reflection_minibatch_size = reflection_minibatch_size
        self.population_size = population_size
        self.adapter = adapter or TraceToMessages()
        self.rollout_batch_timeout = rollout_batch_timeout
        self.reflection_metadata = reflection_metadata
        self.target_model_config = target_model_config

        # `reflection_prompt_template` is a named keyword-only parameter, so it can
        # never arrive inside **gepa_kwargs; the key is therefore always set here,
        # either to the caller's template or to the module default.
        gepa_kwargs["reflection_prompt_template"] = (
            reflection_prompt_template
            if reflection_prompt_template is not None
            else TEMPLATE_AWARE_REFLECTION_PROMPT
        )

        self.gepa_kwargs = gepa_kwargs
        self._best_candidate: Optional[Dict[str, str]] = None
        self._best_score: float = 0.0
        self._full_result: Optional[Any] = None  # Store full GEPAResult for history access

    @staticmethod
    def _original_engine(initial_resources: Optional[Any], name: str) -> str:
        """Return the template engine of the initial resource called ``name``.

        Falls back to ``"f-string"`` when there are no initial resources or the
        named resource is not a PromptTemplate.
        """
        original = initial_resources.get(name) if initial_resources else None
        return original.engine if isinstance(original, PromptTemplate) else "f-string"

    @staticmethod
    def _build_gepa_dataset(dataset: Any, id_prefix: str) -> List[MantisdkDataInst]:
        """Convert a dataset into the list-of-dicts form handed to ``optimize``.

        Dict items are passed through as the ``input`` and may supply their own
        ``id``; any other item is wrapped as ``{"task": item}``. The fallback id
        is ``f"{id_prefix}{index}"``.
        """
        instances: List[MantisdkDataInst] = []
        for index, item in enumerate(dataset):
            fallback_id = f"{id_prefix}{index}"
            if isinstance(item, dict):
                instances.append({"input": item, "id": item.get("id", fallback_id)})
            else:
                instances.append({"input": {"task": item}, "id": fallback_id})
        return instances

    def get_best_prompt(self) -> Optional[PromptTemplate]:
        """Get the best prompt found during optimization.

        Returns:
            A PromptTemplate built from the first entry of the best candidate,
            using the same engine as the matching initial resource (``"f-string"``
            when that cannot be determined), or ``None`` if no candidate has been
            recorded yet.
        """
        if not self._best_candidate:
            return None
        name, template = next(iter(self._best_candidate.items()))
        # Keep the engine consistent with what run() writes to the store.
        engine = self._original_engine(self.get_initial_resources(), name)
        return PromptTemplate(template=template, engine=engine)

    @with_store
    @with_llm_proxy(required=True)
    async def run(
        self,
        llm_proxy,  # injected by decorator
        store,  # injected by decorator
        train_dataset: Optional[Dataset[T_task]] = None,
        val_dataset: Optional[Dataset[T_task]] = None,
    ) -> None:
        """Run the GEPA optimization loop.

        Collects the PromptTemplate resources as the seed candidate, runs the
        synchronous ``optimize`` call in the default executor, records the best
        candidate on the instance, and writes it to the store under the version
        name ``"gepa-final"``.

        Args:
            train_dataset: Dataset used for optimization (training).
            val_dataset: Dataset used for validation.

        Raises:
            ValueError: If ``train_dataset`` is None, if no initial resources are
                set, or if none of them is a PromptTemplate.
            Exception: Any error raised during optimization or while updating
                the store is logged and re-raised.
        """
        import inspect

        if train_dataset is None:
            raise ValueError("train_dataset is required for GEPA optimization.")

        store = self.get_store()
        assert store is not None
        assert llm_proxy is not None

        loop = asyncio.get_running_loop()

        # Get initial resources to find the target resource to optimize
        initial_resources = self.get_initial_resources()
        if not initial_resources:
            raise ValueError("Initial resources must be set before running GEPA.")

        # Every PromptTemplate becomes a component of the seed candidate.
        # NOTE(review): if several PromptTemplates exist, target_resource_name ends
        # up as the last one iterated while initial_candidate contains all of them.
        target_resource_name = None
        initial_candidate: Dict[str, str] = {}
        for name, res in initial_resources.items():
            if isinstance(res, PromptTemplate):
                target_resource_name = name
                initial_candidate[name] = res.template

        if not target_resource_name:
            raise ValueError("No PromptTemplate found in initial resources to optimize.")

        logger.info(f"GEPA will optimize resource: {target_resource_name}")
        logger.info(f"Initial prompt: {initial_candidate[target_resource_name][:100]}...")

        # Setup Reflection LLM via Proxy
        llm_resource = llm_proxy.as_resource()

        # Create tracing context for detailed execution tracking; its session_id
        # is attached to every trace of this run.
        tracing_context = GEPATracingContext()
        run_tracing_config = self._get_tracing_config_with_session(tracing_context.session_id)

        logger.info(f"GEPA session started: {tracing_context.session_id}")

        # Create the bridge adapter with tracing config and context
        gepa_adapter = MantisdkGEPAAdapter(
            store=store,
            loop=loop,
            resource_name=target_resource_name,
            adapter=self.adapter,
            llm_proxy_resource=llm_resource,
            rollout_batch_timeout=self.rollout_batch_timeout,
            tracing_config=run_tracing_config,
            tracing_context=tracing_context,
        )

        # Import here (inside run) to avoid circular imports:
        # `mantisdk.algorithm.gepa.__init__` re-exports `GEPA` from this module.
        from mantisdk.algorithm import gepa as gepa_module

        def _advance_generation() -> None:
            """Move the tracing context to the next generation's train-eval phase."""
            tracing_context.next_generation()
            tracing_context.set_phase("train-eval")

        @gepa_module.reflection
        def reflection_lm(prompt: str) -> str:
            """GEPA reflection LM callable - takes a string prompt, returns string response.

            Routes through the LLMProxy for consistent tracing and OpenTelemetry export.
            The proxy handles API keys and model routing - we just need to call it.

            IMPORTANT: The LLMProxy must be configured with callbacks: ["opentelemetry"]
            (not "return_token_ids") when calling OpenAI providers.

            Each call is tagged with "gen-N" (generation number) and phase
            "reflection"; call-type tagging is left to the
            @gepa_module.reflection decorator.

            On any failure the seed prompt is returned inside a fenced block so
            that optimization can continue instead of crashing.
            """
            import json

            import litellm

            # Track phase transition: entering reflection, then starting new generation
            tracing_context.set_phase("reflection")

            logger.info(f"[reflection_lm] === REFLECTION CALLED (gen-{tracing_context.generation}) ===")
            logger.info(f"[reflection_lm] Session: {tracing_context.session_id}")
            logger.info(f"[reflection_lm] Model: {llm_resource.model}")
            logger.info(f"[reflection_lm] Proxy Endpoint: {llm_resource.endpoint}")
            logger.info(f"[reflection_lm] Prompt length: {len(prompt)} chars")
            logger.debug(f"[reflection_lm] Prompt preview: {prompt[:500]}...")

            try:
                # Route through the LLMProxy for tracing
                logger.info(f"[reflection_lm] Calling via proxy: model={llm_resource.model} endpoint={llm_resource.endpoint}")

                # Detailed tracing metadata for reflection traces; built from
                # run_tracing_config, which includes the session_id.
                gepa_tags = [
                    f"gen-{tracing_context.generation}",
                ]
                tracing_metadata = run_tracing_config.to_detailed_metadata(
                    phase="reflection",
                    extra_tags=gepa_tags,
                )

                # Pass metadata via headers for proxy-side tracing context
                extra_headers = {
                    "x-mantis-session-id": tracing_metadata.get("session_id", ""),
                    "x-mantis-environment": tracing_metadata.get("environment", ""),
                    "x-mantis-tags": json.dumps(tracing_metadata.get("tags", [])),
                }

                response = litellm.completion(
                    model=llm_resource.model,  # Model name as registered in proxy
                    messages=[{"role": "user", "content": prompt}],
                    api_base=llm_resource.endpoint,  # Proxy URL
                    api_key="dummy",  # Proxy handles the real API key
                    temperature=0.7,
                    metadata=tracing_metadata,  # Pass environment/tags/session_id for OTEL
                    extra_headers=extra_headers,  # Pass to proxy for server-side span attributes
                )
                content = response.choices[0].message.content or ""
                logger.info(f"[reflection_lm] SUCCESS - Response length: {len(content)} chars")
                logger.info(f"[reflection_lm] Response preview: {content[:300]}...")

                # After reflection completes, increment generation for next eval round
                _advance_generation()

                return content
            except Exception as e:
                logger.error(f"[reflection_lm] FAILED: {e}", exc_info=True)
                # Return fallback instead of crashing - allows optimization to continue
                logger.warning("[reflection_lm] Returning fallback (seed prompt)")

                # Still increment generation even on failure
                _advance_generation()

                return f"```\n{initial_candidate.get(target_resource_name, '')}\n```"

        # Prepare training data; fallback ids are "0", "1", ...
        gepa_train_data = self._build_gepa_dataset(train_dataset, "")

        # Prepare validation data; fallback ids are "val-0", "val-1", ...
        # When no validation set is given, None is forwarded to optimize.
        gepa_val_data: Optional[List[MantisdkDataInst]] = None
        if val_dataset is not None:
            gepa_val_data = self._build_gepa_dataset(val_dataset, "val-")

        logger.info(f"Starting GEPA optimization with {len(gepa_train_data)} training samples")
        if gepa_val_data:
            logger.info(f"Using {len(gepa_val_data)} validation samples")

        # Run Optimization in Executor (GEPA is synchronous)
        def run_optimization():
            return optimize(
                trainset=gepa_train_data,
                valset=gepa_val_data,
                adapter=gepa_adapter,
                seed_candidate=initial_candidate,
                reflection_lm=reflection_lm,
                max_metric_calls=self.max_metric_calls,
                reflection_minibatch_size=self.reflection_minibatch_size,
                **self.gepa_kwargs,
            )

        try:
            result = await loop.run_in_executor(None, run_optimization)
            best_score = result.val_aggregate_scores[result.best_idx]

            logger.info(f"GEPA optimization finished. Total metric calls: {result.total_metric_calls}")
            logger.info(f"Best candidate index: {result.best_idx}, score: {best_score}")
            logger.info(f"Best candidate: {result.best_candidate}")

            self._best_candidate = result.best_candidate
            self._best_score = best_score
            self._full_result = result  # Store full result for candidate history

            # Update the store with the best candidate, keeping each resource's
            # original template engine.
            final_resources = {
                key: PromptTemplate(
                    template=value,
                    engine=self._original_engine(initial_resources, key),
                )
                for key, value in result.best_candidate.items()
            }

            await store.update_resources("gepa-final", final_resources)
            logger.info("Updated store with best candidate resources (version: 'gepa-final').")

            # Send full GEPA result to the store for experiment tracking, when
            # the store offers complete_job.
            if hasattr(store, "complete_job"):
                summary = result.to_dict()
                # Add session_id for trace filtering in the UI
                summary["session_id"] = tracing_context.session_id
                outcome = store.complete_job(summary)
                # complete_job may be sync or async; an un-awaited coroutine
                # would silently never run.
                if inspect.isawaitable(outcome):
                    await outcome

        except Exception as e:
            # Include the traceback, matching how reflection_lm logs failures.
            logger.error(f"GEPA optimization failed: {e}", exc_info=True)
            raise
@@ -0,0 +1,18 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ from .adapters import default_adapter
5
+ from .api import optimize
6
+ from .core.adapter import EvaluationBatch, GEPAAdapter
7
+ from .core.result import GEPAResult
8
+ from .examples import aime
9
+ from .utils.stop_condition import (
10
+ CompositeStopper,
11
+ FileStopper,
12
+ MaxMetricCallsStopper,
13
+ NoImprovementStopper,
14
+ ScoreThresholdStopper,
15
+ SignalStopper,
16
+ StopperProtocol,
17
+ TimeoutStopCondition,
18
+ )
@@ -0,0 +1,12 @@
1
+ # GEPA Adapters
2
+
3
+ > GEPA 🤝 Any Framework
4
+
5
+ This directory provides the interface to allow GEPA to plug into systems and frameworks of your choice! GEPA can interface with any system consisting of text components, by implementing `GEPAAdapter` in [../core/adapter.py](../core/adapter.py).
6
+
7
+ Currently, GEPA has the following adapters:
8
+ - [DSPy Adapter](./dspy_adapter/): This adapter integrates GEPA into [DSPy](https://dspy.ai/), to allow it to optimize any DSPy module's signature instructions.
9
+ - [Default Adapter](./default_adapter/): This adapter integrates GEPA into a single-turn LLM environment, where the task is specified as a user message, and an answer string must be present in the assistant response. GEPA optimizes the system prompt.
10
+ - [AnyMaths Adapter](./anymaths_adapter/): This adapter integrates GEPA with litellm and ollama to solve single-turn mathematical problems.
11
+
12
+ If there are any frameworks you would like GEPA integrated into, please create an issue or PR!
File without changes