mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,137 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ from typing import Any, ClassVar
5
+
6
+ import yaml
7
+
8
+ from mantisdk.algorithm.gepa.lib.proposer.reflective_mutation.base import Signature
9
+
10
+
11
+ class DSPyProgramProposalSignature(Signature):
12
+ prompt_template = """I am trying to solve a task using the DSPy framework. Here's a comprehensive overview of DSPy concepts to guide your improvements:
13
+
14
+ Signatures:
15
+ - Signatures define tasks declaratively through input/output fields and explicit instructions.
16
+ - They serve as blueprints for what the LM needs to accomplish.
17
+
18
+ Signature Types:
19
+ - Simple signatures: Specified as strings like "input1, ..., inputN -> output1, ..., outputM" (e.g., "topic -> tweet").
20
+ - Typed signatures: Create a subclass of dspy.Signature with a detailed docstring that includes task instructions, common pitfalls, edge cases, and successful strategies. Define fields using dspy.InputField(desc="...", type=...) and dspy.OutputField(desc="...", type=...) with pydantic types such as str, List[str], Literal["option1", "option2"], or custom classes.
21
+
22
+ Modules:
23
+ - Modules specify __how__ to solve the task defined by a signature.
24
+ - They are composable units inspired by PyTorch layers, using language models to process inputs and produce outputs.
25
+ - Inputs are provided as keyword arguments matching the signature's input fields.
26
+ - Outputs are returned as dspy.Prediction objects containing the signature's output fields.
27
+ - Key built-in modules:
28
+ - dspy.Predict(signature): Performs a single LM call to directly generate the outputs from the inputs.
29
+ - dspy.ChainOfThought(signature): Performs a single LM call that first generates a reasoning chain, then the outputs (adds a 'reasoning' field to the prediction).
30
+ - Other options: dspy.ReAct(signature) for reasoning and acting, or custom chains.
31
+ - Custom modules: Subclass dspy.Module. In __init__, compose sub-modules (e.g., other Predict or ChainOfThought instances). In forward(self, **kwargs), define the data flow: call sub-modules, execute Python logic if needed, and return dspy.Prediction with the output fields.
32
+
33
+ Example Usage:
34
+ ```
35
+ # Simple signature
36
+ simple_signature = "question -> answer"
37
+
38
+ # Typed signature
39
+ class ComplexSignature(dspy.Signature):
40
+ \"\"\"
41
+ <Detailed instructions for completing the task: Include steps, common pitfalls, edge cases, successful strategies. Include domain knowledge...>
42
+ \"\"\"
43
+ question: str = dspy.InputField(desc="The question to answer")
44
+ answer: str = dspy.OutputField(desc="Concise and accurate answer")
45
+
46
+ # Built-in module
47
+ simple_program = dspy.Predict(simple_signature) # or dspy.ChainOfThought(ComplexSignature)
48
+
49
+ # Custom module
50
+ class ComplexModule(dspy.Module):
51
+ def __init__(self):
52
+ self.reasoner = dspy.ChainOfThought("question -> intermediate_answer")
53
+ self.finalizer = dspy.Predict("intermediate_answer -> answer")
54
+
55
+ def forward(self, question: str):
56
+ intermediate = self.reasoner(question=question)
57
+ final = self.finalizer(intermediate_answer=intermediate.intermediate_answer)
58
+ return dspy.Prediction(answer=final.answer, reasoning=intermediate.reasoning) # dspy.ChainOfThought returns 'reasoning' in addition to the signature outputs.
59
+
60
+ complex_program = ComplexModule()
61
+ ```
62
+
63
+ DSPy Improvement Strategies:
64
+ 1. Analyze traces for LM overload: If a single call struggles (e.g., skips steps or hallucinates), decompose into multi-step modules with ChainOfThought or custom logic for stepwise reasoning.
65
+ 2. Avoid over-decomposition: If the program is too fragmented, consolidate related steps into fewer modules for efficiency and coherence.
66
+ 3. Refine signatures: Enhance docstrings with actionable guidance from traces—address specific errors, incorporate domain knowledge, document edge cases, and suggest reasoning patterns. Ensure docstrings are self-contained, as the LM won't have access external traces during runtime.
67
+ 4. Balance LM and Python: Use Python for symbolic/logical operations (e.g., loops, conditionals); delegate complex reasoning or generation to LM calls.
68
+ 5. Incorporate control flow: Add loops, conditionals, sub-modules in custom modules if the task requires iteration (e.g., multi-turn reasoning, selection, voting, etc.).
69
+ 6. Leverage LM strengths: For code-heavy tasks, define signatures with 'code' outputs, extract and execute the generated code in the module's forward pass.
70
+
71
+ Here's my current code:
72
+ ```
73
+ <curr_program>
74
+ ```
75
+
76
+ Here is the execution trace of the current code on example inputs, their outputs, and detailed feedback on improvements:
77
+ ```
78
+ <dataset_with_feedback>
79
+ ```
80
+
81
+ Assignment:
82
+ - Think step-by-step: First, deeply analyze the current code, traces, and feedback to identify failure modes, strengths, and opportunities.
83
+ - Create a concise checklist (3-7 bullets) outlining your high-level improvement plan, focusing on conceptual changes (e.g., "Decompose step X into a multi-stage module").
84
+ - Then, propose a drop-in replacement code that instantiates an improved 'program' object.
85
+ - Ensure the code is modular, efficient, and directly addresses feedback.
86
+ - Output everything in a single code block using triple backticks—no additional explanations, comments, or language markers outside the block.
87
+ - The code must be a valid, self-contained Python script with all necessary imports, definitions, and assignment to 'program'.
88
+
89
+ Output Format:
90
+ - Start with the checklist in plain text (3-7 short bullets).
91
+ - Follow immediately with one code block in triple backticks containing the complete Python code, including assigning a `program` object."""
92
+ input_keys: ClassVar[list[str]] = ["curr_program", "dataset_with_feedback"]
93
+ output_keys: ClassVar[list[str]] = ["new_program"]
94
+
95
@classmethod
def prompt_renderer(cls, input_dict: dict[str, Any]) -> str:
    """Fill ``cls.prompt_template`` with the current program and its feedback.

    Args:
        input_dict: Must provide ``"curr_program"`` (a ``str``) and
            ``"dataset_with_feedback"`` (a ``list``). The list is serialized
            with ``yaml.dump`` before it is inserted into the prompt.

    Returns:
        The prompt template with the ``<curr_program>`` and
        ``<dataset_with_feedback>`` placeholders replaced.

    Raises:
        KeyError: If either key is missing from ``input_dict``.
        TypeError: If ``curr_program`` is not a ``str`` or
            ``dataset_with_feedback`` is not a ``list``.
    """
    import re

    curr_program = input_dict["curr_program"]
    if not isinstance(curr_program, str):
        raise TypeError("curr_program must be a string")

    dataset = input_dict["dataset_with_feedback"]
    if not isinstance(dataset, list):
        raise TypeError("dataset_with_feedback must be a list")

    substitutions = {
        "<curr_program>": curr_program,
        # YAML keeps the samples structured but compact inside the prompt.
        "<dataset_with_feedback>": yaml.dump(dataset, sort_keys=False, default_flow_style=False, indent=2),
    }

    # Substitute in a single pass: with chained str.replace() calls, placeholder-like
    # text inside the inserted program would itself be expanded by the second call.
    pattern = "|".join(re.escape(placeholder) for placeholder in substitutions)
    return re.sub(pattern, lambda match: substitutions[match.group(0)], cls.prompt_template)
115
+
116
@staticmethod
def output_extractor(lm_out: str) -> dict[str, str]:
    """Pull the proposed program out of the language model's raw reply.

    When the reply contains at least two ``` fences, everything between the
    first and the last fence is taken and stripped. Otherwise the stripped
    reply is used, with a single leading and/or trailing fence removed.
    A Python language tag directly after an opening fence (```python) is
    dropped so it does not end up as the first line of the program.

    Args:
        lm_out: Raw text returned by the language model.

    Returns:
        ``{"new_program": <extracted source>}``.
    """
    fence = "```"
    language_tags = ("python", "py", "python3")

    def drop_language_tag(text: str) -> str:
        # Only a whole first line that is exactly a known tag is removed.
        first_line, newline, remainder = text.partition("\n")
        if newline and first_line.strip().lower() in language_tags:
            return remainder
        return text

    if lm_out.count(fence) >= 2:
        # Two or more fences guarantee find() < rfind(), so no further checks are needed.
        start = lm_out.find(fence)
        end = lm_out.rfind(fence)
        new_program = drop_language_tag(lm_out[start + len(fence) : end]).strip()
    else:
        new_program = lm_out.strip()
        if new_program.startswith(fence):
            new_program = drop_language_tag(new_program[len(fence) :])
        if new_program.endswith(fence):
            new_program = new_program[: -len(fence)]

    return {"new_program": new_program}
@@ -0,0 +1,266 @@
1
+ import random
2
+ from typing import Any, Callable
3
+
4
+ import dspy
5
+ from dspy.adapters.types import History
6
+ from dspy.evaluate import Evaluate
7
+ from dspy.primitives import Example, Prediction
8
+ from dspy.teleprompt.bootstrap_trace import TraceData
9
+
10
+ from mantisdk.algorithm.gepa.lib import EvaluationBatch, GEPAAdapter
11
+
12
+
13
+ class DspyAdapter(GEPAAdapter[Example, TraceData, Prediction]):
14
def __init__(
    self,
    task_lm: dspy.LM,
    metric_fn: Callable,
    reflection_lm: dspy.LM,
    failure_score=0.0,
    num_threads: int | None = None,
    add_format_failure_as_feedback: bool = False,
    rng: random.Random | None = None,
):
    """Store the configuration used to evaluate and evolve whole DSPy programs.

    Args:
        task_lm: LM attached to every successfully built program.
        metric_fn: Metric passed to the DSPy evaluation helpers.
        reflection_lm: LM used when proposing a new program; must not be None.
        failure_score: Score recorded when a program cannot be built or an
            example yields no score.
        num_threads: Thread count forwarded to the DSPy evaluation helpers.
        add_format_failure_as_feedback: Stored on the instance as given.
        rng: Random generator; a ``random.Random(0)`` is created when omitted.
    """
    self.task_lm, self.metric_fn = task_lm, metric_fn
    assert reflection_lm is not None, (
        "DspyAdapter for full-program evolution requires a reflection_lm to be provided"
    )
    self.reflection_lm = reflection_lm
    self.failure_score, self.num_threads = failure_score, num_threads
    self.add_format_failure_as_feedback = add_format_failure_as_feedback
    # Fixed seed so the default generator is reproducible.
    self.rng = rng if rng else random.Random(0)
34
+
35
def build_program(self, candidate: dict[str, str]) -> tuple[dspy.Module, None] | tuple[None, str]:
    """Build the DSPy program described by ``candidate["program"]``.

    Returns:
        Whatever ``load_dspy_program_from_code`` returns: ``(program, None)``
        on success or ``(None, feedback)`` on failure.
    """
    # Every build gets its own empty namespace dict.
    namespace: dict = {}
    return self.load_dspy_program_from_code(candidate["program"], namespace)
40
+
41
def load_dspy_program_from_code(
    self,
    candidate_src: str,
    context: dict,
):
    """Compile and execute candidate source and return the ``program`` it defines.

    Args:
        candidate_src: Python source expected to assign a ``dspy.Module``
            instance to the name ``program``.
        context: Namespace dict the source is executed in; it is mutated by
            the executed code.

    Returns:
        ``(program, None)`` on success, with ``self.task_lm`` set on the
        program. Otherwise ``(None, feedback)``, where ``feedback`` is a
        message describing why the program could not be produced.
    """
    import traceback

    try:
        code = compile(candidate_src, "<string>", "exec")
    except (SyntaxError, ValueError) as e:
        # compile() may raise ValueError rather than SyntaxError (e.g. null bytes in the
        # source on older interpreters); report it as feedback instead of crashing.
        return None, f"Syntax Error in code: {e}\n{traceback.format_exc()}"

    try:
        # SECURITY: this runs model-generated code with the full privileges of the
        # current process. Only use it where that is acceptable.
        exec(code, context)  # expose the candidate's definitions via `context`
    except Exception as e:
        return None, f"Error in executing code: {e}\n{traceback.format_exc()}"

    dspy_program = context.get("program")

    if dspy_program is None:
        return (
            None,
            "Your code did not define a `program` object. Please define a `program` object which is an instance of `dspy.Module`, either directly by dspy.Predict or dspy.ChainOfThought, or by instantiating a class that inherits from `dspy.Module`.",
        )

    if not isinstance(dspy_program, dspy.Module):
        return (
            None,
            f"Your code defined a `program` object, but it is an instance of {type(dspy_program)}, not `dspy.Module`. Please define a `program` object which is an instance of `dspy.Module`, either directly by dspy.Predict or dspy.ChainOfThought, or by instantiating a class that inherits from `dspy.Module`.",
        )

    dspy_program.set_lm(self.task_lm)

    return dspy_program, None
81
+
82
def evaluate(self, batch, candidate, capture_traces=False):
    """Build the candidate program and score it on ``batch``.

    If the program cannot be built, every example receives
    ``self.failure_score``, ``outputs`` is ``None`` and the build feedback
    string is returned in ``trajectories``.

    Args:
        batch: Examples to evaluate on.
        candidate: Mapping whose ``"program"`` entry holds the program source.
        capture_traces: When true, run through ``bootstrap_trace_data`` and
            return its records as ``trajectories``; otherwise use
            ``Evaluate`` and return ``trajectories=None``.

    Returns:
        An ``EvaluationBatch`` with outputs, scores and trajectories.
    """
    program, build_feedback = self.build_program(candidate)
    if program is None:
        failed_scores = [self.failure_score for _ in batch]
        return EvaluationBatch(outputs=None, scores=failed_scores, trajectories=build_feedback)

    if not capture_traces:
        evaluator = Evaluate(
            devset=batch,
            metric=self.metric_fn,
            num_threads=self.num_threads,
            return_all_scores=True,
            failure_score=self.failure_score,
            provide_traceback=True,
            max_errors=len(batch) * 100,
        )
        res = evaluator(program)
        outputs = [row[1] for row in res.results]
        # A score object exposing `score` is reduced to that value.
        scores = [row[2]["score"] if hasattr(row[2], "score") else row[2] for row in res.results]
        return EvaluationBatch(outputs=outputs, scores=scores, trajectories=None)

    # bootstrap_trace_data-like flow with trace capture
    from dspy.teleprompt.bootstrap_trace import bootstrap_trace_data

    trajs = bootstrap_trace_data(
        program=program,
        dataset=batch,
        metric=self.metric_fn,
        num_threads=self.num_threads,
        raise_on_error=False,
        capture_failed_parses=True,
        failure_score=self.failure_score,
        format_failure_score=self.failure_score,
    )
    outputs = []
    scores = []
    for record in trajs:
        outputs.append(record["prediction"])
        raw_score = record.get("score")
        if raw_score is None:
            # Missing score: fall back to the configured failure score.
            scores.append(self.failure_score)
        elif hasattr(raw_score, "score"):
            scores.append(raw_score["score"])
        else:
            scores.append(raw_score)
    return EvaluationBatch(outputs=outputs, scores=scores, trajectories=trajs)
129
+
130
def make_reflective_dataset(self, candidate, eval_batch, components_to_update) -> dict[str, list[dict[str, Any]]]:
    """Turn an evaluation batch into feedback records for the ``"program"`` component.

    Args:
        candidate: Mapping whose ``"program"`` entry holds the program source.
        eval_batch: Result of ``evaluate``. Its ``trajectories`` is either a
            feedback string (the program failed to build) or an iterable of
            trace records with ``"trace"``, ``"example"``, ``"prediction"``
            and ``"score"`` entries.
        components_to_update: Must contain exactly ``"program"``.

    Returns:
        ``{"program": [record, ...]}``. On a build failure the list holds a
        single ``{"Feedback": <message>}`` record. Otherwise each record has
        ``"Program Inputs"``, ``"Program Outputs"``, ``"Program Trace"`` and,
        when the score carries feedback, ``"Feedback"``.

    Raises:
        AssertionError: If ``components_to_update`` is not exactly
            ``{"program"}``, a traced predictor cannot be matched to the
            rebuilt program, or a call has more than one ``History`` input.
        Exception: If no trajectory yields a usable record.
    """
    assert set(components_to_update) == {"program"}, f"set(components_to_update) = {set(components_to_update)}"

    if isinstance(eval_batch.trajectories, str):
        # Build failure: the trajectories slot carries the error text. Wrap it in a list
        # so the value has the same list-of-records shape as the normal path; there is
        # also no point rebuilding a program that is already known not to build.
        return {"program": [{"Feedback": eval_batch.trajectories}]}

    from dspy.teleprompt.bootstrap_trace import FailedPrediction

    proposed_program, _ = self.build_program(candidate)

    items: list[dict[str, Any]] = []
    for data in eval_batch.trajectories or []:
        trace = data["trace"]
        example = data["example"]
        prediction = data["prediction"]
        module_score = data["score"]

        example_data: dict[str, Any] = {
            "Program Inputs": {**example.inputs()},
            "Program Outputs": {**prediction},
        }
        feedback_text = module_score["feedback"] if hasattr(module_score, "feedback") else None

        if not trace:
            continue

        # If any call failed to parse, report only the first such call.
        first_failure = next((t for t in trace if isinstance(t[2], FailedPrediction)), None)
        trace_instances = [first_failure] if first_failure is not None else trace

        trace_d = []
        example_data["Program Trace"] = trace_d
        for entry in trace_instances:
            called = entry[0]
            inputs = entry[1]
            outputs = entry[2]

            # Identify the predictor in the rebuilt program by its signature.
            pred_name = next(
                (
                    name
                    for name, predictor in proposed_program.named_predictors()
                    if predictor.signature.equals(called.signature)
                ),
                None,
            )
            assert pred_name is not None, f"Could not find predictor for {called.signature}"

            history_keys = [key for key, val in inputs.items() if isinstance(val, History)]
            assert len(history_keys) <= 1
            history_key_name = history_keys[0] if history_keys else None

            new_inputs = {}
            if history_key_name is not None:
                # Render the conversation history as a numbered block under "Context".
                numbered = "".join(
                    f" {i}: {message}\n" for i, message in enumerate(inputs[history_key_name].messages)
                )
                new_inputs["Context"] = "```json\n" + numbered + "```"

            for input_key, input_val in inputs.items():
                if input_key == history_key_name:
                    continue
                new_inputs[input_key] = str(input_val)

            if isinstance(outputs, FailedPrediction):
                new_outputs = (
                    "Couldn't parse the output as per the expected output format. The model's raw response was:\n"
                    "```\n" + outputs.completion_text + "\n" + "```\n\n"
                )
            else:
                new_outputs = {output_key: str(output_val) for output_key, output_val in outputs.items()}

            trace_d.append({"Called Module": pred_name, "Inputs": new_inputs, "Generated Outputs": new_outputs})

        if feedback_text is not None:
            example_data["Feedback"] = feedback_text

        items.append(example_data)

    if not items:
        raise Exception("No valid predictions found for program.")

    return {"program": items}
249
+
250
def propose_new_texts(
    self,
    candidate: dict[str, str],
    reflective_dataset: dict[str, list[dict[str, Any]]],
    components_to_update: list[str],
) -> dict[str, str]:
    """Ask the reflection LM for a replacement text for each listed component.

    Args:
        candidate: Current text of every component, keyed by component name.
        reflective_dataset: Feedback records keyed by component name.
        components_to_update: Names of the components to rewrite.

    Returns:
        Mapping from component name to the ``"new_program"`` value produced
        by ``DSPyProgramProposalSignature.run``.
    """
    from mantisdk.algorithm.gepa.lib.adapters.dspy_full_program_adapter.dspy_program_proposal_signature import DSPyProgramProposalSignature

    def propose(component: str) -> str:
        # One reflection-LM call per component.
        result = DSPyProgramProposalSignature.run(
            lm=self.reflection_lm,
            input_dict={
                "curr_program": candidate[component],
                "dataset_with_feedback": reflective_dataset[component],
            },
        )
        return result["new_program"]

    return {component: propose(component) for component in components_to_update}