PyPI - mantisdk - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show

mantisdk/__init__.py +22 -0
mantisdk/adapter/__init__.py +15 -0
mantisdk/adapter/base.py +94 -0
mantisdk/adapter/messages.py +270 -0
mantisdk/adapter/triplet.py +1028 -0
mantisdk/algorithm/__init__.py +39 -0
mantisdk/algorithm/apo/__init__.py +5 -0
mantisdk/algorithm/apo/apo.py +889 -0
mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
mantisdk/algorithm/base.py +162 -0
mantisdk/algorithm/decorator.py +264 -0
mantisdk/algorithm/fast.py +250 -0
mantisdk/algorithm/gepa/__init__.py +59 -0
mantisdk/algorithm/gepa/adapter.py +459 -0
mantisdk/algorithm/gepa/gepa.py +364 -0
mantisdk/algorithm/gepa/lib/__init__.py +18 -0
mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
mantisdk/algorithm/gepa/lib/api.py +375 -0
mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
mantisdk/algorithm/gepa/lib/core/result.py +233 -0
mantisdk/algorithm/gepa/lib/core/state.py +636 -0
mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
mantisdk/algorithm/gepa/lib/py.typed +0 -0
mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
mantisdk/algorithm/gepa/tracing.py +105 -0
mantisdk/algorithm/utils.py +177 -0
mantisdk/algorithm/verl/__init__.py +5 -0
mantisdk/algorithm/verl/interface.py +202 -0
mantisdk/cli/__init__.py +56 -0
mantisdk/cli/prometheus.py +115 -0
mantisdk/cli/store.py +131 -0
mantisdk/cli/vllm.py +29 -0
mantisdk/client.py +408 -0
mantisdk/config.py +348 -0
mantisdk/emitter/__init__.py +43 -0
mantisdk/emitter/annotation.py +370 -0
mantisdk/emitter/exception.py +54 -0
mantisdk/emitter/message.py +61 -0
mantisdk/emitter/object.py +117 -0
mantisdk/emitter/reward.py +320 -0
mantisdk/env_var.py +156 -0
mantisdk/execution/__init__.py +15 -0
mantisdk/execution/base.py +64 -0
mantisdk/execution/client_server.py +443 -0
mantisdk/execution/events.py +69 -0
mantisdk/execution/inter_process.py +16 -0
mantisdk/execution/shared_memory.py +282 -0
mantisdk/instrumentation/__init__.py +119 -0
mantisdk/instrumentation/agentops.py +314 -0
mantisdk/instrumentation/agentops_langchain.py +45 -0
mantisdk/instrumentation/litellm.py +83 -0
mantisdk/instrumentation/vllm.py +81 -0
mantisdk/instrumentation/weave.py +500 -0
mantisdk/litagent/__init__.py +11 -0
mantisdk/litagent/decorator.py +536 -0
mantisdk/litagent/litagent.py +252 -0
mantisdk/llm_proxy.py +1890 -0
mantisdk/logging.py +370 -0
mantisdk/reward.py +7 -0
mantisdk/runner/__init__.py +11 -0
mantisdk/runner/agent.py +845 -0
mantisdk/runner/base.py +182 -0
mantisdk/runner/legacy.py +309 -0
mantisdk/semconv.py +170 -0
mantisdk/server.py +401 -0
mantisdk/store/__init__.py +23 -0
mantisdk/store/base.py +897 -0
mantisdk/store/client_server.py +2092 -0
mantisdk/store/collection/__init__.py +30 -0
mantisdk/store/collection/base.py +587 -0
mantisdk/store/collection/memory.py +970 -0
mantisdk/store/collection/mongo.py +1412 -0
mantisdk/store/collection_based.py +1823 -0
mantisdk/store/insight.py +648 -0
mantisdk/store/listener.py +58 -0
mantisdk/store/memory.py +396 -0
mantisdk/store/mongo.py +165 -0
mantisdk/store/sqlite.py +3 -0
mantisdk/store/threading.py +357 -0
mantisdk/store/utils.py +142 -0
mantisdk/tracer/__init__.py +16 -0
mantisdk/tracer/agentops.py +242 -0
mantisdk/tracer/base.py +287 -0
mantisdk/tracer/dummy.py +106 -0
mantisdk/tracer/otel.py +555 -0
mantisdk/tracer/weave.py +677 -0
mantisdk/trainer/__init__.py +6 -0
mantisdk/trainer/init_utils.py +263 -0
mantisdk/trainer/legacy.py +367 -0
mantisdk/trainer/registry.py +12 -0
mantisdk/trainer/trainer.py +618 -0
mantisdk/types/__init__.py +6 -0
mantisdk/types/core.py +553 -0
mantisdk/types/resources.py +204 -0
mantisdk/types/tracer.py +515 -0
mantisdk/types/tracing.py +218 -0
mantisdk/utils/__init__.py +1 -0
mantisdk/utils/id.py +18 -0
mantisdk/utils/metrics.py +1025 -0
mantisdk/utils/otel.py +578 -0
mantisdk/utils/otlp.py +536 -0
mantisdk/utils/server_launcher.py +1045 -0
mantisdk/utils/system_snapshot.py +81 -0
mantisdk/verl/__init__.py +8 -0
mantisdk/verl/__main__.py +6 -0
mantisdk/verl/async_server.py +46 -0
mantisdk/verl/config.yaml +27 -0
mantisdk/verl/daemon.py +1154 -0
mantisdk/verl/dataset.py +44 -0
mantisdk/verl/entrypoint.py +248 -0
mantisdk/verl/trainer.py +549 -0
mantisdk-0.1.0.dist-info/METADATA +119 -0
mantisdk-0.1.0.dist-info/RECORD +190 -0
mantisdk-0.1.0.dist-info/WHEEL +4 -0
mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0

mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py ADDED Viewed

@@ -0,0 +1,137 @@
+# Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
+# https://github.com/gepa-ai/gepa
+from typing import Any, ClassVar
+import yaml
+from mantisdk.algorithm.gepa.lib.proposer.reflective_mutation.base import Signature
+class DSPyProgramProposalSignature(Signature):
+    # Reflection prompt used to ask an LM for a rewritten DSPy program.
+    # `prompt_renderer` fills the `<curr_program>` and `<dataset_with_feedback>`
+    # placeholders of `prompt_template`; `output_extractor` pulls the proposed
+    # program source out of the LM reply and returns it under "new_program".
+    prompt_template = """I am trying to solve a task using the DSPy framework. Here's a comprehensive overview of DSPy concepts to guide your improvements:
+Signatures:
+- Signatures define tasks declaratively through input/output fields and explicit instructions.
+- They serve as blueprints for what the LM needs to accomplish.
+Signature Types:
+- Simple signatures: Specified as strings like "input1, ..., inputN -> output1, ..., outputM" (e.g., "topic -> tweet").
+- Typed signatures: Create a subclass of dspy.Signature with a detailed docstring that includes task instructions, common pitfalls, edge cases, and successful strategies. Define fields using dspy.InputField(desc="...", type=...) and dspy.OutputField(desc="...", type=...) with pydantic types such as str, List[str], Literal["option1", "option2"], or custom classes.
+Modules:
+- Modules specify __how__ to solve the task defined by a signature.
+- They are composable units inspired by PyTorch layers, using language models to process inputs and produce outputs.
+- Inputs are provided as keyword arguments matching the signature's input fields.
+- Outputs are returned as dspy.Prediction objects containing the signature's output fields.
+- Key built-in modules:
+  - dspy.Predict(signature): Performs a single LM call to directly generate the outputs from the inputs.
+  - dspy.ChainOfThought(signature): Performs a single LM call that first generates a reasoning chain, then the outputs (adds a 'reasoning' field to the prediction).
+  - Other options: dspy.ReAct(signature) for reasoning and acting, or custom chains.
+- Custom modules: Subclass dspy.Module. In __init__, compose sub-modules (e.g., other Predict or ChainOfThought instances). In forward(self, **kwargs), define the data flow: call sub-modules, execute Python logic if needed, and return dspy.Prediction with the output fields.
+Example Usage:
+```
+# Simple signature
+simple_signature = "question -> answer"
+# Typed signature
+class ComplexSignature(dspy.Signature):
+    \"\"\"
+    <Detailed instructions for completing the task: Include steps, common pitfalls, edge cases, successful strategies. Include domain knowledge...>
+    \"\"\"
+    question: str = dspy.InputField(desc="The question to answer")
+    answer: str = dspy.OutputField(desc="Concise and accurate answer")
+# Built-in module
+simple_program = dspy.Predict(simple_signature)  # or dspy.ChainOfThought(ComplexSignature)
+# Custom module
+class ComplexModule(dspy.Module):
+    def __init__(self):
+        self.reasoner = dspy.ChainOfThought("question -> intermediate_answer")
+        self.finalizer = dspy.Predict("intermediate_answer -> answer")
+    def forward(self, question: str):
+        intermediate = self.reasoner(question=question)
+        final = self.finalizer(intermediate_answer=intermediate.intermediate_answer)
+        return dspy.Prediction(answer=final.answer, reasoning=intermediate.reasoning) # dspy.ChainOfThought returns 'reasoning' in addition to the signature outputs.
+complex_program = ComplexModule()
+```
+DSPy Improvement Strategies:
+1. Analyze traces for LM overload: If a single call struggles (e.g., skips steps or hallucinates), decompose into multi-step modules with ChainOfThought or custom logic for stepwise reasoning.
+2. Avoid over-decomposition: If the program is too fragmented, consolidate related steps into fewer modules for efficiency and coherence.
+3. Refine signatures: Enhance docstrings with actionable guidance from traces—address specific errors, incorporate domain knowledge, document edge cases, and suggest reasoning patterns. Ensure docstrings are self-contained, as the LM won't have access external traces during runtime.
+4. Balance LM and Python: Use Python for symbolic/logical operations (e.g., loops, conditionals); delegate complex reasoning or generation to LM calls.
+5. Incorporate control flow: Add loops, conditionals, sub-modules in custom modules if the task requires iteration (e.g., multi-turn reasoning, selection, voting, etc.).
+6. Leverage LM strengths: For code-heavy tasks, define signatures with 'code' outputs, extract and execute the generated code in the module's forward pass.
+Here's my current code:
+```
+<curr_program>
+```
+Here is the execution trace of the current code on example inputs, their outputs, and detailed feedback on improvements:
+```
+<dataset_with_feedback>
+```
+Assignment:
+- Think step-by-step: First, deeply analyze the current code, traces, and feedback to identify failure modes, strengths, and opportunities.
+- Create a concise checklist (3-7 bullets) outlining your high-level improvement plan, focusing on conceptual changes (e.g., "Decompose step X into a multi-stage module").
+- Then, propose a drop-in replacement code that instantiates an improved 'program' object.
+- Ensure the code is modular, efficient, and directly addresses feedback.
+- Output everything in a single code block using triple backticks—no additional explanations, comments, or language markers outside the block.
+- The code must be a valid, self-contained Python script with all necessary imports, definitions, and assignment to 'program'.
+Output Format:
+- Start with the checklist in plain text (3-7 short bullets).
+- Follow immediately with one code block in triple backticks containing the complete Python code, including assigning a `program` object."""
+    # Keys read from `input_dict` by `prompt_renderer`.
+    input_keys: ClassVar[list[str]] = ["curr_program", "dataset_with_feedback"]
+    # Keys present in the dict returned by `output_extractor`.
+    output_keys: ClassVar[list[str]] = ["new_program"]
+    @classmethod
+    def prompt_renderer(cls, input_dict: dict[str, Any]) -> str:
+        """Fill the template placeholders with the program and its feedback.
+
+        Args:
+            input_dict: Must hold ``"curr_program"`` (the program source as a
+                ``str``) and ``"dataset_with_feedback"`` (a ``list`` of
+                samples, serialized to YAML).
+
+        Returns:
+            ``prompt_template`` with both placeholders substituted.
+
+        Raises:
+            TypeError: If ``curr_program`` is not a string or
+                ``dataset_with_feedback`` is not a list.
+        """
+        curr_program = input_dict["curr_program"]
+        if not isinstance(curr_program, str):
+            raise TypeError("curr_program must be a string")
+        dataset = input_dict["dataset_with_feedback"]
+        if not isinstance(dataset, list):
+            raise TypeError("dataset_with_feedback must be a list")
+        # Block-style YAML with the samples' own key order kept (sort_keys=False).
+        samples_yaml = yaml.dump(dataset, sort_keys=False, default_flow_style=False, indent=2)
+        # The program is substituted first, then the dataset (same order as before).
+        prompt = cls.prompt_template.replace("<curr_program>", curr_program)
+        return prompt.replace("<dataset_with_feedback>", samples_yaml)
+    @staticmethod
+    def output_extractor(lm_out: str) -> dict[str, str]:
+        """Extract the proposed program source from the LM reply.
+
+        With two or more triple-backtick fences, everything between the first
+        and the last fence is taken. Otherwise the whole reply is used, minus
+        a lone leading and/or trailing fence. A Markdown language tag
+        (``python``, ``python3`` or ``py``) directly after the opening fence
+        is dropped so that it does not end up as the first line of the code.
+
+        Args:
+            lm_out: Raw text returned by the reflection LM.
+
+        Returns:
+            ``{"new_program": <extracted source, whitespace-stripped>}``.
+        """
+        fence = "```"
+        if lm_out.count(fence) >= 2:
+            # Two non-overlapping fences guarantee find() < rfind(), so the
+            # slice below is always well-formed.
+            body = lm_out[lm_out.find(fence) + len(fence) : lm_out.rfind(fence)]
+        else:
+            body = lm_out.strip()
+            if body.startswith(fence):
+                body = body[len(fence) :]
+            if body.endswith(fence):
+                body = body[: -len(fence)]
+        # Only well-known tags are removed; a generic "first line" rule could
+        # swallow real code that starts right after the fence.
+        first_line, newline, remainder = body.partition("\n")
+        if newline and first_line.strip().lower() in ("python", "python3", "py"):
+            body = remainder
+        return {"new_program": body.strip()}

mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py ADDED Viewed

@@ -0,0 +1,266 @@
+import random
+from typing import Any, Callable
+import dspy
+from dspy.adapters.types import History
+from dspy.evaluate import Evaluate
+from dspy.primitives import Example, Prediction
+from dspy.teleprompt.bootstrap_trace import TraceData
+from mantisdk.algorithm.gepa.lib import EvaluationBatch, GEPAAdapter
+class DspyAdapter(GEPAAdapter[Example, TraceData, Prediction]):
+    def __init__(
+        self,
+        task_lm: dspy.LM,
+        metric_fn: Callable,
+        reflection_lm: dspy.LM,
+        failure_score=0.0,
+        num_threads: int | None = None,
+        add_format_failure_as_feedback: bool = False,
+        rng: random.Random | None = None,
+    ):
+        """Store the configuration used for full-program evolution.
+
+        Args:
+            task_lm: LM attached to every built program via ``set_lm``.
+            metric_fn: Metric handed to the DSPy evaluation helpers.
+            reflection_lm: LM used to propose new program text; required.
+            failure_score: Score used when a program cannot be built or an
+                example produces no score.
+            num_threads: Forwarded to the DSPy evaluation helpers.
+            add_format_failure_as_feedback: Stored as given; not read by the
+                methods visible in this class.
+            rng: Random source; a ``random.Random(0)`` is created when omitted.
+
+        Raises:
+            AssertionError: If ``reflection_lm`` is ``None``.
+        """
+        assert reflection_lm is not None, (
+            "DspyAdapter for full-program evolution requires a reflection_lm to be provided"
+        )
+        # Fixed seed keeps the default random source reproducible.
+        if not rng:
+            rng = random.Random(0)
+        self.rng = rng
+        self.task_lm = task_lm
+        self.reflection_lm = reflection_lm
+        self.metric_fn = metric_fn
+        self.failure_score = failure_score
+        self.num_threads = num_threads
+        self.add_format_failure_as_feedback = add_format_failure_as_feedback
+    def build_program(self, candidate: dict[str, str]) -> tuple[dspy.Module, None] | tuple[None, str]:
+        """Build the program whose source is stored under ``candidate["program"]``.
+
+        Returns:
+            Whatever ``load_dspy_program_from_code`` returns: ``(program, None)``
+            on success or ``(None, error_text)`` on failure.
+        """
+        # Each build gets its own empty namespace for the executed source.
+        namespace: dict = {}
+        return self.load_dspy_program_from_code(candidate["program"], namespace)
+    def load_dspy_program_from_code(
+        self,
+        candidate_src: str,
+        context: dict,
+    ):
+        """Compile and execute ``candidate_src`` and return its ``program`` object.
+
+        Args:
+            candidate_src: Python source expected to assign a ``dspy.Module``
+                instance to the name ``program``.
+            context: Namespace the source is executed in; it is mutated.
+
+        Returns:
+            ``(program, None)`` on success, with ``self.task_lm`` set on the
+            program. ``(None, message)`` when the source does not compile,
+            raises while executing, defines no ``program``, or defines one
+            that is not a ``dspy.Module``.
+        """
+        import traceback
+        try:
+            # Compile once and reuse the code object for execution below.
+            code = compile(candidate_src, "<string>", "exec")
+        except (SyntaxError, ValueError) as e:
+            # compile() reports null bytes in the source as ValueError.
+            return None, f"Syntax Error in code: {e}\n{traceback.format_exc()}"
+        try:
+            # SECURITY: this runs arbitrary, typically LM-generated, code in the
+            # current process with no sandboxing. Only use with trusted models
+            # or inside an isolated environment.
+            exec(code, context)  # expose to current namespace
+        except Exception as e:
+            return None, f"Error in executing code: {e}\n{traceback.format_exc()}"
+        dspy_program = context.get("program")
+        if dspy_program is None:
+            return (
+                None,
+                "Your code did not define a `program` object. Please define a `program` object which is an instance of `dspy.Module`, either directly by dspy.Predict or dspy.ChainOfThought, or by instantiating a class that inherits from `dspy.Module`.",
+            )
+        if not isinstance(dspy_program, dspy.Module):
+            return (
+                None,
+                f"Your code defined a `program` object, but it is an instance of {type(dspy_program)}, not `dspy.Module`. Please define a `program` object which is an instance of `dspy.Module`, either directly by dspy.Predict or dspy.ChainOfThought, or by instantiating a class that inherits from `dspy.Module`.",
+            )
+        dspy_program.set_lm(self.task_lm)
+        return dspy_program, None
+    def evaluate(self, batch, candidate, capture_traces=False):
+        """Build the candidate program and score it on ``batch``.
+
+        Args:
+            batch: Examples to evaluate.
+            candidate: Mapping holding the program source under ``"program"``.
+            capture_traces: When true, run ``bootstrap_trace_data`` and return
+                its trajectories; otherwise run ``Evaluate``.
+
+        Returns:
+            An ``EvaluationBatch``. If the program cannot be built, ``outputs``
+            is ``None``, every score is ``self.failure_score`` and
+            ``trajectories`` carries the build error text.
+        """
+        program, build_feedback = self.build_program(candidate)
+        if program is None:
+            failed_scores = [self.failure_score for _example in batch]
+            return EvaluationBatch(outputs=None, scores=failed_scores, trajectories=build_feedback)
+        if not capture_traces:
+            evaluator = Evaluate(
+                devset=batch,
+                metric=self.metric_fn,
+                num_threads=self.num_threads,
+                return_all_scores=True,
+                failure_score=self.failure_score,
+                provide_traceback=True,
+                max_errors=len(batch) * 100,
+            )
+            res = evaluator(program)
+            outputs = [row[1] for row in res.results]
+            scores = []
+            for row in res.results:
+                raw_score = row[2]
+                # Score objects exposing a `score` attribute are unwrapped to the number.
+                scores.append(raw_score["score"] if hasattr(raw_score, "score") else raw_score)
+            return EvaluationBatch(outputs=outputs, scores=scores, trajectories=None)
+        # bootstrap_trace_data-like flow with trace capture
+        from dspy.teleprompt.bootstrap_trace import bootstrap_trace_data
+        trajs = bootstrap_trace_data(
+            program=program,
+            dataset=batch,
+            metric=self.metric_fn,
+            num_threads=self.num_threads,
+            raise_on_error=False,
+            capture_failed_parses=True,
+            failure_score=self.failure_score,
+            format_failure_score=self.failure_score,
+        )
+        outputs = []
+        scores = []
+        for traj in trajs:
+            outputs.append(traj["prediction"])
+            if traj.get("score") is None:
+                # No score recorded for this example: fall back to the failure score.
+                scores.append(self.failure_score)
+                continue
+            score = traj["score"]
+            if hasattr(score, "score"):
+                score = score["score"]
+            scores.append(score)
+        return EvaluationBatch(outputs=outputs, scores=scores, trajectories=trajs)
+    def make_reflective_dataset(self, candidate, eval_batch, components_to_update) -> dict[str, list[dict[str, Any]]]:
+        """Turn captured trajectories into feedback records for the ``"program"`` component.
+
+        Args:
+            candidate: Mapping holding the program source under ``"program"``.
+            eval_batch: Result of ``evaluate``; ``trajectories`` is either the
+                build error text (a ``str``) or the captured trajectories.
+            components_to_update: Must contain exactly ``"program"``.
+
+        Returns:
+            ``{"program": [record, ...]}``. Each record holds "Program Inputs",
+            "Program Outputs", "Program Trace" and, when the score object
+            carries one, "Feedback". If the program could not be built, the
+            list holds a single ``{"Feedback": error_text}`` record.
+
+        Raises:
+            AssertionError: If ``components_to_update`` is not ``{"program"}``,
+                if a traced call matches no predictor of the program, or if a
+                call has more than one ``History`` input.
+            Exception: If no trajectory yields a record.
+        """
+        assert set(components_to_update) == {"program"}, f"set(components_to_update) = {set(components_to_update)}"
+        from dspy.teleprompt.bootstrap_trace import FailedPrediction
+        if isinstance(eval_batch.trajectories, str):
+            # evaluate() stores the build error text here when the program could
+            # not be built. It is wrapped in a list so the value matches the
+            # declared return type and the list check in the prompt renderer.
+            return {"program": [{"Feedback": eval_batch.trajectories}]}
+        # Rebuild only now: a candidate already known to be broken is not re-executed.
+        proposed_program, build_error = self.build_program(candidate)
+        if proposed_program is None:
+            # NOTE(review): only reachable if rebuilding fails although evaluation
+            # succeeded; the build error is reported instead of crashing below.
+            return {"program": [{"Feedback": build_error}]}
+        predictors = list(proposed_program.named_predictors())
+        items: list[dict[str, Any]] = []
+        for data in eval_batch.trajectories or []:
+            trace = data["trace"]
+            example = data["example"]
+            prediction = data["prediction"]
+            module_score = data["score"]
+            example_data: dict[str, Any] = {"Program Inputs": {**example.inputs()}}
+            if isinstance(prediction, FailedPrediction):
+                # NOTE(review): presumably a failed parse can surface as the
+                # top-level prediction too; it cannot be unpacked like a mapping.
+                example_data["Program Outputs"] = self._describe_failed_prediction(prediction)
+            else:
+                example_data["Program Outputs"] = {**prediction}
+            feedback_text = module_score["feedback"] if hasattr(module_score, "feedback") else None
+            if len(trace) == 0:
+                continue
+            # When any call failed to parse, report only the first such call.
+            first_failure = next((call for call in trace if isinstance(call[2], FailedPrediction)), None)
+            reported_calls = trace if first_failure is None else [first_failure]
+            trace_records = []
+            example_data["Program Trace"] = trace_records
+            for call in reported_calls:
+                called_predictor, inputs, outputs = call[0], call[1], call[2]
+                pred_name = next(
+                    (name for name, predictor in predictors if predictor.signature.equals(called_predictor.signature)),
+                    None,
+                )
+                assert pred_name is not None, f"Could not find predictor for {called_predictor.signature}"
+                if isinstance(outputs, FailedPrediction):
+                    rendered_outputs = self._describe_failed_prediction(outputs)
+                else:
+                    rendered_outputs = {key: str(value) for key, value in outputs.items()}
+                trace_records.append(
+                    {
+                        "Called Module": pred_name,
+                        "Inputs": self._render_call_inputs(inputs),
+                        "Generated Outputs": rendered_outputs,
+                    }
+                )
+            if feedback_text is not None:
+                example_data["Feedback"] = feedback_text
+            items.append(example_data)
+        if len(items) == 0:
+            raise Exception("No valid predictions found for program.")
+        return {"program": items}
+    @staticmethod
+    def _describe_failed_prediction(failed) -> str:
+        """Return the parse-failure message embedding the model's raw completion text."""
+        return (
+            "Couldn't parse the output as per the expected output format. The model's raw response was:\n"
+            "```\n" + failed.completion_text + "\n" + "```\n\n"
+        )
+    @staticmethod
+    def _render_call_inputs(inputs) -> dict[str, str]:
+        """Stringify one call's inputs, listing a ``History`` input's messages under "Context"."""
+        history_keys = [key for key, value in inputs.items() if isinstance(value, History)]
+        # At most one History input per call is supported.
+        assert len(history_keys) <= 1
+        rendered: dict[str, str] = {}
+        if history_keys:
+            messages = inputs[history_keys[0]].messages
+            numbered = "".join(f"  {i}: {message}\n" for i, message in enumerate(messages))
+            rendered["Context"] = "```json\n" + numbered + "```"
+        for key, value in inputs.items():
+            if key in history_keys:
+                continue
+            rendered[key] = str(value)
+        return rendered
+    def propose_new_texts(
+        self,
+        candidate: dict[str, str],
+        reflective_dataset: dict[str, list[dict[str, Any]]],
+        components_to_update: list[str],
+    ) -> dict[str, str]:
+        """Ask the reflection LM for new text for each component to update.
+
+        Args:
+            candidate: Current text per component.
+            reflective_dataset: Feedback records per component.
+            components_to_update: Names of the components to rewrite.
+
+        Returns:
+            Mapping from component name to the ``"new_program"`` value produced
+            by ``DSPyProgramProposalSignature.run``.
+        """
+        from mantisdk.algorithm.gepa.lib.adapters.dspy_full_program_adapter.dspy_program_proposal_signature import DSPyProgramProposalSignature
+        proposals: dict[str, str] = {}
+        for component in components_to_update:
+            request = {
+                "curr_program": candidate[component],
+                "dataset_with_feedback": reflective_dataset[component],
+            }
+            reply = DSPyProgramProposalSignature.run(lm=self.reflection_lm, input_dict=request)
+            proposals[component] = reply["new_program"]
+        return proposals