opik-optimizer 1.0.6-py3-none-any.whl → 2.0.0-py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- opik_optimizer/__init__.py +4 -0
- opik_optimizer/_throttle.py +2 -1
- opik_optimizer/base_optimizer.py +402 -28
- opik_optimizer/data/context7_eval.jsonl +3 -0
- opik_optimizer/datasets/context7_eval.py +90 -0
- opik_optimizer/datasets/tiny_test.py +33 -34
- opik_optimizer/datasets/truthful_qa.py +2 -2
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
- opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
- opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
- opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
- opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
- opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
- opik_optimizer/gepa_optimizer/__init__.py +3 -0
- opik_optimizer/gepa_optimizer/adapter.py +154 -0
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
- opik_optimizer/gepa_optimizer/reporting.py +181 -0
- opik_optimizer/logging_config.py +42 -7
- opik_optimizer/mcp_utils/__init__.py +22 -0
- opik_optimizer/mcp_utils/mcp.py +541 -0
- opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
- opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
- opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
- opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
- opik_optimizer/mipro_optimizer/_lm.py +30 -23
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
- opik_optimizer/mipro_optimizer/utils.py +2 -4
- opik_optimizer/optimizable_agent.py +21 -16
- opik_optimizer/optimization_config/chat_prompt.py +44 -23
- opik_optimizer/optimization_config/configs.py +3 -3
- opik_optimizer/optimization_config/mappers.py +9 -8
- opik_optimizer/optimization_result.py +22 -14
- opik_optimizer/reporting_utils.py +61 -10
- opik_optimizer/task_evaluator.py +9 -8
- opik_optimizer/utils/__init__.py +15 -0
- opik_optimizer/utils/colbert.py +236 -0
- opik_optimizer/{utils.py → utils/core.py} +160 -33
- opik_optimizer/utils/dataset_utils.py +49 -0
- opik_optimizer/utils/prompt_segments.py +186 -0
- opik_optimizer-2.0.0.dist-info/METADATA +345 -0
- opik_optimizer-2.0.0.dist-info/RECORD +74 -0
- opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
- opik_optimizer-1.0.6.dist-info/METADATA +0 -181
- opik_optimizer-1.0.6.dist-info/RECORD +0 -50
- opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/evolutionary_optimizer/mutation_ops.py
@@ -0,0 +1,306 @@
from typing import Any, TYPE_CHECKING
from collections.abc import Callable

import json
import logging
import random

from . import prompts as evo_prompts
from .mcp import EvolutionaryMCPContext, tool_description_mutation
from ..optimization_config import chat_prompt
from .. import utils
from . import reporting


logger = logging.getLogger(__name__)


class MutationOps:
    if TYPE_CHECKING:
        _calculate_population_diversity: Any
        DEFAULT_DIVERSITY_THRESHOLD: float
        verbose: int
        output_style_guidance: str
        _get_task_description_for_llm: Any
        _call_model: Any
        _mcp_context: EvolutionaryMCPContext | None
        _update_individual_with_prompt: Callable[[Any, chat_prompt.ChatPrompt], Any]

    def _deap_mutation(
        self, individual: Any, initial_prompt: chat_prompt.ChatPrompt
    ) -> Any:
        """Enhanced mutation operation with multiple strategies."""
        prompt = chat_prompt.ChatPrompt(messages=individual)

        mcp_context = getattr(self, "_mcp_context", None)
        if mcp_context is not None:
            mutated_prompt = tool_description_mutation(self, prompt, mcp_context)
            if mutated_prompt is not None:
                reporting.display_success(
                    " Mutation successful, tool description updated (MCP mutation).",
                    verbose=self.verbose,
                )
                return self._update_individual_with_prompt(individual, mutated_prompt)

        # Choose mutation strategy based on current diversity
        diversity = self._calculate_population_diversity()

        # Determine thresholds based on diversity
        if diversity < self.DEFAULT_DIVERSITY_THRESHOLD:
            # Low diversity - use more aggressive mutations (higher chance for semantic)
            semantic_threshold = 0.5
            structural_threshold = 0.8  # semantic_threshold + 0.3
        else:
            # Good diversity - use more conservative mutations (higher chance for word_level)
            semantic_threshold = 0.4
            structural_threshold = 0.7  # semantic_threshold + 0.3

        mutation_choice = random.random()

        if mutation_choice > structural_threshold:
            mutated_prompt = self._word_level_mutation_prompt(prompt)
            reporting.display_success(
                " Mutation successful, prompt has been edited by randomizing words (word-level mutation).",
                verbose=self.verbose,
            )
            return self._update_individual_with_prompt(individual, mutated_prompt)
        elif mutation_choice > semantic_threshold:
            mutated_prompt = self._structural_mutation(prompt)
            reporting.display_success(
                " Mutation successful, prompt has been edited by reordering, combining, or splitting sentences (structural mutation).",
                verbose=self.verbose,
            )
            return self._update_individual_with_prompt(individual, mutated_prompt)
        else:
            mutated_prompt = self._semantic_mutation(prompt, initial_prompt)
            reporting.display_success(
                " Mutation successful, prompt has been edited using an LLM (semantic mutation).",
                verbose=self.verbose,
            )
            return self._update_individual_with_prompt(individual, mutated_prompt)

    def _semantic_mutation(
        self, prompt: chat_prompt.ChatPrompt, initial_prompt: chat_prompt.ChatPrompt
    ) -> chat_prompt.ChatPrompt:
        """Enhanced semantic mutation with multiple strategies."""
        current_output_style_guidance = self.output_style_guidance
        if random.random() < 0.1:
            return self._radical_innovation_mutation(prompt, initial_prompt)

        try:
            strategy = random.choice(
                [
                    "rephrase",
                    "simplify",
                    "elaborate",
                    "restructure",
                    "focus",
                    "increase_complexity_and_detail",
                ]
            )

            strategy_prompts = evo_prompts.mutation_strategy_prompts(
                current_output_style_guidance
            )
            user_prompt_for_semantic_mutation = (
                evo_prompts.semantic_mutation_user_prompt(
                    prompt.get_messages(),
                    self._get_task_description_for_llm(initial_prompt),
                    current_output_style_guidance,
                    strategy_prompts[strategy],
                )
            )
            response = self._call_model(
                messages=[
                    {
                        "role": "system",
                        "content": evo_prompts.semantic_mutation_system_prompt(
                            current_output_style_guidance
                        ),
                    },
                    {"role": "user", "content": user_prompt_for_semantic_mutation},
                ],
                is_reasoning=True,
            )

            try:
                messages = utils.json_to_dict(response.strip())
            except Exception as parse_exc:
                raise RuntimeError(
                    f"Error parsing semantic mutation response as JSON. "
                    f"Response: {response!r}\nOriginal error: {parse_exc}"
                ) from parse_exc
            return chat_prompt.ChatPrompt(messages=messages)
        except Exception as e:
            reporting.display_error(
                f" Error in semantic mutation, this is usually a parsing error: {e}",
                verbose=self.verbose,
            )
            return prompt

    def _structural_mutation(
        self, prompt: chat_prompt.ChatPrompt
    ) -> chat_prompt.ChatPrompt:
        """Perform structural mutation (reordering, combining, splitting)."""
        mutated_messages: list[dict[str, str]] = []

        for message in prompt.get_messages():
            content = message["content"]
            role = message["role"]

            sentences = [s.strip() for s in content.split(".") if s.strip()]
            if len(sentences) <= 1:
                mutated_messages.append(
                    {"role": role, "content": self._word_level_mutation(content)}
                )
                continue

            mutation_type = random.random()
            if mutation_type < 0.3:
                random.shuffle(sentences)
                mutated_messages.append(
                    {"role": role, "content": ". ".join(sentences) + "."}
                )
                continue
            elif mutation_type < 0.6:
                if len(sentences) >= 2:
                    idx = random.randint(0, len(sentences) - 2)
                    combined = sentences[idx] + " and " + sentences[idx + 1]
                    sentences[idx : idx + 2] = [combined]
                    mutated_messages.append(
                        {"role": role, "content": ". ".join(sentences) + "."}
                    )
                    continue
            else:
                idx = random.randint(0, len(sentences) - 1)
                words = sentences[idx].split()
                if len(words) > 3:
                    split_point = random.randint(2, len(words) - 2)
                    sentences[idx : idx + 1] = [
                        " ".join(words[:split_point]),
                        " ".join(words[split_point:]),
                    ]
                    mutated_messages.append(
                        {"role": role, "content": ". ".join(sentences) + "."}
                    )
                    continue
                else:
                    mutated_messages.append({"role": role, "content": content})

        return chat_prompt.ChatPrompt(messages=mutated_messages)

    def _word_level_mutation_prompt(
        self, prompt: chat_prompt.ChatPrompt
    ) -> chat_prompt.ChatPrompt:
        mutated_messages: list[dict[str, str]] = []
        for message in prompt.get_messages():
            mutated_messages.append(
                {
                    "role": message["role"],
                    "content": self._word_level_mutation(message["content"]),
                }
            )
        return chat_prompt.ChatPrompt(messages=mutated_messages)

    def _word_level_mutation(self, msg_content: str) -> str:
        """Perform word-level mutation."""
        words = msg_content.split()
        if len(words) <= 1:
            return msg_content

        mutation_type = random.random()
        if mutation_type < 0.3:
            idx = random.randint(0, len(words) - 1)
            words[idx] = self._get_synonym(words[idx])
        elif mutation_type < 0.6:
            if len(words) > 2:
                i, j = random.sample(range(len(words)), 2)
                words[i], words[j] = words[j], words[i]
        else:
            idx = random.randint(0, len(words) - 1)
            words[idx] = self._modify_phrase(words[idx])

        return " ".join(words)

    def _get_synonym(self, word: str) -> str:
        """Get a synonym for a word using LLM."""
        try:
            response = self._call_model(
                messages=[
                    {"role": "system", "content": evo_prompts.synonyms_system_prompt()},
                    {
                        "role": "user",
                        "content": (
                            f"Give me a single synonym for the word '{word}'. Return only the synonym, nothing else."
                        ),
                    },
                ],
                is_reasoning=True,
            )
            return response.strip()
        except Exception as e:
            logger.warning(f"Error getting synonym for '{word}': {e}")
            return word

    def _modify_phrase(self, phrase: str) -> str:
        """Modify a phrase while preserving meaning using LLM."""
        try:
            response = self._call_model(
                messages=[
                    {"role": "system", "content": evo_prompts.rephrase_system_prompt()},
                    {
                        "role": "user",
                        "content": (
                            f"Modify this phrase while keeping the same meaning: '{phrase}'. Return only the modified phrase, nothing else."
                        ),
                    },
                ],
                is_reasoning=True,
            )
            return response.strip()
        except Exception as e:
            logger.warning(f"Error modifying phrase '{phrase}': {e}")
            return phrase

    def _radical_innovation_mutation(
        self, prompt: chat_prompt.ChatPrompt, initial_prompt: chat_prompt.ChatPrompt
    ) -> chat_prompt.ChatPrompt:
        """Attempts to generate a significantly improved and potentially very different prompt using an LLM."""
        logger.debug(
            f"Attempting radical innovation for prompt: {json.dumps(prompt.get_messages())[:70]}..."
        )
        task_desc_for_llm = self._get_task_description_for_llm(initial_prompt)
        current_output_style_guidance = self.output_style_guidance

        user_prompt_for_radical_innovation = evo_prompts.radical_innovation_user_prompt(
            task_desc_for_llm, current_output_style_guidance, prompt.get_messages()
        )
        try:
            new_prompt_str = self._call_model(
                messages=[
                    {
                        "role": "system",
                        "content": evo_prompts.radical_innovation_system_prompt(
                            current_output_style_guidance
                        ),
                    },
                    {"role": "user", "content": user_prompt_for_radical_innovation},
                ],
                is_reasoning=True,
            )
            logger.info(
                f"Radical innovation LLM result (truncated): {new_prompt_str[:200]}"
            )
            try:
                new_messages = utils.json_to_dict(new_prompt_str)
            except Exception as parse_exc:
                logger.warning(
                    f"Failed to parse LLM output in radical innovation mutation for prompt '{json.dumps(prompt.get_messages())[:50]}...'. Output: {new_prompt_str[:200]}. Error: {parse_exc}. Returning original."
                )
                return prompt
            return chat_prompt.ChatPrompt(messages=new_messages)
        except Exception as e:
            logger.warning(
                f"Radical innovation mutation failed for prompt '{json.dumps(prompt.get_messages())[:50]}...': {e}. Returning original."
            )
            return prompt
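The `_deap_mutation` method above maps a single `random.random()` draw onto one of three strategies, shifting the split toward the LLM-driven semantic mutation when population diversity is low. Below is a minimal standalone sketch of that selection logic; the `diversity_threshold` default is illustrative, since the real value comes from `DEFAULT_DIVERSITY_THRESHOLD` on the optimizer class and is not shown in this diff.

```python
import random


def pick_mutation_strategy(diversity: float, diversity_threshold: float = 0.3) -> str:
    """Reproduce the threshold logic of _deap_mutation for a single draw.

    diversity_threshold is an illustrative stand-in for DEFAULT_DIVERSITY_THRESHOLD.
    """
    if diversity < diversity_threshold:
        # Low diversity: push toward the aggressive, LLM-driven semantic mutation.
        semantic_threshold, structural_threshold = 0.5, 0.8
    else:
        # Healthy diversity: favour the cheaper word-level and structural edits.
        semantic_threshold, structural_threshold = 0.4, 0.7

    draw = random.random()
    if draw > structural_threshold:
        return "word_level"
    elif draw > semantic_threshold:
        return "structural"
    return "semantic"


if __name__ == "__main__":
    # With low diversity, "semantic" is chosen roughly half the time.
    print([pick_mutation_strategy(0.1) for _ in range(5)])
```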
opik_optimizer/evolutionary_optimizer/population_ops.py
@@ -0,0 +1,228 @@
from typing import Any, TYPE_CHECKING

import json
import logging

from deap import tools
from deap import creator as _creator

from . import prompts as evo_prompts
from . import reporting
from .mcp import EvolutionaryMCPContext, initialize_population_mcp
from ..optimization_config import chat_prompt
from .. import utils


logger = logging.getLogger(__name__)
creator = _creator


class PopulationOps:
    if TYPE_CHECKING:
        _get_task_description_for_llm: Any
        output_style_guidance: str
        _call_model: Any
        toolbox: Any
        _mcp_context: EvolutionaryMCPContext | None
        # Hints for mixin attributes provided by the primary optimizer class
        _gens_since_pop_improvement: int
        _best_primary_score_history: list[float]
        DEFAULT_RESTART_THRESHOLD: float
        DEFAULT_RESTART_GENERATIONS: int
        enable_moo: bool
        elitism_size: int
        population_size: int
        verbose: int

    def _initialize_population(
        self, prompt: chat_prompt.ChatPrompt
    ) -> list[chat_prompt.ChatPrompt]:
        """Initialize the population with diverse variations of the initial prompt,
        including some 'fresh start' prompts based purely on task description.
        All generated prompts should aim to elicit answers matching self.output_style_guidance.
        """
        mcp_context = getattr(self, "_mcp_context", None)
        if mcp_context is not None:
            return initialize_population_mcp(self, prompt, mcp_context)
        with reporting.initializing_population(verbose=self.verbose) as init_pop_report:
            init_pop_report.start(self.population_size)

            population = [prompt]
            if self.population_size <= 1:
                return population

            num_to_generate_total = self.population_size - 1
            num_fresh_starts = max(1, int(num_to_generate_total * 0.2))
            num_variations_on_initial = num_to_generate_total - num_fresh_starts

            task_desc_for_llm = self._get_task_description_for_llm(prompt)
            current_output_style_guidance = self.output_style_guidance

            # Fresh starts
            if num_fresh_starts > 0:
                init_pop_report.start_fresh_prompts(num_fresh_starts)
                fresh_start_user_prompt = evo_prompts.fresh_start_user_prompt(
                    task_desc_for_llm, current_output_style_guidance, num_fresh_starts
                )
                try:
                    response_content = self._call_model(
                        messages=[
                            {
                                "role": "system",
                                "content": evo_prompts.fresh_start_system_prompt(
                                    current_output_style_guidance
                                ),
                            },
                            {"role": "user", "content": fresh_start_user_prompt},
                        ],
                        is_reasoning=True,
                    )

                    logger.debug(
                        f"Raw LLM response for fresh start prompts: {response_content}"
                    )

                    fresh_prompts = utils.json_to_dict(response_content)
                    if isinstance(fresh_prompts, list):
                        if all(isinstance(p, dict) for p in fresh_prompts) and all(
                            p.get("role") is not None for p in fresh_prompts
                        ):
                            population.append(
                                chat_prompt.ChatPrompt(messages=fresh_prompts)
                            )
                            init_pop_report.success_fresh_prompts(1)
                        elif all(isinstance(p, list) for p in fresh_prompts):
                            population.extend(
                                [
                                    chat_prompt.ChatPrompt(messages=p)
                                    for p in fresh_prompts[:num_fresh_starts]
                                ]
                            )
                            init_pop_report.success_fresh_prompts(
                                len(fresh_prompts[:num_fresh_starts])
                            )
                    else:
                        init_pop_report.failed_fresh_prompts(
                            num_fresh_starts,
                            f"LLM response for fresh starts was not a valid list of strings or was empty: {response_content}. Skipping fresh start prompts.",
                        )
                except json.JSONDecodeError as e_json:
                    init_pop_report.failed_fresh_prompts(
                        num_fresh_starts,
                        f"JSONDecodeError generating fresh start prompts: {e_json}. LLM response: '{response_content}'. Skipping fresh start prompts.",
                    )
                except Exception as e:
                    init_pop_report.failed_fresh_prompts(
                        num_fresh_starts,
                        f"Error generating fresh start prompts: {e}. Skipping fresh start prompts.",
                    )

            # Variations on the initial prompt
            if num_variations_on_initial > 0:
                init_pop_report.start_variations(num_variations_on_initial)
                user_prompt_for_variation = evo_prompts.variation_user_prompt(
                    prompt.get_messages(),
                    task_desc_for_llm,
                    current_output_style_guidance,
                    num_variations_on_initial,
                )
                try:
                    response_content_variations = self._call_model(
                        messages=[
                            {
                                "role": "system",
                                "content": evo_prompts.variation_system_prompt(
                                    current_output_style_guidance
                                ),
                            },
                            {"role": "user", "content": user_prompt_for_variation},
                        ],
                        is_reasoning=True,
                    )
                    logger.debug(
                        f"Raw response for population variations: {response_content_variations}"
                    )
                    json_response_variations = json.loads(response_content_variations)
                    generated_prompts_variations = [
                        p["prompt"]
                        for p in json_response_variations.get("prompts", [])
                        if isinstance(p, dict) and "prompt" in p
                    ]

                    if generated_prompts_variations:
                        init_pop_report.success_variations(
                            len(
                                generated_prompts_variations[:num_variations_on_initial]
                            )
                        )
                        population.extend(
                            [
                                chat_prompt.ChatPrompt(messages=p)
                                for p in generated_prompts_variations[
                                    :num_variations_on_initial
                                ]
                            ]
                        )
                    else:
                        init_pop_report.failed_variations(
                            num_variations_on_initial,
                            "Could not parse 'prompts' list for variations. Skipping variations.",
                        )
                except Exception as e:
                    init_pop_report.failed_variations(
                        num_variations_on_initial,
                        f"Error calling LLM for initial population variations: {e}",
                    )

            # Ensure population is of the required size using unique prompts
            final_population_set: set[str] = set()
            final_population_list: list[chat_prompt.ChatPrompt] = []
            for p in population:
                if json.dumps(p.get_messages()) not in final_population_set:
                    final_population_set.add(json.dumps(p.get_messages()))
                    final_population_list.append(p)

            init_pop_report.end(final_population_list)
            return final_population_list[: self.population_size]

    def _should_restart_population(self, curr_best: float) -> bool:
        """Update internal counters and decide if we should trigger a population restart."""
        if self._best_primary_score_history:
            threshold = self._best_primary_score_history[-1] * (
                1 + self.DEFAULT_RESTART_THRESHOLD
            )
            if curr_best < threshold:
                self._gens_since_pop_improvement += 1  # type: ignore[attr-defined]
            else:
                self._gens_since_pop_improvement = 0  # type: ignore[attr-defined]
        self._best_primary_score_history.append(curr_best)
        return self._gens_since_pop_improvement >= self.DEFAULT_RESTART_GENERATIONS  # type: ignore[attr-defined]

    def _restart_population(
        self,
        hof: tools.HallOfFame,
        population: list[Any],
        best_prompt_so_far: chat_prompt.ChatPrompt,
    ) -> list[Any]:
        """Return a fresh, evaluated population seeded by elites."""
        if self.enable_moo:
            elites = list(hof)
        else:
            elites = tools.selBest(population, self.elitism_size)

        seed_prompt = (
            chat_prompt.ChatPrompt(
                messages=max(elites, key=lambda x: x.fitness.values[0])
            )
            if elites
            else best_prompt_so_far
        )

        prompt_variants = self._initialize_population(seed_prompt)
        new_pop = [creator.Individual(p.get_messages()) for p in prompt_variants]

        for ind, fit in zip(new_pop, map(self.toolbox.evaluate, new_pop)):
            ind.fitness.values = fit

        self._gens_since_pop_improvement = 0  # type: ignore[attr-defined]
        return new_pop
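`_should_restart_population` counts a generation as stagnant unless the current best score beats the previous best by more than a relative `DEFAULT_RESTART_THRESHOLD`, and `_restart_population` reseeds from the elites once enough stagnant generations accumulate. The sketch below isolates that bookkeeping; `improvement_threshold` and `patience` are illustrative stand-ins for the `DEFAULT_RESTART_THRESHOLD` and `DEFAULT_RESTART_GENERATIONS` class attributes, whose actual values are not part of this diff.

```python
class RestartTracker:
    """Standalone sketch of the stagnation bookkeeping in _should_restart_population."""

    def __init__(self, improvement_threshold: float = 0.01, patience: int = 3) -> None:
        self.improvement_threshold = improvement_threshold
        self.patience = patience
        self.history: list[float] = []
        self.stagnant_generations = 0

    def should_restart(self, curr_best: float) -> bool:
        if self.history:
            # A generation counts as progress only if it beats the previous best
            # by more than the relative improvement threshold.
            required = self.history[-1] * (1 + self.improvement_threshold)
            if curr_best < required:
                self.stagnant_generations += 1
            else:
                self.stagnant_generations = 0
        self.history.append(curr_best)
        return self.stagnant_generations >= self.patience


if __name__ == "__main__":
    tracker = RestartTracker()
    for score in [0.50, 0.51, 0.512, 0.513, 0.513]:
        print(score, tracker.should_restart(score))  # True only after 3 flat generations
```

In the package itself this state lives on the optimizer class that mixes in `PopulationOps` (the `TYPE_CHECKING` block above hints at those attributes), so the counters persist across generations of the evolutionary loop.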