opik-optimizer 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/base_optimizer.py +376 -19
  3. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +80 -17
  4. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +179 -39
  5. opik_optimizer/evolutionary_optimizer/llm_support.py +3 -1
  6. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +17 -3
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +5 -0
  9. opik_optimizer/evolutionary_optimizer/prompts.py +47 -0
  10. opik_optimizer/evolutionary_optimizer/reporting.py +12 -0
  11. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +65 -59
  12. opik_optimizer/gepa_optimizer/adapter.py +5 -3
  13. opik_optimizer/gepa_optimizer/gepa_optimizer.py +163 -66
  14. opik_optimizer/mcp_utils/mcp_workflow.py +57 -3
  15. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +75 -69
  16. opik_optimizer/mipro_optimizer/_lm.py +10 -3
  17. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1 -1
  18. opik_optimizer/mipro_optimizer/mipro_optimizer.py +96 -21
  19. opik_optimizer/optimizable_agent.py +5 -0
  20. opik_optimizer/optimization_result.py +1 -0
  21. opik_optimizer/utils/core.py +56 -14
  22. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/METADATA +96 -9
  23. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/RECORD +27 -26
  24. /opik_optimizer/{colbert.py → utils/colbert.py} +0 -0
  25. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  26. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/licenses/LICENSE +0 -0
  27. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/__init__.py
@@ -15,6 +15,7 @@ from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
  from .gepa_optimizer import GepaOptimizer
  from .logging_config import setup_logging
  from .meta_prompt_optimizer import MetaPromptOptimizer
+ from .mipro_optimizer import MiproOptimizer
  from .optimization_config.configs import TaskConfig
  from .optimization_result import OptimizationResult

@@ -31,6 +32,7 @@ __all__ = [
      "FewShotBayesianOptimizer",
      "GepaOptimizer",
      "MetaPromptOptimizer",
+     "MiproOptimizer",
      "EvolutionaryOptimizer",
      "OptimizationResult",
      "OptimizableAgent",
opik_optimizer/base_optimizer.py
@@ -1,10 +1,13 @@
- from typing import Any
+ from typing import Any, cast
  from collections.abc import Callable

+ import copy
+ import inspect
  import logging
  import time
- from abc import abstractmethod
+ from abc import ABC, abstractmethod
  import random
+ import importlib.metadata


  import litellm
@@ -17,7 +20,7 @@ from . import _throttle, optimization_result
  from .cache_config import initialize_cache
  from .optimization_config import chat_prompt, mappers
  from .optimizable_agent import OptimizableAgent
- from .utils import create_litellm_agent_class
+ from .utils import create_litellm_agent_class, optimization_context
  from . import task_evaluator

  _limiter = _throttle.get_rate_limiter_for_current_opik_installation()
@@ -29,6 +32,12 @@ litellm.drop_params = True
  logger = logging.getLogger(__name__)


+ try:
+     _OPTIMIZER_VERSION = importlib.metadata.version("opik_optimizer")
+ except importlib.metadata.PackageNotFoundError:  # pragma: no cover - dev installs
+     _OPTIMIZER_VERSION = "unknown"
+
+
  class OptimizationRound(BaseModel):
      model_config = {"arbitrary_types_allowed": True}

@@ -41,11 +50,12 @@ class OptimizationRound(BaseModel):
      improvement: float


- class BaseOptimizer:
+ class BaseOptimizer(ABC):
      def __init__(
          self,
          model: str,
          verbose: int = 1,
+         seed: int = 42,
          **model_kwargs: Any,
      ) -> None:
          """
@@ -54,19 +64,335 @@ class BaseOptimizer:
          Args:
              model: LiteLLM model name
              verbose: Controls internal logging/progress bars (0=off, 1=on).
+             seed: Random seed for reproducibility (default: 42)
              model_kwargs: additional args for model (eg, temperature)
          """
          self.model = model
          self.reasoning_model = model
          self.model_kwargs = model_kwargs
          self.verbose = verbose
+         self.seed = seed
          self._history: list[OptimizationRound] = []
          self.experiment_config = None
          self.llm_call_counter = 0
+         self.tool_call_counter = 0
+         self._opik_client = None  # Lazy initialization

          # Initialize shared cache
          initialize_cache()

+     def reset_counters(self) -> None:
+         """Reset all call counters for a new optimization run."""
+         self.llm_call_counter = 0
+         self.tool_call_counter = 0
+
+     def increment_llm_counter(self) -> None:
+         """Increment the LLM call counter."""
+         self.llm_call_counter += 1
+
+     def increment_tool_counter(self) -> None:
+         """Increment the tool call counter."""
+         self.tool_call_counter += 1
+
+     def cleanup(self) -> None:
+         """
+         Clean up resources and perform memory management.
+         Should be called when the optimizer is no longer needed.
+         """
+         # Reset counters
+         self.reset_counters()
+
+         # Clear history to free memory
+         self._history.clear()
+
+         # Clear Opik client if it exists
+         if self._opik_client is not None:
+             # Note: Opik client doesn't have explicit cleanup, but we can clear the reference
+             self._opik_client = None
+
+         logger.debug(f"Cleaned up resources for {self.__class__.__name__}")
+
+     def __del__(self) -> None:
+         """Destructor to ensure cleanup is called."""
+         try:
+             self.cleanup()
+         except Exception:
+             # Ignore exceptions during cleanup in destructor
+             pass
+
+     @property
+     def opik_client(self) -> Any:
+         """Lazy initialization of Opik client."""
+         if self._opik_client is None:
+             import opik
+
+             self._opik_client = opik.Opik()
+         return self._opik_client
+
+     def validate_optimization_inputs(
+         self, prompt: "chat_prompt.ChatPrompt", dataset: "Dataset", metric: Callable
+     ) -> None:
+         """
+         Validate common optimization inputs.
+
+         Args:
+             prompt: The chat prompt to validate
+             dataset: The dataset to validate
+             metric: The metric function to validate
+
+         Raises:
+             ValueError: If any input is invalid
+         """
+         if not isinstance(prompt, chat_prompt.ChatPrompt):
+             raise ValueError("Prompt must be a ChatPrompt object")
+
+         if not isinstance(dataset, Dataset):
+             raise ValueError("Dataset must be a Dataset object")
+
+         if not callable(metric):
+             raise ValueError(
+                 "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
+             )
+
+     def setup_agent_class(
+         self, prompt: "chat_prompt.ChatPrompt", agent_class: Any = None
+     ) -> Any:
+         """
+         Setup agent class for optimization.
+
+         Args:
+             prompt: The chat prompt
+             agent_class: Optional custom agent class
+
+         Returns:
+             The agent class to use
+         """
+         if agent_class is None:
+             return create_litellm_agent_class(prompt, optimizer_ref=self)
+         else:
+             return agent_class
+
+     def configure_prompt_model(self, prompt: "chat_prompt.ChatPrompt") -> None:
+         """
+         Configure prompt model and model_kwargs if not set.
+
+         Args:
+             prompt: The chat prompt to configure
+         """
+         # Only configure if prompt is a valid ChatPrompt object
+         if hasattr(prompt, "model") and hasattr(prompt, "model_kwargs"):
+             if prompt.model is None:
+                 prompt.model = self.model
+             if prompt.model_kwargs is None:
+                 prompt.model_kwargs = self.model_kwargs
+
+     # ------------------------------------------------------------------
+     # Experiment metadata helpers
+     # ------------------------------------------------------------------
+
+     @staticmethod
+     def _drop_none(metadata: dict[str, Any]) -> dict[str, Any]:
+         return {k: v for k, v in metadata.items() if v is not None}
+
+     @staticmethod
+     def _deep_merge_dicts(
+         base: dict[str, Any], overrides: dict[str, Any]
+     ) -> dict[str, Any]:
+         result = copy.deepcopy(base)
+         for key, value in overrides.items():
+             if (
+                 key in result
+                 and isinstance(result[key], dict)
+                 and isinstance(value, dict)
+             ):
+                 result[key] = BaseOptimizer._deep_merge_dicts(result[key], value)
+             else:
+                 result[key] = value
+         return result
+
+     @staticmethod
+     def _serialize_tools(prompt: "chat_prompt.ChatPrompt") -> list[dict[str, Any]]:
+         tools_obj = getattr(prompt, "tools", None)
+         if not isinstance(tools_obj, list):
+             return []
+
+         try:
+             return copy.deepcopy(cast(list[dict[str, Any]], tools_obj))
+         except Exception:  # pragma: no cover - defensive
+             serialized_tools: list[dict[str, Any]] = []
+             for tool in tools_obj:
+                 if isinstance(tool, dict):
+                     serialized_tools.append({k: v for k, v in tool.items() if k})
+             return serialized_tools
+
+     @staticmethod
+     def _describe_annotation(annotation: Any) -> str | None:
+         if annotation is inspect._empty:
+             return None
+         if isinstance(annotation, type):
+             return annotation.__name__
+         return str(annotation)
+
+     def _summarize_tool_signatures(
+         self, prompt: "chat_prompt.ChatPrompt"
+     ) -> list[dict[str, Any]]:
+         signatures: list[dict[str, Any]] = []
+         for name, func in getattr(prompt, "function_map", {}).items():
+             callable_obj = getattr(func, "__wrapped__", func)
+             try:
+                 sig = inspect.signature(callable_obj)
+             except (TypeError, ValueError):  # pragma: no cover - defensive
+                 signatures.append({"name": name, "signature": "unavailable"})
+                 continue
+
+             params: list[dict[str, Any]] = []
+             for parameter in sig.parameters.values():
+                 params.append(
+                     self._drop_none(
+                         {
+                             "name": parameter.name,
+                             "kind": parameter.kind.name,
+                             "annotation": self._describe_annotation(
+                                 parameter.annotation
+                             ),
+                             "default": (
+                                 None
+                                 if parameter.default is inspect._empty
+                                 else parameter.default
+                             ),
+                         }
+                     )
+                 )
+
+             signatures.append(
+                 self._drop_none(
+                     {
+                         "name": name,
+                         "parameters": params,
+                         "docstring": inspect.getdoc(callable_obj),
+                     }
+                 )
+             )
+         return signatures
+
+     def _build_agent_config(self, prompt: "chat_prompt.ChatPrompt") -> dict[str, Any]:
+         agent_config: dict[str, Any] = dict(prompt.to_dict())
+         agent_config["project_name"] = getattr(prompt, "project_name", None)
+         agent_config["model"] = getattr(prompt, "model", None) or self.model
+         agent_config["tools"] = self._serialize_tools(prompt)
+         return self._drop_none(agent_config)
+
+     def get_optimizer_metadata(self) -> dict[str, Any]:
+         """Override in subclasses to expose optimizer-specific parameters."""
+         return {}
+
+     def _build_optimizer_metadata(self) -> dict[str, Any]:
+         metadata = {
+             "name": self.__class__.__name__,
+             "version": _OPTIMIZER_VERSION,
+             "model": self.model,
+             "model_kwargs": self.model_kwargs or None,
+             "seed": getattr(self, "seed", None),
+             "num_threads": getattr(self, "num_threads", None),
+         }
+
+         # n_threads is used by some optimizers instead of num_threads
+         if metadata["num_threads"] is None and hasattr(self, "n_threads"):
+             metadata["num_threads"] = getattr(self, "n_threads")
+
+         if hasattr(self, "reasoning_model"):
+             metadata["reasoning_model"] = getattr(self, "reasoning_model")
+
+         extra_parameters = self.get_optimizer_metadata()
+         if extra_parameters:
+             metadata["parameters"] = extra_parameters
+
+         return self._drop_none(metadata)
+
+     def _prepare_experiment_config(
+         self,
+         *,
+         prompt: "chat_prompt.ChatPrompt",
+         dataset: Dataset,
+         metric: Callable,
+         experiment_config: dict[str, Any] | None = None,
+         configuration_updates: dict[str, Any] | None = None,
+         additional_metadata: dict[str, Any] | None = None,
+     ) -> dict[str, Any]:
+         dataset_id = getattr(dataset, "id", None)
+         project_name = (
+             getattr(self.agent_class, "project_name", None)
+             if hasattr(self, "agent_class")
+             else None
+         )
+         if not project_name:
+             project_name = getattr(prompt, "project_name", None)
+         if not project_name:
+             project_name = self.__class__.__name__
+
+         base_config: dict[str, Any] = {
+             "project_name": project_name,
+             "agent_class": (
+                 getattr(self.agent_class, "__name__", None)
+                 if hasattr(self, "agent_class")
+                 else None
+             ),
+             "agent_config": self._build_agent_config(prompt),
+             "metric": getattr(metric, "__name__", str(metric)),
+             "dataset": getattr(dataset, "name", None),
+             "dataset_id": dataset_id,
+             "optimizer_metadata": self._build_optimizer_metadata(),
+             "tool_signatures": self._summarize_tool_signatures(prompt),
+             "configuration": {
+                 "prompt": prompt.get_messages(),
+                 "prompt_name": getattr(prompt, "name", None),
+                 "tools": self._serialize_tools(prompt),
+                 "prompt_project_name": getattr(prompt, "project_name", None),
+             },
+         }
+
+         if configuration_updates:
+             base_config["configuration"] = self._deep_merge_dicts(
+                 base_config["configuration"], configuration_updates
+             )
+
+         if additional_metadata:
+             base_config = self._deep_merge_dicts(base_config, additional_metadata)
+
+         if experiment_config:
+             base_config = self._deep_merge_dicts(base_config, experiment_config)
+
+         return self._drop_none(base_config)
+
+     def create_optimization_context(
+         self, dataset: "Dataset", metric: Callable, metadata: dict | None = None
+     ) -> Any:
+         """
+         Create optimization context for tracking.
+
+         Args:
+             dataset: The dataset being optimized
+             metric: The metric function
+             metadata: Additional metadata
+
+         Returns:
+             Optimization context manager
+         """
+         context_metadata = {
+             "optimizer": self.__class__.__name__,
+             "model": self.model,
+             "seed": self.seed,
+         }
+         if metadata:
+             context_metadata.update(metadata)
+
+         return optimization_context(
+             client=self.opik_client,
+             dataset_name=dataset.name,
+             objective_name=metric.__name__,
+             metadata=context_metadata,
+         )
+
      @abstractmethod
      def optimize_prompt(
          self,
@@ -74,6 +400,9 @@ class BaseOptimizer:
          dataset: Dataset,
          metric: Callable,
          experiment_config: dict | None = None,
+         n_samples: int | None = None,
+         auto_continue: bool = False,
+         agent_class: type[OptimizableAgent] | None = None,
          **kwargs: Any,
      ) -> optimization_result.OptimizationResult:
          """
@@ -100,9 +429,43 @@ class BaseOptimizer:
          tool_name: str,
          second_pass: Any,
          experiment_config: dict | None = None,
+         n_samples: int | None = None,
+         auto_continue: bool = False,
+         agent_class: type[OptimizableAgent] | None = None,
+         fallback_invoker: Callable[[dict[str, Any]], str] | None = None,
+         fallback_arguments: Callable[[Any], dict[str, Any]] | None = None,
+         allow_tool_use_on_second_pass: bool = False,
          **kwargs: Any,
      ) -> optimization_result.OptimizationResult:
-         """Optimize prompts that rely on MCP tooling."""
+         """
+         Optimize prompts that rely on MCP (Model Context Protocol) tooling.
+
+         This method provides a standardized interface for optimizing prompts that use
+         external tools through the MCP protocol. It handles tool invocation, second-pass
+         coordination, and fallback mechanisms.
+
+         Args:
+             prompt: The chat prompt to optimize, must include tools
+             dataset: Opik dataset containing evaluation data
+             metric: Evaluation function that takes (dataset_item, llm_output) and returns a score
+             tool_name: Name of the MCP tool to use for optimization
+             second_pass: MCPSecondPassCoordinator for handling second-pass tool calls
+             experiment_config: Optional configuration for the experiment
+             n_samples: Number of samples to use for optimization (default: None)
+             auto_continue: Whether to auto-continue optimization (default: False)
+             agent_class: Custom agent class to use (default: None)
+             fallback_invoker: Fallback function for tool invocation (default: None)
+             fallback_arguments: Function to extract tool arguments (default: None)
+             allow_tool_use_on_second_pass: Whether to allow tool use on second pass (default: False)
+             **kwargs: Additional arguments for optimization
+
+         Returns:
+             OptimizationResult: The optimization result containing the optimized prompt and metrics
+
+         Raises:
+             NotImplementedError: If the optimizer doesn't implement MCP optimization
+             ValueError: If the prompt doesn't include required tools
+         """
          raise NotImplementedError(
              f"{self.__class__.__name__} does not implement optimize_mcp yet."
          )
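
Taken together, the additions above give every optimizer a uniform lifecycle: counters reset per run, inputs validated once, and experiment metadata assembled through `_prepare_experiment_config`. A sketch of how a concrete subclass might wire these helpers (the class name, `rounds` parameter, and method body are illustrative, not from this diff):

```python
from typing import Any

class SketchOptimizer(BaseOptimizer):  # hypothetical subclass for illustration
    def get_optimizer_metadata(self) -> dict[str, Any]:
        # Surfaced under optimizer_metadata["parameters"] by _build_optimizer_metadata()
        return {"rounds": 3}

    def optimize_prompt(self, prompt, dataset, metric, experiment_config=None,
                        n_samples=None, auto_continue=False, agent_class=None,
                        **kwargs: Any):
        self.reset_counters()                # fresh llm/tool call counts per run
        self.validate_optimization_inputs(prompt, dataset, metric)
        self.configure_prompt_model(prompt)  # fill model/model_kwargs if unset
        self.agent_class = self.setup_agent_class(prompt, agent_class)
        experiment_config = self._prepare_experiment_config(
            prompt=prompt,
            dataset=dataset,
            metric=metric,
            experiment_config=experiment_config,
        )
        ...  # optimizer-specific search loop goes here
```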
@@ -166,7 +529,7 @@ class BaseOptimizer:
          self.agent_class: type[OptimizableAgent]

          if agent_class is None:
-             self.agent_class = create_litellm_agent_class(prompt)
+             self.agent_class = create_litellm_agent_class(prompt, optimizer_ref=self)
          else:
              self.agent_class = agent_class

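Note the new `optimizer_ref=self` argument: the generated LiteLLM agent class now carries a back-reference to the optimizer that created it, which lines up with the new `increment_llm_counter`/`increment_tool_counter` hooks. A sketch of that wiring under this assumption (the agent internals are illustrative; only the counter methods come from this diff):

```python
class SketchLiteLLMAgent:  # stand-in for what create_litellm_agent_class() builds
    def __init__(self, prompt, optimizer_ref=None):
        self.prompt = prompt
        self.optimizer_ref = optimizer_ref  # owning BaseOptimizer, if any

    def invoke(self, messages):
        if self.optimizer_ref is not None:
            self.optimizer_ref.increment_llm_counter()  # tallied in llm_call_counter
        # ... perform the actual LiteLLM completion call here ...
```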
@@ -181,18 +544,12 @@ class BaseOptimizer:
              }
              return result

-         experiment_config = experiment_config or {}
-         experiment_config["project_name"] = self.__class__.__name__
-         experiment_config = {
-             **experiment_config,
-             **{
-                 "agent_class": self.agent_class.__name__,
-                 "agent_config": prompt.to_dict(),
-                 "metric": metric.__name__,
-                 "dataset": dataset.name,
-                 "configuration": {"prompt": (prompt.get_messages() if prompt else [])},
-             },
-         }
+         experiment_config = self._prepare_experiment_config(
+             prompt=prompt,
+             dataset=dataset,
+             metric=metric,
+             experiment_config=experiment_config,
+         )

          if n_samples is not None:
              if dataset_item_ids is not None:
@@ -207,7 +564,7 @@ class BaseOptimizer:
              metric=metric,
              evaluated_task=llm_task,
              num_threads=n_threads,
-             project_name=self.agent_class.project_name,
+             project_name=experiment_config.get("project_name"),
              experiment_config=experiment_config,
              optimization_id=None,
              verbose=verbose,
opik_optimizer/evolutionary_optimizer/evaluation_ops.py
@@ -1,10 +1,15 @@
- from typing import Any, TYPE_CHECKING
+ from typing import Any, TYPE_CHECKING, cast
  from collections.abc import Callable


  from .. import task_evaluator
  from ..optimization_config import mappers, chat_prompt
+ from ..mcp_utils.mcp_workflow import MCPExecutionConfig
  import opik
+ import copy
+
+ if TYPE_CHECKING:  # pragma: no cover - typing only
+     from ..base_optimizer import BaseOptimizer


  class EvaluationOps:
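
The `TYPE_CHECKING` guard above keeps `base_optimizer` out of the runtime import graph, presumably to avoid a circular import, while still letting this module annotate `self` with the `cast("BaseOptimizer", self)` call used below. The pattern in isolation (the helper method is illustrative):

```python
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:  # evaluated by type checkers only, never at runtime
    from ..base_optimizer import BaseOptimizer

class EvaluationOpsSketch:
    def _as_optimizer(self) -> "BaseOptimizer":  # hypothetical helper
        # cast() is a runtime no-op; it only narrows the type for checkers
        return cast("BaseOptimizer", self)
```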
@@ -30,33 +35,91 @@ class EvaluationOps:

          new_prompt = prompt.copy()
          new_prompt.set_messages(messages)
+         tools = getattr(messages, "tools", None)
+         if tools is not None:
+             new_prompt.tools = copy.deepcopy(tools)
+
+         optimizer = cast("BaseOptimizer", self)

-         experiment_config = experiment_config or {}
-         experiment_config["project_name"] = self.agent_class.project_name
-         experiment_config = {
-             **experiment_config,
-             "optimizer": self.__class__.__name__,
-             "agent_class": self.agent_class.__name__,
-             "agent_config": new_prompt.to_dict(),
-             "metric": metric.__name__,
-             "dataset": dataset.name,
-             "configuration": {
-                 "prompt": new_prompt.get_messages(),
+         configuration_updates = optimizer._drop_none(
+             {
                  "n_samples_for_eval": (
                      len(dataset_item_ids) if dataset_item_ids is not None else n_samples
                  ),
                  "total_dataset_items": total_items,
-             },
-         }
+             }
+         )
+         evaluation_details = optimizer._drop_none(
+             {
+                 "dataset_item_ids": dataset_item_ids,
+                 "optimization_id": optimization_id,
+             }
+         )
+         additional_metadata = (
+             {"evaluation": evaluation_details} if evaluation_details else None
+         )
+
+         experiment_config = optimizer._prepare_experiment_config(
+             prompt=new_prompt,
+             dataset=dataset,
+             metric=metric,
+             experiment_config=experiment_config,
+             configuration_updates=configuration_updates,
+             additional_metadata=additional_metadata,
+         )
          try:
              agent = self.agent_class(new_prompt)
          except Exception:
              return 0.0

+         mcp_execution_config: MCPExecutionConfig | None = kwargs.get("mcp_config")
+
          def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
              messages = new_prompt.get_messages(dataset_item)
-             model_output = agent.invoke(messages)
-             return {mappers.EVALUATED_LLM_TASK_OUTPUT: model_output}
+
+             if mcp_execution_config is None:
+                 model_output = agent.invoke(messages)
+                 return {mappers.EVALUATED_LLM_TASK_OUTPUT: model_output}
+
+             coordinator = mcp_execution_config.coordinator
+             coordinator.reset()
+
+             raw_model_output = agent.llm_invoke(
+                 messages=messages,
+                 seed=getattr(self, "seed", None),
+                 allow_tool_use=True,
+             )
+
+             second_pass_messages = coordinator.build_second_pass_messages(
+                 base_messages=messages,
+                 dataset_item=dataset_item,
+             )
+
+             if (
+                 second_pass_messages is None
+                 and mcp_execution_config.fallback_invoker is not None
+             ):
+                 fallback_args = mcp_execution_config.fallback_arguments(dataset_item)
+                 if fallback_args:
+                     summary_override = mcp_execution_config.fallback_invoker(
+                         fallback_args
+                     )
+                     second_pass_messages = coordinator.build_second_pass_messages(
+                         base_messages=messages,
+                         dataset_item=dataset_item,
+                         summary_override=summary_override,
+                     )
+
+             if second_pass_messages is not None:
+                 final_response = agent.llm_invoke(
+                     messages=second_pass_messages,
+                     seed=getattr(self, "seed", None),
+                     allow_tool_use=mcp_execution_config.allow_tool_use_on_second_pass,
+                 )
+             else:
+                 final_response = raw_model_output
+
+             return {mappers.EVALUATED_LLM_TASK_OUTPUT: final_response.strip()}

          score = task_evaluator.evaluate(
              dataset=dataset,
@@ -64,7 +127,7 @@ class EvaluationOps:
              metric=metric,
              evaluated_task=llm_task,
              num_threads=self.num_threads,
-             project_name=experiment_config["project_name"],
+             project_name=experiment_config.get("project_name"),
              n_samples=n_samples if dataset_item_ids is None else None,
              experiment_config=experiment_config,
              optimization_id=optimization_id,
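
End to end, the evaluation path now supports a two-pass MCP workflow: pass one lets the agent call the MCP tool, the coordinator captures the tool output to build second-pass messages (with an optional direct-invocation fallback), and pass two produces the final answer, with tool use disabled unless `allow_tool_use_on_second_pass` is set. A hedged sketch of supplying that configuration (attribute names follow this diff; the constructor call, `coordinator` object, helper lambdas, and evaluation entry point are placeholders):

```python
from opik_optimizer.mcp_utils.mcp_workflow import MCPExecutionConfig

# Assumption: `coordinator` is an MCPSecondPassCoordinator-like object
# exposing reset() and build_second_pass_messages(...) as used above.
mcp_config = MCPExecutionConfig(
    coordinator=coordinator,
    fallback_arguments=lambda item: {"query": item["question"]},  # placeholder
    fallback_invoker=lambda args: call_tool_directly(args),       # placeholder
    allow_tool_use_on_second_pass=False,
)

# The evaluation method reads this via kwargs.get("mcp_config"); the method
# name below is illustrative, not confirmed by this diff.
score = optimizer._evaluate_prompt(prompt, dataset, metric, mcp_config=mcp_config)
```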