opik-optimizer 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +65 -0
- opik_optimizer/_throttle.py +43 -0
- opik_optimizer/base_optimizer.py +240 -0
- opik_optimizer/cache_config.py +24 -0
- opik_optimizer/demo/__init__.py +7 -0
- opik_optimizer/demo/cache.py +112 -0
- opik_optimizer/demo/datasets.py +656 -0
- opik_optimizer/few_shot_bayesian_optimizer/__init__.py +5 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +408 -0
- opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +91 -0
- opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +80 -0
- opik_optimizer/integrations/__init__.py +0 -0
- opik_optimizer/logging_config.py +69 -0
- opik_optimizer/meta_prompt_optimizer.py +1100 -0
- opik_optimizer/mipro_optimizer/__init__.py +1 -0
- opik_optimizer/mipro_optimizer/_lm.py +394 -0
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1058 -0
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +395 -0
- opik_optimizer/mipro_optimizer/utils.py +107 -0
- opik_optimizer/optimization_config/__init__.py +0 -0
- opik_optimizer/optimization_config/configs.py +35 -0
- opik_optimizer/optimization_config/mappers.py +49 -0
- opik_optimizer/optimization_result.py +211 -0
- opik_optimizer/task_evaluator.py +102 -0
- opik_optimizer/utils.py +132 -0
- opik_optimizer-0.7.0.dist-info/METADATA +35 -0
- opik_optimizer-0.7.0.dist-info/RECORD +30 -0
- opik_optimizer-0.7.0.dist-info/WHEEL +5 -0
- opik_optimizer-0.7.0.dist-info/licenses/LICENSE +21 -0
- opik_optimizer-0.7.0.dist-info/top_level.txt +1 -0
opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py
@@ -0,0 +1,408 @@
import random
from typing import Any, Dict, List, Tuple, Union, Optional, Callable, Literal
import openai
import opik
import optuna
import logging
import json

from opik import Dataset
from opik_optimizer.optimization_config import mappers

from opik_optimizer.optimization_config.configs import TaskConfig, MetricConfig
from opik_optimizer import base_optimizer

from . import prompt_parameter
from . import prompt_templates
from .._throttle import RateLimiter, rate_limited
from .. import optimization_result, task_evaluator

import litellm

from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor

limiter = RateLimiter(max_calls_per_second=15)

logger = logging.getLogger(__name__)

@rate_limited(limiter)
def _call_model(model, messages, seed, model_kwargs):
    model_kwargs = opik_litellm_monitor.try_add_opik_monitoring_to_params(model_kwargs)

    response = litellm.completion(
        model=model,
        messages=messages,
        seed=seed,
        **model_kwargs,
    )

    return response


class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
    def __init__(
        self,
        model: str,
        project_name: Optional[str] = None,
        min_examples: int = 2,
        max_examples: int = 8,
        seed: int = 42,
        n_threads: int = 8,
        n_initial_prompts: int = 5,
        n_iterations: int = 10,
        **model_kwargs,
    ) -> None:
        super().__init__(model, project_name, **model_kwargs)
        self.min_examples = min_examples
        self.max_examples = max_examples
        self.seed = seed
        self.n_threads = n_threads
        self.n_initial_prompts = n_initial_prompts
        self.n_iterations = n_iterations

        self._opik_client = opik.Opik()
        logger.debug(f"Initialized FewShotBayesianOptimizer with model: {model}")

    def _split_dataset(
        self, dataset: List[Dict[str, Any]], train_ratio: float
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Split the dataset into training and validation sets.

        Args:
            dataset: List of dataset items
            train_ratio: Ratio of items to use for training

        Returns:
            Tuple of (train_set, validation_set)
        """
        if not dataset:
            return [], []

        random.seed(self.seed)
        dataset = dataset.copy()
        random.shuffle(dataset)

        split_idx = int(len(dataset) * train_ratio)
        return dataset[:split_idx], dataset[split_idx:]

    def _optimize_prompt(
        self,
        dataset: Union[str, Dataset],
        metric_config: MetricConfig,
        task_config: TaskConfig,
        n_trials: int = 10,
        optimization_id: Optional[str] = None,
        experiment_config: Optional[Dict] = None,
        n_samples: int = None,
    ) -> optimization_result.OptimizationResult:
        random.seed(self.seed)

        if not task_config.use_chat_prompt:
            raise ValueError(
                "Few-shot Bayesian optimization is only supported for chat prompts."
            )

        opik_dataset: opik.Dataset = dataset

        # Load the dataset
        if isinstance(dataset, str):
            opik_dataset = self._opik_client.get_dataset(dataset)
            dataset_items = opik_dataset.get_items()
        else:
            opik_dataset = dataset
            dataset_items = opik_dataset.get_items()

        experiment_config = experiment_config or {}
        base_experiment_config = {  # Base config for reuse
            **experiment_config,
            **{
                "optimizer": self.__class__.__name__,
                "metric": metric_config.metric.name,
                "dataset": opik_dataset.name,
                "configuration": {},
            },
        }

        # Evaluate Initial (Zero-Shot) Prompt
        logger.info("Evaluating initial (zero-shot) prompt...")
        initial_instruction = task_config.instruction_prompt
        zero_shot_param = prompt_parameter.ChatPromptParameter(
            name="zero_shot_prompt",
            instruction=initial_instruction,
            task_input_parameters=task_config.input_dataset_fields,
            task_output_parameter=task_config.output_dataset_field,
            demo_examples=[],  # No examples
        )
        zero_shot_llm_task = self._build_task_from_prompt_template(
            zero_shot_param.as_template()
        )

        initial_eval_config = base_experiment_config.copy()
        initial_eval_config["configuration"]["prompt"] = initial_instruction
        initial_eval_config["configuration"]["n_examples"] = 0

        # Determine dataset item IDs for evaluation (initial and trials)
        all_dataset_item_ids = [item["id"] for item in dataset_items]
        eval_dataset_item_ids = all_dataset_item_ids
        if n_samples is not None and n_samples < len(all_dataset_item_ids):
            eval_dataset_item_ids = random.sample(all_dataset_item_ids, n_samples)
            logger.info(f"Using {n_samples} samples for evaluations.")
        else:
            logger.info(
                f"Using all {len(all_dataset_item_ids)} samples for evaluations."
            )

        initial_score = task_evaluator.evaluate(
            dataset=opik_dataset,
            dataset_item_ids=eval_dataset_item_ids,
            metric_config=metric_config,
            evaluated_task=zero_shot_llm_task,
            num_threads=self.n_threads,
            project_name=self.project_name,
            experiment_config=initial_eval_config,
            optimization_id=optimization_id,
        )
        logger.info(f"Initial (zero-shot) score: {initial_score:.4f}")

        # Start Optuna Study
        logger.info("Starting Optuna study for Few-Shot Bayesian Optimization...")

        def optimization_objective(trial: optuna.Trial) -> float:
            n_examples = trial.suggest_int(
                "n_examples", self.min_examples, self.max_examples
            )
            available_indices = list(range(len(dataset_items)))
            example_indices = random.sample(available_indices, n_examples)
            trial.set_user_attr("example_indices", example_indices)

            instruction = task_config.instruction_prompt
            demo_examples = [dataset_items[idx] for idx in example_indices]

            processed_demo_examples = []
            for example in demo_examples:
                processed_example = {}
                for key, value in example.items():
                    processed_example[key] = str(value)
                processed_demo_examples.append(processed_example)

            param = prompt_parameter.ChatPromptParameter(
                name=f"trial_{trial.number}_prompt",
                instruction=instruction,
                task_input_parameters=task_config.input_dataset_fields,
                task_output_parameter=task_config.output_dataset_field,
                demo_examples=processed_demo_examples,
            )

            llm_task = self._build_task_from_prompt_template(param.as_template())

            # Log trial config
            trial_config = base_experiment_config.copy()
            trial_config["configuration"]["prompt"] = instruction  # Base instruction
            trial_config["configuration"][
                "examples"
            ] = processed_demo_examples  # Log stringified examples
            trial_config["configuration"]["n_examples"] = n_examples
            trial_config["configuration"]["example_indices"] = example_indices

            logger.debug(
                f"Trial {trial.number}: n_examples={n_examples}, indices={example_indices}"
            )
            logger.debug(f"Evaluating trial {trial.number}...")

            score = task_evaluator.evaluate(
                dataset=opik_dataset,
                dataset_item_ids=eval_dataset_item_ids,
                metric_config=metric_config,
                evaluated_task=llm_task,
                num_threads=self.n_threads,
                project_name=self.project_name,
                experiment_config=trial_config,
                optimization_id=optimization_id,
            )
            logger.debug(f"Trial {trial.number} score: {score:.4f}")

            trial.set_user_attr("score", score)
            trial.set_user_attr("param", param)
            return score

        # Configure Optuna Logging
        try:
            optuna.logging.disable_default_handler()
            optuna_logger = logging.getLogger("optuna")
            package_level = logging.getLogger("opik_optimizer").getEffectiveLevel()
            optuna_logger.setLevel(package_level)
            optuna_logger.propagate = False
            logger.debug(
                f"Optuna logger configured to level {logging.getLevelName(package_level)} and set to not propagate."
            )
        except Exception as e:
            logger.warning(f"Could not configure Optuna logging within optimizer: {e}")

        study = optuna.create_study(direction="maximize")
        study.optimize(optimization_objective, n_trials=n_trials)
        logger.info("Optuna study finished.")

        best_trial = study.best_trial
        best_score = best_trial.value
        best_n_examples = best_trial.params["n_examples"]
        best_example_indices = best_trial.user_attrs.get("example_indices", [])
        best_param: prompt_parameter.ChatPromptParameter = best_trial.user_attrs[
            "param"
        ]

        chat_messages_list = best_param.as_template().format()
        main_prompt_string = best_param.instruction

        return optimization_result.OptimizationResult(
            prompt=main_prompt_string,
            score=best_score,
            metric_name=metric_config.metric.name,
            details={
                "prompt_type": "chat" if task_config.use_chat_prompt else "non-chat",
                "chat_messages": chat_messages_list,
                "prompt_parameter": best_param,
                "n_examples": best_n_examples,
                "example_indices": best_example_indices,
                "trial_number": best_trial.number,
                "initial_score": initial_score,
                "total_trials": n_trials,
                "rounds": [],
                "stopped_early": False,
                "metric_config": metric_config.dict(),
                "task_config": task_config.dict(),
                "model": self.model,
                "temperature": self.model_kwargs.get("temperature"),
            },
        )

    def optimize_prompt(
        self,
        dataset: Union[str, Dataset],
        metric_config: MetricConfig,
        task_config: TaskConfig,
        n_trials: int = 10,
        experiment_config: Optional[Dict] = None,
        n_samples: int = None,
    ) -> optimization_result.OptimizationResult:
        optimization = None
        try:
            optimization = self._opik_client.create_optimization(
                dataset_name=dataset.name,
                objective_name=metric_config.metric.name,
            )
        except Exception:
            logger.warning(
                "Opik server does not support optimizations. Please upgrade opik."
            )
            optimization = None

        try:
            result = self._optimize_prompt(
                optimization_id=optimization.id if optimization is not None else None,
                dataset=dataset,
                metric_config=metric_config,
                task_config=task_config,
                n_trials=n_trials,
                experiment_config=experiment_config,
                n_samples=n_samples,
            )
            if optimization:
                self.update_optimization(optimization, status="completed")
            return result
        except Exception as e:
            if optimization:
                self.update_optimization(optimization, status="cancelled")
            logger.error(f"FewShotBayesian optimization failed: {e}", exc_info=True)
            raise e

    def evaluate_prompt(
        self,
        prompt: List[Dict[Literal["role", "content"], str]],
        dataset: opik.Dataset,
        metric_config: MetricConfig,
        task_config: Optional[TaskConfig] = None,
        dataset_item_ids: Optional[List[str]] = None,
        experiment_config: Optional[Dict] = None,
        n_samples: int = None,
    ) -> float:

        if isinstance(prompt, str):
            if task_config is None:
                raise ValueError(
                    "To use a string prompt, please pass in task_config to evaluate_prompt()"
                )

            questions = {
                field: ("{{%s}}" % field) for field in task_config.input_dataset_fields
            }
            prompt = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": json.dumps(questions)},
            ]

        # Ensure prompt is correctly formatted
        if not all(
            isinstance(item, dict) and "role" in item and "content" in item
            for item in prompt
        ):
            raise ValueError(
                "A ChatPrompt must be a list of dictionaries with 'role' and 'content' keys."
            )

        template = prompt_templates.ChatPromptTemplate(
            prompt, validate_placeholders=False
        )
        llm_task = self._build_task_from_prompt_template(template)

        experiment_config = experiment_config or {}
        experiment_config = {
            **experiment_config,
            **{
                "optimizer": self.__class__.__name__,
                "metric": metric_config.metric.name,
                "dataset": dataset.name,
                "configuration": {
                    "examples": prompt,
                },
            },
        }

        if n_samples is not None:
            if dataset_item_ids is not None:
                raise Exception("Can't use n_samples and dataset_item_ids")

            all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
            dataset_item_ids = random.sample(all_ids, n_samples)

        logger.debug(f"Starting FewShotBayesian evaluation...")
        score = task_evaluator.evaluate(
            dataset=dataset,
            dataset_item_ids=dataset_item_ids,
            metric_config=metric_config,
            evaluated_task=llm_task,
            num_threads=self.n_threads,
            project_name=self.project_name,
            experiment_config=experiment_config,
        )
        logger.debug(f"Evaluation score: {score:.4f}")

        return score

    def _build_task_from_prompt_template(
        self, template: prompt_templates.ChatPromptTemplate
    ):
        def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, Any]:
            prompt_ = template.format(**dataset_item)

            response = _call_model(
                model=self.model,
                messages=prompt_,
                seed=self.seed,
                model_kwargs=self.model_kwargs,
            )

            return {
                mappers.EVALUATED_LLM_TASK_OUTPUT: response.choices[0].message.content
            }

        return llm_task
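For orientation, a minimal usage sketch of the optimizer above (not part of the package diff). The model string, project and dataset names are placeholders, and the MetricConfig/TaskConfig construction is an assumption inferred from the fields this file reads off those objects; opik_optimizer/optimization_config/configs.py (+35 above) defines the actual schema.

# Hypothetical usage sketch -- values and config construction are assumptions, not from the diff.
import opik
from opik.evaluation.metrics import LevenshteinRatio  # any opik metric exposing .name

from opik_optimizer.few_shot_bayesian_optimizer.few_shot_bayesian_optimizer import (
    FewShotBayesianOptimizer,
)
from opik_optimizer.optimization_config.configs import MetricConfig, TaskConfig

optimizer = FewShotBayesianOptimizer(
    model="gpt-4o-mini",        # any LiteLLM model identifier
    project_name="my-project",
    min_examples=2,             # lower bound for the Optuna-suggested n_examples
    max_examples=8,             # upper bound for the Optuna-suggested n_examples
    n_threads=4,
)

dataset = opik.Opik().get_dataset("my-dataset")  # a dataset name (str) is also accepted

result = optimizer.optimize_prompt(
    dataset=dataset,
    # MetricConfig may require more fields than shown; only .metric is read in this file.
    metric_config=MetricConfig(metric=LevenshteinRatio()),
    task_config=TaskConfig(
        instruction_prompt="Answer the question.",
        input_dataset_fields=["question"],
        output_dataset_field="answer",
        use_chat_prompt=True,   # this optimizer rejects non-chat prompts
    ),
    n_trials=10,                # number of Optuna trials
    n_samples=50,               # optional subsample of dataset items per evaluation
)
print(result.prompt)
print(result.score)
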
opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py
@@ -0,0 +1,91 @@
import pydantic
from typing import List, Dict, Literal
import json

from . import prompt_templates

ChatItem = Dict[Literal["role", "content"], str]


class ChatPromptParameter(pydantic.BaseModel):
    name: str
    instruction: str
    task_input_parameters: List[str]
    task_output_parameter: str
    demo_examples: List[Dict[str, str]] = pydantic.Field(default_factory=list)

    _few_shot_system_prompt_intro: str = "You are an intelligent assistant that learns from few-shot examples provided earlier in the conversation. Whenever you respond, carefully follow the structure, tone, and format of previous assistant replies, using them as a guide"

    def as_template(self) -> prompt_templates.ChatPromptTemplate:
        if not self.demo_examples:
            return prompt_templates.ChatPromptTemplate(
                chat_template=[
                    {
                        "role": "system",
                        "content": self.instruction
                    },
                    {
                        "role": "user",
                        "content": json.dumps({param: f"{{{{{param}}}}}" for param in self.task_input_parameters})
                    }
                ]
            )

        return prompt_templates.ChatPromptTemplate(
            chat_template=[
                {
                    "role": "system",
                    "content": self.instruction + f"\n\n{self._few_shot_system_prompt_intro}"
                },
                *self._render_demos(),
                {
                    "role": "user",
                    "content": json.dumps({param: f"{{{{{param}}}}}" for param in self.task_input_parameters})
                }
            ]
        )

    def _render_demos(self) -> List[ChatItem]:
        """
        Renders demo examples in the following format:

        [
            {
                "role": "user",
                "content": "\n{\n\"input_field1\": \"value1\",\n\"input_field2\": \"value2\"\n}\n"
            },
            {
                "role": "assistant",
                "content": "expected_response_1"
            },
            {
                "role": "user",
                "content": "\n{\n\"input_field1\": \"value3\",\n\"input_field2\": \"value4\"\n}\n"
            },
            {
                "role": "assistant",
                "content": "expected_response_2"
            }
        ]
        """
        chat_items: List[ChatItem] = []

        for example in self.demo_examples:
            inputs = {param: example[param] for param in self.task_input_parameters}

            formatted_input = json.dumps(inputs, indent=2)

            user_message: ChatItem = {
                "role": "user",
                "content": f"\n{formatted_input}\n"
            }

            assistant_message: ChatItem = {
                "role": "assistant",
                "content": example[self.task_output_parameter]
            }

            chat_items.append(user_message)
            chat_items.append(assistant_message)

        return chat_items
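To make the rendering above concrete, here is a small illustration (not from the package) of what as_template().format(...) yields for a toy parameter; the values are made up and the output shown in comments is approximate.

# Illustrative only -- parameter values are made up.
from opik_optimizer.few_shot_bayesian_optimizer.prompt_parameter import ChatPromptParameter

param = ChatPromptParameter(
    name="demo",
    instruction="Answer the question concisely.",
    task_input_parameters=["question"],
    task_output_parameter="answer",
    demo_examples=[{"question": "What is 2 + 2?", "answer": "4"}],
)

messages = param.as_template().format(question="What is the capital of France?")
# Roughly:
# [
#   {"role": "system", "content": "Answer the question concisely.\n\n<few-shot intro>"},
#   {"role": "user", "content": '\n{\n  "question": "What is 2 + 2?"\n}\n'},
#   {"role": "assistant", "content": "4"},
#   {"role": "user", "content": '{"question": "What is the capital of France?"}'},
# ]
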
opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py
@@ -0,0 +1,80 @@
import abc
from typing import Dict, List, Literal, Any
from typing_extensions import override
import opik
from opik.api_objects.prompt import prompt_template as opik_prompt_template

ChatItem = Dict[Literal["role", "content"], str]


class BaseTemplate(abc.ABC):
    @abc.abstractmethod
    def format(self, **kwargs: Any) -> Any:
        raise NotImplementedError

class PromptTemplate(BaseTemplate):
    """Wrapper for opik PromptTemplate which is a subclass of BaseTemplate."""
    def __init__(
        self,
        template: str,
        validate_placeholders: bool = False,
        type: opik.PromptType = opik.PromptType.MUSTACHE
    ) -> None:
        self._opik_prompt_template = opik_prompt_template.PromptTemplate(
            template=template,
            validate_placeholders=validate_placeholders,
            type=type
        )

    @override
    def format(self, **kwargs: Any) -> str:
        return self._opik_prompt_template.format(**kwargs)


class ChatItemTemplate(BaseTemplate):
    def __init__(
        self,
        role: str,
        prompt_template: PromptTemplate
    ) -> None:
        self._role = role
        self._prompt_template = prompt_template

    @override
    def format(self, **kwargs: Any) -> ChatItem:
        return {
            "role": self._role,
            "content": self._prompt_template.format(**kwargs)
        }

class ChatPromptTemplate(BaseTemplate):
    def __init__(
        self,
        chat_template: List[Dict[str, str]],
        type: opik.PromptType = opik.PromptType.MUSTACHE,
        validate_placeholders: bool = False,
    ) -> None:
        self._raw_chat_template = chat_template
        self._type = type
        self._validate_placeholders = validate_placeholders
        self._init_chat_template_items()

    def _init_chat_template_items(self) -> None:
        self._chat_template_items: List[ChatItemTemplate] = [
            ChatItemTemplate(
                role=item["role"],
                prompt_template=PromptTemplate(
                    item["content"],
                    type=self._type,
                    validate_placeholders=self._validate_placeholders,
                )
            )
            for item in self._raw_chat_template
        ]

    @override
    def format(self, **kwargs: Any) -> List[ChatItem]:
        return [
            item.format(**kwargs)
            for item in self._chat_template_items
        ]
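As a quick illustration (not part of the diff), each message's content is rendered through opik's mustache-style PromptTemplate, so {{placeholder}} tokens are substituted on every format() call:

# Illustrative only.
from opik_optimizer.few_shot_bayesian_optimizer.prompt_templates import ChatPromptTemplate

template = ChatPromptTemplate(
    chat_template=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the following text: {{text}}"},
    ]
)
messages = template.format(text="Opik Optimizer tunes prompts against a metric.")
# [{"role": "system", "content": "You are a helpful assistant."},
#  {"role": "user", "content": "Summarize the following text: Opik Optimizer tunes prompts against a metric."}]
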
opik_optimizer/integrations/__init__.py
File without changes
opik_optimizer/logging_config.py
@@ -0,0 +1,69 @@
import logging
from rich.logging import RichHandler

DEFAULT_LOG_FORMAT = '%(message)s'
DEFAULT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

# Store configured state to prevent reconfiguration
_logging_configured = False

def setup_logging(
    level=logging.WARNING,
    format_string=DEFAULT_LOG_FORMAT,
    date_format=DEFAULT_DATE_FORMAT,
    force=False,
):
    """
    Configures logging for the opik_optimizer package using rich.

    Args:
        level: The desired logging level (e.g., logging.DEBUG, logging.INFO, logging.WARNING).
        format_string: The format string for log messages.
        date_format: The format string for the date/time in log messages.
        force: If True, reconfigure logging even if already configured.
    """
    global _logging_configured
    if _logging_configured and not force:
        # Use logger after getting it
        return

    # Configure opik_optimizer package logger
    package_logger = logging.getLogger('opik_optimizer')

    # Avoid adding handlers repeatedly if force=True replaces them
    if not package_logger.handlers or force:
        # Remove existing handlers if forcing re-configuration
        if force and package_logger.handlers:
            for handler in package_logger.handlers[:]:
                package_logger.removeHandler(handler)

        console_handler = RichHandler(
            rich_tracebacks=True,
            markup=True,  # Enable rich markup in log messages
            log_time_format=f"[{date_format}]"  # Apply date format
        )
        # RichHandler manages formatting, so we don't need a separate formatter
        # formatter = logging.Formatter(format_string, datefmt=date_format)
        # console_handler.setFormatter(formatter)
        package_logger.addHandler(console_handler)

    package_logger.setLevel(level)
    package_logger.propagate = False  # Don't duplicate messages in root logger

    # Set levels for noisy libraries like LiteLLM and httpx
    logging.getLogger("LiteLLM").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("dspy").setLevel(logging.WARNING)
    logging.getLogger("datasets").setLevel(logging.WARNING)
    logging.getLogger("optuna").setLevel(logging.WARNING)
    logging.getLogger("filelock").setLevel(logging.WARNING)

    _logging_configured = True

    # Use level name provided by rich handler by default
    package_logger.info(f"Opik Optimizer logging configured to level: [bold cyan]{logging.getLevelName(level)}[/bold cyan]")

# Ensure logger obtained after setup can be used immediately if needed
logger = logging.getLogger(__name__)
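A minimal sketch of how this helper is typically called from user code (not part of the diff); the DEBUG level here is an arbitrary choice:

import logging

from opik_optimizer.logging_config import setup_logging

setup_logging(level=logging.DEBUG)              # attaches a RichHandler to the package logger
setup_logging(level=logging.INFO)               # no-op: logging is already configured
setup_logging(level=logging.INFO, force=True)   # explicitly reconfigures the handler and level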