opik-optimizer 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +28 -11
  4. opik_optimizer/colbert.py +236 -0
  5. opik_optimizer/data/context7_eval.jsonl +3 -0
  6. opik_optimizer/datasets/context7_eval.py +90 -0
  7. opik_optimizer/datasets/tiny_test.py +33 -34
  8. opik_optimizer/datasets/truthful_qa.py +2 -2
  9. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  10. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
  11. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
  12. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  13. opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +152 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +20 -20
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +18 -17
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +21 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/{utils.py → utils/core.py} +111 -26
  46. opik_optimizer/utils/dataset_utils.py +49 -0
  47. opik_optimizer/utils/prompt_segments.py +186 -0
  48. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
  49. opik_optimizer-1.1.0.dist-info/RECORD +73 -0
  50. opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
  51. opik_optimizer-1.0.5.dist-info/RECORD +0 -50
  52. opik_optimizer-1.0.5.dist-info/licenses/LICENSE +0 -21
  53. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
  54. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
opik_optimizer/__init__.py

@@ -12,6 +12,7 @@ from .optimizable_agent import OptimizableAgent
 from .optimization_config.chat_prompt import ChatPrompt
 from .base_optimizer import BaseOptimizer
 from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
+from .gepa_optimizer import GepaOptimizer
 from .logging_config import setup_logging
 from .meta_prompt_optimizer import MetaPromptOptimizer
 from .optimization_config.configs import TaskConfig
@@ -28,6 +29,7 @@ __all__ = [
     "BaseOptimizer",
     "ChatPrompt",
     "FewShotBayesianOptimizer",
+    "GepaOptimizer",
     "MetaPromptOptimizer",
     "EvolutionaryOptimizer",
     "OptimizationResult",
opik_optimizer/_throttle.py

@@ -3,7 +3,8 @@ import pyrate_limiter
 import time
 import opik.config

-from typing import Callable, Any
+from typing import Any
+from collections.abc import Callable


 class RateLimiter:
opik_optimizer/base_optimizer.py

@@ -1,4 +1,5 @@
-from typing import Any, Callable, Dict, List, Optional, Type
+from typing import Any
+from collections.abc import Callable

 import logging
 import time
@@ -59,7 +60,7 @@ class BaseOptimizer:
         self.reasoning_model = model
         self.model_kwargs = model_kwargs
         self.verbose = verbose
-        self._history: List[OptimizationRound] = []
+        self._history: list[OptimizationRound] = []
         self.experiment_config = None
         self.llm_call_counter = 0

@@ -72,7 +73,7 @@ class BaseOptimizer:
         prompt: "chat_prompt.ChatPrompt",
         dataset: Dataset,
         metric: Callable,
-        experiment_config: Optional[Dict] = None,
+        experiment_config: dict | None = None,
         **kwargs: Any,
     ) -> optimization_result.OptimizationResult:
         """
@@ -90,7 +91,23 @@
         """
         pass

-    def get_history(self) -> List[OptimizationRound]:
+    def optimize_mcp(
+        self,
+        prompt: "chat_prompt.ChatPrompt",
+        dataset: Dataset,
+        metric: Callable,
+        *,
+        tool_name: str,
+        second_pass: Any,
+        experiment_config: dict | None = None,
+        **kwargs: Any,
+    ) -> optimization_result.OptimizationResult:
+        """Optimize prompts that rely on MCP tooling."""
+        raise NotImplementedError(
+            f"{self.__class__.__name__} does not implement optimize_mcp yet."
+        )
+
+    def get_history(self) -> list[OptimizationRound]:
         """
         Get the optimization history.

@@ -133,11 +150,11 @@
         metric: Callable,
         n_threads: int,
         verbose: int = 1,
-        dataset_item_ids: Optional[List[str]] = None,
-        experiment_config: Optional[Dict] = None,
-        n_samples: Optional[int] = None,
-        seed: Optional[int] = None,
-        agent_class: Optional[Type[OptimizableAgent]] = None,
+        dataset_item_ids: list[str] | None = None,
+        experiment_config: dict | None = None,
+        n_samples: int | None = None,
+        seed: int | None = None,
+        agent_class: type[OptimizableAgent] | None = None,
     ) -> float:
         random.seed(seed)

@@ -146,7 +163,7 @@
         if prompt.model_kwargs is None:
             prompt.model_kwargs = self.model_kwargs

-        self.agent_class: Type[OptimizableAgent]
+        self.agent_class: type[OptimizableAgent]

         if agent_class is None:
             self.agent_class = create_litellm_agent_class(prompt)
@@ -155,7 +172,7 @@

         agent = self.agent_class(prompt)

-        def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, str]:
+        def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
             messages = prompt.get_messages(dataset_item)
             raw_model_output = agent.invoke(messages)
             cleaned_model_output = raw_model_output.strip()
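
The new optimize_mcp hook gives every optimizer a common entry point for MCP tool prompts while defaulting to a clear failure. A minimal sketch of the resulting contract, assuming BaseOptimizer subclasses can be constructed with just a model keyword (not confirmed by this diff):

    # Hypothetical sketch: only optimize_mcp's signature is taken from the diff above;
    # the constructor kwarg and placeholder arguments are assumptions.
    from opik_optimizer import BaseOptimizer

    class MyOptimizer(BaseOptimizer):
        pass  # no optimize_mcp override, so the base fallback applies

    optimizer = MyOptimizer(model="openai/gpt-4o-mini")  # assumed kwarg
    try:
        optimizer.optimize_mcp(
            prompt=None,  # placeholders; real calls pass a ChatPrompt, Dataset, metric
            dataset=None,
            metric=None,
            tool_name="get-library-docs",
            second_pass=None,
        )
    except NotImplementedError as exc:
        print(exc)  # "MyOptimizer does not implement optimize_mcp yet."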
opik_optimizer/colbert.py

@@ -0,0 +1,236 @@
+"""
+Minimal ColBERTv2 implementation extracted from dspy (MIT license).
+
+This module provides a lightweight implementation of ColBERTv2 search functionality
+without requiring the full dspy dependency.
+"""
+
+import copy
+import time
+from typing import Any
+import requests  # type: ignore[import-untyped]
+from requests.adapters import HTTPAdapter  # type: ignore[import-untyped]
+from urllib3.util.retry import Retry
+
+
+def _create_session_with_retries(max_retries: int = 4) -> requests.Session:
+    """
+    Create a requests session with retry configuration.
+
+    Args:
+        max_retries: Maximum number of retry attempts
+
+    Returns:
+        Configured requests session
+    """
+    session = requests.Session()
+
+    retry_strategy = Retry(
+        total=max_retries,
+        backoff_factor=1,  # Wait 1, 2, 4, 8 seconds between retries
+        status_forcelist=[429, 500, 502, 503, 504],  # HTTP status codes to retry on
+        allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"],
+    )
+
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+
+    return session
+
+
+class dotdict(dict):
+    """Dictionary with attribute access (extracted from dspy)."""
+
+    def __getattr__(self, key: str) -> Any:
+        if key.startswith("__") and key.endswith("__"):
+            return super().__getattribute__(key)
+        try:
+            return self[key]
+        except KeyError:
+            raise AttributeError(
+                f"'{type(self).__name__}' object has no attribute '{key}'"
+            )
+
+    def __setattr__(self, key: str, value: Any) -> None:
+        if key.startswith("__") and key.endswith("__"):
+            super().__setattr__(key, value)
+        else:
+            self[key] = value
+
+    def __delattr__(self, key: str) -> None:
+        if key.startswith("__") and key.endswith("__"):
+            super().__delattr__(key)
+        else:
+            del self[key]
+
+    def __deepcopy__(self, memo: dict[Any, Any]) -> "dotdict":
+        # Use the default dict copying method to avoid infinite recursion.
+        return dotdict(copy.deepcopy(dict(self), memo))
+
+
+def colbertv2_get_request(
+    url: str, query: str, k: int, max_retries: int = 4
+) -> list[dict[str, Any]]:
+    """
+    Make a GET request to ColBERTv2 server with retry logic.
+
+    Args:
+        url: The ColBERTv2 server URL
+        query: The search query
+        k: Number of results to return
+        max_retries: Maximum number of retry attempts
+
+    Returns:
+        List of search results
+    """
+    assert k <= 100, (
+        "Only k <= 100 is supported for the hosted ColBERTv2 server at the moment."
+    )
+
+    session = _create_session_with_retries(max_retries)
+    payload: dict[str, str | int] = {"query": query, "k": k}
+
+    # Application-level retry for server connection errors
+    for attempt in range(max_retries):
+        try:
+            res = session.get(url, params=payload, timeout=5)
+            response_data = res.json()
+
+            # Check for application-level errors (server connection issues, etc.)
+            if "error" in response_data and response_data["error"]:
+                error_msg = response_data.get("message", "Unknown error")
+                # If it's a connection error, retry; otherwise, fail immediately
+                if (
+                    "Cannot connect to host" in error_msg
+                    or "Connection refused" in error_msg
+                ):
+                    if attempt == max_retries - 1:
+                        raise Exception(f"ColBERTv2 server error: {error_msg}")
+                    time.sleep(1)  # Wait 1 second before retrying
+                    continue
+                else:
+                    raise Exception(f"ColBERTv2 server error: {error_msg}")
+
+            if "topk" not in response_data:
+                raise Exception(
+                    f"Unexpected response format from ColBERTv2 server: {list(response_data.keys())}"
+                )
+
+            topk = response_data["topk"][:k]
+            topk = [{**d, "long_text": d["text"]} for d in topk]
+            return topk[:k]
+
+        except requests.RequestException as e:
+            if attempt == max_retries - 1:
+                raise Exception(f"ColBERTv2 request failed: {str(e)}")
+            time.sleep(1)  # Wait 1 second before retrying
+
+    # This should never be reached, but mypy requires a return statement
+    raise Exception("Unexpected end of retry loop")
+
+
+def colbertv2_post_request(
+    url: str, query: str, k: int, max_retries: int = 4
+) -> list[dict[str, Any]]:
+    """
+    Make a POST request to ColBERTv2 server with retry logic.
+
+    Args:
+        url: The ColBERTv2 server URL
+        query: The search query
+        k: Number of results to return
+        max_retries: Maximum number of retry attempts
+
+    Returns:
+        List of search results
+    """
+    session = _create_session_with_retries(max_retries)
+    headers = {"Content-Type": "application/json; charset=utf-8"}
+    payload = {"query": query, "k": k}
+
+    # Application-level retry for server connection errors
+    for attempt in range(max_retries):
+        try:
+            res = session.post(url, json=payload, headers=headers, timeout=5)
+            response_data = res.json()
+
+            # Check for application-level errors (server connection issues, etc.)
+            if "error" in response_data and response_data["error"]:
+                error_msg = response_data.get("message", "Unknown error")
+                # If it's a connection error, retry; otherwise, fail immediately
+                if (
+                    "Cannot connect to host" in error_msg
+                    or "Connection refused" in error_msg
+                ):
+                    if attempt == max_retries - 1:
+                        raise Exception(f"ColBERTv2 server error: {error_msg}")
+                    time.sleep(1)  # Wait 1 second before retrying
+                    continue
+                else:
+                    raise Exception(f"ColBERTv2 server error: {error_msg}")
+
+            if "topk" not in response_data:
+                raise Exception(
+                    f"Unexpected response format from ColBERTv2 server: {list(response_data.keys())}"
+                )
+
+            return response_data["topk"][:k]
+
+        except requests.RequestException as e:
+            if attempt == max_retries - 1:
+                raise Exception(f"ColBERTv2 request failed: {str(e)}")
+            time.sleep(1)  # Wait 1 second before retrying
+
+    # This should never be reached, but mypy requires a return statement
+    raise Exception("Unexpected end of retry loop")
+
+
+class ColBERTv2:
+    """Wrapper for the ColBERTv2 Retrieval (extracted from dspy)."""
+
+    def __init__(
+        self,
+        url: str = "http://0.0.0.0",
+        port: str | int | None = None,
+        post_requests: bool = False,
+    ):
+        """
+        Initialize ColBERTv2 client.
+
+        Args:
+            url: Base URL for the ColBERTv2 server
+            port: Optional port number
+            post_requests: Whether to use POST requests instead of GET
+        """
+        self.post_requests = post_requests
+        self.url = f"{url}:{port}" if port else url
+
+    def __call__(
+        self,
+        query: str,
+        k: int = 10,
+        simplify: bool = False,
+        max_retries: int = 4,
+    ) -> list[str] | list[dotdict]:
+        """
+        Search using ColBERTv2.
+
+        Args:
+            query: The search query
+            k: Number of results to return
+            simplify: If True, return only text strings; if False, return dotdict objects
+            max_retries: Maximum number of retry attempts
+
+        Returns:
+            List of search results (either strings or dotdict objects)
+        """
+        if self.post_requests:
+            topk_results = colbertv2_post_request(self.url, query, k, max_retries)
+        else:
+            topk_results = colbertv2_get_request(self.url, query, k, max_retries)
+
+        if simplify:
+            return [psg["long_text"] for psg in topk_results]
+
+        return [dotdict(psg) for psg in topk_results]
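
A usage sketch for the vendored client; the host and port below are placeholders for wherever a ColBERTv2 server is running:

    from opik_optimizer.colbert import ColBERTv2

    retriever = ColBERTv2(url="http://localhost", port=8893)  # hypothetical endpoint
    passages = retriever("what is a colbert index?", k=3, simplify=True)
    for text in passages:
        print(text)  # with simplify=True each result is a plain string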
opik_optimizer/data/context7_eval.jsonl

@@ -0,0 +1,3 @@
+{"id": "ctx-001", "user_query": "Using the Context7 library ID /vercel/next.js, how can I route users down different UI flows with the App Router?", "expected_tool": "get-library-docs", "arguments": {"context7CompatibleLibraryID": "/vercel/next.js", "topic": "routing", "tokens": 1500}, "reference_answer": "The App Router handles conditional experiences with parallel routes. Create directories that start with @ to declare each slot, provide a default.tsx so the route still renders when a branch is missing, and decide which slot to render inside your layout based on the user's state. This lets you show different UI branches without blocking navigation."}
+{"id": "ctx-002", "user_query": "With library ID /supabase/supabase, what do the docs recommend for keeping edge functions secure?", "expected_tool": "get-library-docs", "arguments": {"context7CompatibleLibraryID": "/supabase/supabase", "topic": "security", "tokens": 1200}, "reference_answer": "Supabase recommends enabling Row Level Security (RLS) on your Postgres tables so edge functions can only access data allowed by fine-grained policies. Run `alter table ... enable row level security;` (for example on the `todos` table) to enforce those policies and prevent unauthorized access."}
+{"id": "ctx-003", "user_query": "Given /mongodb/docs, remind me what makes up the basic aggregation pipeline.", "expected_tool": "get-library-docs", "arguments": {"context7CompatibleLibraryID": "/mongodb/docs", "topic": "aggregation", "tokens": 1000}, "reference_answer": "An aggregation pipeline runs ordered stages such as $match, $group, $project, $sort, and $limit. Each stage accepts the stream of documents from the previous stage so you can filter, reshape, and summarize the data step by step."}
opik_optimizer/datasets/context7_eval.py

@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from importlib import resources
+from typing import Any, Union
+
+try:  # pragma: no cover - optional dependency
+    import opik  # type: ignore
+except ImportError:  # pragma: no cover - fallback for tests
+    opik = None
+
+from opik_optimizer.utils.dataset_utils import attach_uuids, dataset_suffix
+
+OpikDataset = Any
+
+DATA_PACKAGE = "opik_optimizer.data"
+DATA_FILENAME = "context7_eval.jsonl"
+DATASET_NAME = "context7_eval"
+
+
+def _load_examples() -> list[dict[str, Any]]:
+    text = (
+        resources.files(DATA_PACKAGE)
+        .joinpath(DATA_FILENAME)
+        .read_text(encoding="utf-8")
+    )
+    return [json.loads(line) for line in text.splitlines() if line.strip()]
+
+
+def _dataset_name(test_mode: bool) -> str:
+    suffix = dataset_suffix(DATA_PACKAGE, DATA_FILENAME)
+    return f"{DATASET_NAME}_{suffix}{'_test' if test_mode else ''}"
+
+
+@dataclass
+class _ListDataset:
+    name: str
+    _items: list[dict[str, Any]]
+
+    def __post_init__(self) -> None:
+        for idx, item in enumerate(self._items):
+            item.setdefault("id", f"{self.name}-{idx}")
+        self.id = self.name
+
+    def copy(self) -> _ListDataset:
+        return _ListDataset(self.name, [dict(item) for item in self._items])
+
+    def get_items(self, nb_samples: int | None = None) -> list[dict[str, Any]]:
+        if nb_samples is None:
+            return [dict(item) for item in self._items]
+        return [dict(item) for item in self._items[:nb_samples]]
+
+
+DatasetResult = Union["_ListDataset", OpikDataset]
+
+
+def load_context7_dataset(test_mode: bool = False) -> DatasetResult:
+    """Return the context7 synthetic dataset as an Opik dataset when available."""
+
+    examples = _load_examples()
+    dataset_name = _dataset_name(test_mode)
+
+    if opik is None:
+        return _ListDataset(dataset_name, examples)
+
+    try:
+        client = opik.Opik()
+        dataset: OpikDataset = client.get_or_create_dataset(dataset_name)
+        items = dataset.get_items()
+        expected_len = len(examples) if not test_mode else min(len(examples), 2)
+
+        if len(items) == expected_len:
+            return dataset
+        if len(items) != 0:  # pragma: no cover - defensive path
+            raise ValueError(
+                f"Dataset {dataset_name} already exists with {len(items)} items. Delete it to regenerate."
+            )
+
+        if test_mode:
+            dataset.insert(attach_uuids(examples[:expected_len]))
+        else:
+            dataset.insert(attach_uuids(examples))
+        return dataset
+    except Exception:
+        # If Opik client fails (e.g., no API key configured), fall back to local dataset
+        return _ListDataset(dataset_name, examples)
+
+
+__all__ = ["load_context7_dataset"]
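
A usage sketch for the loader; when no Opik client is available it falls back to the in-memory _ListDataset, so get_items works either way:

    from opik_optimizer.datasets.context7_eval import load_context7_dataset

    dataset = load_context7_dataset(test_mode=True)  # at most 2 items in test mode
    for item in dataset.get_items(nb_samples=1):
        print(item["id"], item["expected_tool"])  # e.g. ctx-001 get-library-docs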
opik_optimizer/datasets/tiny_test.py

@@ -1,42 +1,12 @@
 import opik

-TINY_TEST_ITEMS = [
-    {
-        "text": "What is the capital of France?",
-        "label": "Paris",
-        "metadata": {"context": "France is a country in Europe. Its capital is Paris."},
-    },
-    {
-        "text": "Who wrote Romeo and Juliet?",
-        "label": "William Shakespeare",
-        "metadata": {
-            "context": "Romeo and Juliet is a famous play written by William Shakespeare."
-        },
-    },
-    {
-        "text": "What is 2 + 2?",
-        "label": "4",
-        "metadata": {"context": "Basic arithmetic: 2 + 2 equals 4."},
-    },
-    {
-        "text": "What is the largest planet in our solar system?",
-        "label": "Jupiter",
-        "metadata": {"context": "Jupiter is the largest planet in our solar system."},
-    },
-    {
-        "text": "Who painted the Mona Lisa?",
-        "label": "Leonardo da Vinci",
-        "metadata": {"context": "The Mona Lisa was painted by Leonardo da Vinci."},
-    },
-]
-

 def tiny_test(test_mode: bool = False) -> opik.Dataset:
     """
-    Dataset containing the first 5 samples of the HotpotQA dataset.
+    Tiny QA benchmark (core_en subset from vincentkoc/tiny_qa_benchmark_pp).
     """
     dataset_name = "tiny_test" if not test_mode else "tiny_test_test"
-    nb_items = len(TINY_TEST_ITEMS)
+    nb_items = 5  # keep tiny dataset size consistent with tests/docs

     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
@@ -49,5 +19,34 @@ def tiny_test(test_mode: bool = False) -> opik.Dataset:
             f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
         )
     elif len(items) == 0:
-        dataset.insert(TINY_TEST_ITEMS)
-    return dataset
+        import datasets as ds
+
+        download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
+        ds.disable_progress_bar()
+        try:
+            # Load only the core_en subset JSONL from the repo
+            # Use the generic JSON loader with streaming for efficiency
+            hf_dataset = ds.load_dataset(
+                "json",
+                data_files="hf://datasets/vincentkoc/tiny_qa_benchmark_pp/data/core_en/core_en.jsonl",
+                streaming=True,
+                download_config=download_config,
+            )["train"]
+
+            data = []
+            for i, item in enumerate(hf_dataset):
+                if i >= nb_items:
+                    break
+                data.append(
+                    {
+                        "text": item.get("text", ""),
+                        "label": item.get("label", ""),
+                        # Preserve original tiny_test shape with metadata.context
+                        "metadata": {"context": item.get("context", "")},
+                    }
+                )
+
+            dataset.insert(data)
+            return dataset
+        finally:
+            ds.enable_progress_bar()
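
Because the item shape (text, label, metadata.context) is preserved, existing callers of tiny_test keep working; a quick check, assuming a configured Opik client:

    from opik_optimizer.datasets.tiny_test import tiny_test

    dataset = tiny_test(test_mode=True)
    for item in dataset.get_items():
        print(item["text"], "->", item["label"])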
opik_optimizer/datasets/truthful_qa.py

@@ -1,5 +1,5 @@
 import opik
-from typing import Any, Dict, List
+from typing import Any


 def truthful_qa(test_mode: bool = False) -> opik.Dataset:
@@ -33,7 +33,7 @@ def truthful_qa(test_mode: bool = False) -> opik.Dataset:
         "truthful_qa", "multiple_choice", download_config=download_config
     )

-    data: List[Dict[str, Any]] = []
+    data: list[dict[str, Any]] = []
     for gen_item, mc_item in zip(
         gen_dataset["validation"], mc_dataset["validation"]
     ):