opik-optimizer 0.9.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +7 -3
- opik_optimizer/_throttle.py +8 -8
- opik_optimizer/base_optimizer.py +98 -45
- opik_optimizer/cache_config.py +5 -3
- opik_optimizer/datasets/ai2_arc.py +15 -13
- opik_optimizer/datasets/cnn_dailymail.py +19 -15
- opik_optimizer/datasets/election_questions.py +10 -11
- opik_optimizer/datasets/gsm8k.py +16 -11
- opik_optimizer/datasets/halu_eval.py +6 -5
- opik_optimizer/datasets/hotpot_qa.py +17 -16
- opik_optimizer/datasets/medhallu.py +10 -7
- opik_optimizer/datasets/rag_hallucinations.py +11 -8
- opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer/datasets/tiny_test.py +33 -37
- opik_optimizer/datasets/truthful_qa.py +18 -12
- opik_optimizer/demo/cache.py +6 -6
- opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
- opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
- opik_optimizer/logging_config.py +19 -15
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
- opik_optimizer/meta_prompt_optimizer/reporting.py +121 -46
- opik_optimizer/mipro_optimizer/__init__.py +2 -0
- opik_optimizer/mipro_optimizer/_lm.py +38 -9
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
- opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer/optimization_config/chat_prompt.py +143 -73
- opik_optimizer/optimization_config/configs.py +4 -3
- opik_optimizer/optimization_config/mappers.py +18 -6
- opik_optimizer/optimization_result.py +22 -13
- opik_optimizer/py.typed +0 -0
- opik_optimizer/reporting_utils.py +89 -58
- opik_optimizer/task_evaluator.py +12 -14
- opik_optimizer/utils.py +117 -14
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
- opik_optimizer-1.0.0.dist-info/RECORD +50 -0
- opik_optimizer-0.9.2.dist-info/RECORD +0 -48
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,7 @@
 import opik
 
-def rag_hallucinations(
-
-) -> opik.Dataset:
+
+def rag_hallucinations(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the RAG Hallucinations dataset.
     """
@@ -11,20 +10,24 @@ def rag_hallucinations(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
    items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset(
-
+        hf_dataset = ds.load_dataset(
+            "aporia-ai/rag_hallucinations", download_config=download_config
+        )
+
         data = [
             {
                 "context": item["context"],
@@ -35,7 +38,7 @@ def rag_hallucinations(
             for item in hf_dataset["train"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
 
         return dataset
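Taken together, the rag_hallucinations.py hunks above converge on a single loader shape. A minimal consolidated sketch of that flow, reusing the Opik and Hugging Face datasets calls that appear in the diff; the dataset_name values and the field mapping inside data are illustrative, since they sit outside the hunks shown:

import opik


def rag_hallucinations_sketch(test_mode: bool = False) -> opik.Dataset:
    # Illustrative names; the real module computes dataset_name outside the shown hunks.
    dataset_name = "rag_hallucinations" if not test_mode else "rag_hallucinations_test"
    nb_items = 300 if not test_mode else 5

    client = opik.Opik()
    dataset = client.get_or_create_dataset(dataset_name)

    items = dataset.get_items()
    if len(items) == nb_items:
        return dataset  # already populated, reuse as-is
    elif len(items) != 0:
        raise ValueError(
            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. "
            "We recommend deleting the dataset and re-creating it."
        )

    import datasets as ds

    download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
    ds.disable_progress_bar()
    hf_dataset = ds.load_dataset(
        "aporia-ai/rag_hallucinations", download_config=download_config
    )
    data = [
        {"context": item["context"]}  # the real loader maps additional fields
        for item in hf_dataset["train"].select(range(nb_items))
    ]
    ds.enable_progress_bar()

    dataset.insert(data)
    return dataset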
opik_optimizer/datasets/ragbench.py
CHANGED
@@ -1,30 +1,38 @@
 import opik
 
-def ragbench_sentence_relevance(
-
-) -> opik.Dataset:
+
+def ragbench_sentence_relevance(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the RAGBench sentence relevance dataset.
     """
-    dataset_name =
+    dataset_name = (
+        "ragbench_sentence_relevance"
+        if not test_mode
+        else "ragbench_sentence_relevance_test"
+    )
     nb_items = 300 if not test_mode else 5
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset(
-
+        hf_dataset = ds.load_dataset(
+            "wandb/ragbench-sentence-relevance-balanced",
+            download_config=download_config,
+        )
+
         data = [
             {
                 "question": item["question"],
@@ -34,7 +42,7 @@ def ragbench_sentence_relevance(
             for item in hf_dataset["train"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
 
         return dataset
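With the simplified 1.0.0 signatures, both loaders are plain single-argument calls. A usage sketch, assuming a configured Opik client and network access to Hugging Face; the module paths follow the file layout in the summary above:

from opik_optimizer.datasets.rag_hallucinations import rag_hallucinations
from opik_optimizer.datasets.ragbench import ragbench_sentence_relevance

rag_ds = rag_hallucinations(test_mode=True)              # small 5-item test slice
relevance_ds = ragbench_sentence_relevance(test_mode=True)
print(len(rag_ds.get_items()), len(relevance_ds.get_items()))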
opik_optimizer/datasets/tiny_test.py
CHANGED
@@ -1,42 +1,37 @@
 import opik
 
 TINY_TEST_ITEMS = [
-
-
-
-
-
-
+    {
+        "text": "What is the capital of France?",
+        "label": "Paris",
+        "metadata": {"context": "France is a country in Europe. Its capital is Paris."},
+    },
+    {
+        "text": "Who wrote Romeo and Juliet?",
+        "label": "William Shakespeare",
+        "metadata": {
+            "context": "Romeo and Juliet is a famous play written by William Shakespeare."
         },
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        },
-    },
-    {
-        "text": "Who painted the Mona Lisa?",
-        "label": "Leonardo da Vinci",
-        "metadata": {"context": "The Mona Lisa was painted by Leonardo da Vinci."},
-    },
-]
+    },
+    {
+        "text": "What is 2 + 2?",
+        "label": "4",
+        "metadata": {"context": "Basic arithmetic: 2 + 2 equals 4."},
+    },
+    {
+        "text": "What is the largest planet in our solar system?",
+        "label": "Jupiter",
+        "metadata": {"context": "Jupiter is the largest planet in our solar system."},
+    },
+    {
+        "text": "Who painted the Mona Lisa?",
+        "label": "Leonardo da Vinci",
+        "metadata": {"context": "The Mona Lisa was painted by Leonardo da Vinci."},
+    },
+]
 
-def tiny_test(
-
-) -> opik.Dataset:
+
+def tiny_test(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 5 samples of the HotpotQA dataset.
     """
@@ -45,13 +40,14 @@ def tiny_test(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         dataset.insert(TINY_TEST_ITEMS)
         return dataset
-
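TINY_TEST_ITEMS now spells out its five fixtures inline, which also documents the item schema (text, label, metadata.context). A small consumption sketch, assuming the module-level constant remains importable from the module shown in the file summary:

from opik_optimizer.datasets.tiny_test import TINY_TEST_ITEMS, tiny_test

for item in TINY_TEST_ITEMS:
    print(f'{item["text"]} -> {item["label"]}')

# On first use the loader inserts TINY_TEST_ITEMS into an Opik dataset and
# returns it; subsequent calls reuse the already-populated dataset.
dataset = tiny_test(test_mode=True)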
opik_optimizer/datasets/truthful_qa.py
CHANGED
@@ -1,8 +1,8 @@
 import opik
+from typing import Any, Dict, List
 
-def truthful_qa(
-
-) -> opik.Dataset:
+
+def truthful_qa(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the TruthfulQA dataset.
     """
@@ -11,29 +11,35 @@ def truthful_qa(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-
-        gen_dataset = ds.load_dataset(
-
-
-
+
+        gen_dataset = ds.load_dataset(
+            "truthful_qa", "generation", download_config=download_config
+        )
+        mc_dataset = ds.load_dataset(
+            "truthful_qa", "multiple_choice", download_config=download_config
+        )
+
+        data: List[Dict[str, Any]] = []
         for gen_item, mc_item in zip(
             gen_dataset["validation"], mc_dataset["validation"]
         ):
            if len(data) >= nb_items:
                break
-
+
            # Get correct answers from both configurations
            correct_answers = set(gen_item["correct_answers"])
            if "mc1_targets" in mc_item:
@@ -101,7 +107,7 @@ def truthful_qa(
        if all(field in example and example[field] for field in required_fields):
            data.append(example)
        ds.enable_progress_bar()
-
+
        dataset.insert(data)
 
        return dataset
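The new truthful_qa body loads both TruthfulQA configurations and zips their validation splits so each example can combine free-form correct answers with multiple-choice targets. A standalone sketch of that merge step; the example schema beyond question and correct_answers is an assumption, since it falls outside the hunks shown:

from typing import Any, Dict, List

import datasets as ds

gen_dataset = ds.load_dataset("truthful_qa", "generation")
mc_dataset = ds.load_dataset("truthful_qa", "multiple_choice")

data: List[Dict[str, Any]] = []
for gen_item, mc_item in zip(gen_dataset["validation"], mc_dataset["validation"]):
    if len(data) >= 5:
        break
    # Free-form correct answers come from the generation split; the
    # multiple-choice split contributes mc1_targets/mc2_targets when present.
    correct_answers = set(gen_item["correct_answers"])
    has_mc1 = "mc1_targets" in mc_item  # mirrors the branch condition in the hunk
    data.append(
        {"question": gen_item["question"], "correct_answers": sorted(correct_answers)}
    )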
opik_optimizer/demo/cache.py
CHANGED
@@ -4,7 +4,7 @@ import shutil
 import os
 import litellm
 from litellm.caching import Cache
-import requests
+import requests  # type: ignore
 
 NAMED_CACHES = {
     "test": "https://drive.google.com/file/d/1RifNtpN-pl0DW49daRaAMJwW7MCsOh6y/view?usp=sharing",
@@ -14,7 +14,7 @@ NAMED_CACHES = {
 CACHE_DIR = os.path.expanduser("~/.litellm_cache")
 
 
-def get_litellm_cache(name: str):
+def get_litellm_cache(name: str) -> None:
     """
     Get a LiteLLM cache from a remote location, and add it to the
     local cache
@@ -52,7 +52,7 @@ def get_litellm_cache(name: str):
     litellm.cache = Cache(type="disk", disk_cache_dir=CACHE_DIR)
 
 
-def _copy_cache(source_path, dest_path):
+def _copy_cache(source_path: str, dest_path: str) -> None:
     """
     Copy cached items from a source to a destination cache.
     """
@@ -63,7 +63,7 @@ def _copy_cache(source_path, dest_path):
     dest_conn = sqlite3.connect(dest_path)
     dest_cursor = dest_conn.cursor()
 
-    source_cursor.execute(
+    source_cursor.execute("PRAGMA table_info(Cache)")
     columns_info = source_cursor.fetchall()
     column_names = [info[1] for info in columns_info[1:]]  # Skip rowid
     placeholders = ", ".join(["?"] * len(column_names))
@@ -91,14 +91,14 @@ def _copy_cache(source_path, dest_path):
     dest_conn.commit()
 
 
-def _get_google_drive_file(file_url):
+def _get_google_drive_file(file_url: str) -> str:
     """
     Given a common google drive URL with id=ID
     get it, or use cache.
     """
     parsed_url = urlparse(file_url)
     query_params = parse_qs(parsed_url.query)
-    id_value = query_params.get("id")[0]
+    id_value = query_params.get("id")[0]  # type: ignore
 
     cache_file_path = os.path.join(CACHE_DIR, id_value)
 
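The demo cache helpers now carry full type annotations. An example of pulling a pre-built LiteLLM cache by name (a sketch: "test" is one of the NAMED_CACHES keys above, and the download needs access to the linked Google Drive file):

from opik_optimizer.demo.cache import get_litellm_cache

# Downloads (or reuses a previously fetched copy of) the named cache and
# merges its entries into the local disk cache at ~/.litellm_cache.
get_litellm_cache("test")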
opik_optimizer/demo/datasets.py
CHANGED
@@ -1,10 +1,5 @@
 import opik
-from typing import Literal
-from .. import utils
-from datasets import load_dataset
-import traceback
-from importlib.resources import files
-import json
+from typing import Literal
 import warnings
 from ..datasets import (
     hotpot_300,
@@ -21,6 +16,7 @@ from ..datasets import (
     rag_hallucinations,
 )
 
+
 class HaltError(Exception):
     """Exception raised when we need to halt the process due to a critical error."""
 
@@ -51,7 +47,7 @@ def get_or_create_dataset(
         "This function is deprecated. Please use the datasets directly from opik_optimizer.datasets module instead."
         " For example: opik_optimizer.datasets.truthful_qa() or opik_optimizer.datasets.rag_hallucination()",
         DeprecationWarning,
-        stacklevel=2
+        stacklevel=2,
    )
    if name == "hotpot-300":
        dataset = hotpot_300(test_mode)