opik-optimizer 0.9.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. opik_optimizer/__init__.py +7 -3
  2. opik_optimizer/_throttle.py +8 -8
  3. opik_optimizer/base_optimizer.py +98 -45
  4. opik_optimizer/cache_config.py +5 -3
  5. opik_optimizer/datasets/ai2_arc.py +15 -13
  6. opik_optimizer/datasets/cnn_dailymail.py +19 -15
  7. opik_optimizer/datasets/election_questions.py +10 -11
  8. opik_optimizer/datasets/gsm8k.py +16 -11
  9. opik_optimizer/datasets/halu_eval.py +6 -5
  10. opik_optimizer/datasets/hotpot_qa.py +17 -16
  11. opik_optimizer/datasets/medhallu.py +10 -7
  12. opik_optimizer/datasets/rag_hallucinations.py +11 -8
  13. opik_optimizer/datasets/ragbench.py +17 -9
  14. opik_optimizer/datasets/tiny_test.py +33 -37
  15. opik_optimizer/datasets/truthful_qa.py +18 -12
  16. opik_optimizer/demo/cache.py +6 -6
  17. opik_optimizer/demo/datasets.py +3 -7
  18. opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
  19. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
  20. opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
  21. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
  22. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
  23. opik_optimizer/logging_config.py +19 -15
  24. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
  25. opik_optimizer/meta_prompt_optimizer/reporting.py +121 -46
  26. opik_optimizer/mipro_optimizer/__init__.py +2 -0
  27. opik_optimizer/mipro_optimizer/_lm.py +38 -9
  28. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
  29. opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
  30. opik_optimizer/mipro_optimizer/utils.py +5 -2
  31. opik_optimizer/optimizable_agent.py +179 -0
  32. opik_optimizer/optimization_config/chat_prompt.py +143 -73
  33. opik_optimizer/optimization_config/configs.py +4 -3
  34. opik_optimizer/optimization_config/mappers.py +18 -6
  35. opik_optimizer/optimization_result.py +22 -13
  36. opik_optimizer/py.typed +0 -0
  37. opik_optimizer/reporting_utils.py +89 -58
  38. opik_optimizer/task_evaluator.py +12 -14
  39. opik_optimizer/utils.py +117 -14
  40. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
  41. opik_optimizer-1.0.0.dist-info/RECORD +50 -0
  42. opik_optimizer-0.9.2.dist-info/RECORD +0 -48
  43. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
  44. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
  45. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/datasets/rag_hallucinations.py
@@ -1,8 +1,7 @@
 import opik
 
-def rag_hallucinations(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def rag_hallucinations(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the RAG Hallucinations dataset.
     """
@@ -11,20 +10,24 @@ def rag_hallucinations(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset("aporia-ai/rag_hallucinations", download_config=download_config)
-
+        hf_dataset = ds.load_dataset(
+            "aporia-ai/rag_hallucinations", download_config=download_config
+        )
+
         data = [
             {
                 "context": item["context"],
@@ -35,7 +38,7 @@ def rag_hallucinations(
             for item in hf_dataset["train"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
 
     return dataset
opik_optimizer/datasets/ragbench.py
@@ -1,30 +1,38 @@
 import opik
 
-def ragbench_sentence_relevance(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def ragbench_sentence_relevance(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the RAGBench sentence relevance dataset.
     """
-    dataset_name = "ragbench_sentence_relevance" if not test_mode else "ragbench_sentence_relevance_test"
+    dataset_name = (
+        "ragbench_sentence_relevance"
+        if not test_mode
+        else "ragbench_sentence_relevance_test"
+    )
     nb_items = 300 if not test_mode else 5
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset("wandb/ragbench-sentence-relevance-balanced", download_config=download_config)
-
+        hf_dataset = ds.load_dataset(
+            "wandb/ragbench-sentence-relevance-balanced",
+            download_config=download_config,
+        )
+
         data = [
             {
                 "question": item["question"],
@@ -34,7 +42,7 @@ def ragbench_sentence_relevance(
             for item in hf_dataset["train"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
 
     return dataset
opik_optimizer/datasets/tiny_test.py
@@ -1,42 +1,37 @@
 import opik
 
 TINY_TEST_ITEMS = [
-        {
-            "text": "What is the capital of France?",
-            "label": "Paris",
-            "metadata": {
-                "context": "France is a country in Europe. Its capital is Paris."
-            },
+    {
+        "text": "What is the capital of France?",
+        "label": "Paris",
+        "metadata": {"context": "France is a country in Europe. Its capital is Paris."},
+    },
+    {
+        "text": "Who wrote Romeo and Juliet?",
+        "label": "William Shakespeare",
+        "metadata": {
+            "context": "Romeo and Juliet is a famous play written by William Shakespeare."
         },
-        {
-            "text": "Who wrote Romeo and Juliet?",
-            "label": "William Shakespeare",
-            "metadata": {
-                "context": "Romeo and Juliet is a famous play written by William Shakespeare."
-            },
-        },
-        {
-            "text": "What is 2 + 2?",
-            "label": "4",
-            "metadata": {"context": "Basic arithmetic: 2 + 2 equals 4."},
-        },
-        {
-            "text": "What is the largest planet in our solar system?",
-            "label": "Jupiter",
-            "metadata": {
-                "context": "Jupiter is the largest planet in our solar system."
-            },
-        },
-        {
-            "text": "Who painted the Mona Lisa?",
-            "label": "Leonardo da Vinci",
-            "metadata": {"context": "The Mona Lisa was painted by Leonardo da Vinci."},
-        },
-    ]
+    },
+    {
+        "text": "What is 2 + 2?",
+        "label": "4",
+        "metadata": {"context": "Basic arithmetic: 2 + 2 equals 4."},
+    },
+    {
+        "text": "What is the largest planet in our solar system?",
+        "label": "Jupiter",
+        "metadata": {"context": "Jupiter is the largest planet in our solar system."},
+    },
+    {
+        "text": "Who painted the Mona Lisa?",
+        "label": "Leonardo da Vinci",
+        "metadata": {"context": "The Mona Lisa was painted by Leonardo da Vinci."},
+    },
+]
 
-def tiny_test(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def tiny_test(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 5 samples of the HotpotQA dataset.
     """
@@ -45,13 +40,14 @@ def tiny_test(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         dataset.insert(TINY_TEST_ITEMS)
     return dataset
-
opik_optimizer/datasets/truthful_qa.py
@@ -1,8 +1,8 @@
 import opik
+from typing import Any, Dict, List
 
-def truthful_qa(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def truthful_qa(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the TruthfulQA dataset.
     """
@@ -11,29 +11,35 @@ def truthful_qa(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
        download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
        ds.disable_progress_bar()
-
-        gen_dataset = ds.load_dataset("truthful_qa", "generation", download_config=download_config)
-        mc_dataset = ds.load_dataset("truthful_qa", "multiple_choice", download_config=download_config)
-
-        data = []
+
+        gen_dataset = ds.load_dataset(
+            "truthful_qa", "generation", download_config=download_config
+        )
+        mc_dataset = ds.load_dataset(
+            "truthful_qa", "multiple_choice", download_config=download_config
+        )
+
+        data: List[Dict[str, Any]] = []
         for gen_item, mc_item in zip(
             gen_dataset["validation"], mc_dataset["validation"]
         ):
             if len(data) >= nb_items:
                 break
-
+
             # Get correct answers from both configurations
             correct_answers = set(gen_item["correct_answers"])
             if "mc1_targets" in mc_item:
@@ -101,7 +107,7 @@ def truthful_qa(
             if all(field in example and example[field] for field in required_fields):
                 data.append(example)
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
 
     return dataset
opik_optimizer/demo/cache.py
@@ -4,7 +4,7 @@ import shutil
 import os
 import litellm
 from litellm.caching import Cache
-import requests
+import requests  # type: ignore
 
 NAMED_CACHES = {
     "test": "https://drive.google.com/file/d/1RifNtpN-pl0DW49daRaAMJwW7MCsOh6y/view?usp=sharing",
@@ -14,7 +14,7 @@ NAMED_CACHES = {
 CACHE_DIR = os.path.expanduser("~/.litellm_cache")
 
 
-def get_litellm_cache(name: str):
+def get_litellm_cache(name: str) -> None:
     """
     Get a LiteLLM cache from a remote location, and add it to the
     local cache
@@ -52,7 +52,7 @@ def get_litellm_cache(name: str):
     litellm.cache = Cache(type="disk", disk_cache_dir=CACHE_DIR)
 
 
-def _copy_cache(source_path, dest_path):
+def _copy_cache(source_path: str, dest_path: str) -> None:
     """
     Copy cached items from a source to a destination cache.
     """
@@ -63,7 +63,7 @@ def _copy_cache(source_path, dest_path):
     dest_conn = sqlite3.connect(dest_path)
     dest_cursor = dest_conn.cursor()
 
-    source_cursor.execute(f"PRAGMA table_info(Cache)")
+    source_cursor.execute("PRAGMA table_info(Cache)")
     columns_info = source_cursor.fetchall()
     column_names = [info[1] for info in columns_info[1:]]  # Skip rowid
     placeholders = ", ".join(["?"] * len(column_names))
@@ -91,14 +91,14 @@ def _copy_cache(source_path, dest_path):
     dest_conn.commit()
 
 
-def _get_google_drive_file(file_url):
+def _get_google_drive_file(file_url: str) -> str:
     """
     Given a common google drive URL with id=ID
     get it, or use cache.
     """
     parsed_url = urlparse(file_url)
     query_params = parse_qs(parsed_url.query)
-    id_value = query_params.get("id")[0]
+    id_value = query_params.get("id")[0]  # type: ignore
 
     cache_file_path = os.path.join(CACHE_DIR, id_value)
 
opik_optimizer/demo/datasets.py
@@ -1,10 +1,5 @@
 import opik
-from typing import Literal, List, Dict, Any
-from .. import utils
-from datasets import load_dataset
-import traceback
-from importlib.resources import files
-import json
+from typing import Literal
 import warnings
 from ..datasets import (
     hotpot_300,
@@ -21,6 +16,7 @@ from ..datasets import (
     rag_hallucinations,
 )
 
+
 class HaltError(Exception):
     """Exception raised when we need to halt the process due to a critical error."""
 
@@ -51,7 +47,7 @@ def get_or_create_dataset(
         "This function is deprecated. Please use the datasets directly from opik_optimizer.datasets module instead."
         " For example: opik_optimizer.datasets.truthful_qa() or opik_optimizer.datasets.rag_hallucination()",
         DeprecationWarning,
-        stacklevel=2
+        stacklevel=2,
     )
     if name == "hotpot-300":
         dataset = hotpot_300(test_mode)
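
The deprecation message above steers callers to the opik_optimizer.datasets loaders directly. A minimal sketch of the replacement call pattern, using function names shown in this diff (that these loaders are re-exported from opik_optimizer.datasets is an assumption based on the imports above):

    from opik_optimizer import datasets

    # Loaders return an opik.Dataset; test_mode=True builds the small
    # "<name>_test" variant instead of the full dataset.
    qa = datasets.truthful_qa(test_mode=True)
    rag = datasets.rag_hallucinations(test_mode=True)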
opik_optimizer/evolutionary_optimizer/__init__.py
@@ -1 +1,3 @@
-from .evolutionary_optimizer import EvolutionaryOptimizer
+from .evolutionary_optimizer import EvolutionaryOptimizer
+
+__all__ = ["EvolutionaryOptimizer"]
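
With __all__ declared, the subpackage's public surface is explicit. A minimal import sketch (constructor arguments are omitted because evolutionary_optimizer.py itself is not shown in this excerpt):

    # Import path follows the __init__.py above; wildcard imports now only pick up this name.
    from opik_optimizer.evolutionary_optimizer import EvolutionaryOptimizer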