opik-optimizer 0.9.1__tar.gz → 1.0.0__tar.gz

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (63)
  1. {opik_optimizer-0.9.1/src/opik_optimizer.egg-info → opik_optimizer-1.0.0}/PKG-INFO +8 -8
  2. opik_optimizer-1.0.0/pyproject.toml +11 -0
  3. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/setup.py +11 -14
  4. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/__init__.py +7 -3
  5. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/_throttle.py +8 -8
  6. opik_optimizer-1.0.0/src/opik_optimizer/base_optimizer.py +198 -0
  7. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/cache_config.py +5 -3
  8. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ai2_arc.py +15 -13
  9. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/cnn_dailymail.py +19 -15
  10. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/election_questions.py +10 -11
  11. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/gsm8k.py +16 -11
  12. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/halu_eval.py +6 -5
  13. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/hotpot_qa.py +17 -16
  14. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/medhallu.py +10 -7
  15. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/rag_hallucinations.py +11 -8
  16. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ragbench.py +17 -9
  17. opik_optimizer-1.0.0/src/opik_optimizer/datasets/tiny_test.py +53 -0
  18. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/truthful_qa.py +18 -12
  19. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/cache.py +6 -6
  20. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/datasets.py +3 -7
  21. opik_optimizer-1.0.0/src/opik_optimizer/evolutionary_optimizer/__init__.py +3 -0
  22. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +748 -437
  23. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/evolutionary_optimizer/reporting.py +155 -76
  24. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +291 -181
  25. opik_optimizer-1.0.0/src/opik_optimizer/few_shot_bayesian_optimizer/reporting.py +170 -0
  26. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/logging_config.py +19 -15
  27. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +234 -138
  28. opik_optimizer-1.0.0/src/opik_optimizer/meta_prompt_optimizer/reporting.py +214 -0
  29. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/__init__.py +2 -0
  30. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/_lm.py +41 -9
  31. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
  32. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/mipro_optimizer.py +135 -67
  33. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/utils.py +5 -2
  34. opik_optimizer-1.0.0/src/opik_optimizer/optimizable_agent.py +179 -0
  35. opik_optimizer-1.0.0/src/opik_optimizer/optimization_config/chat_prompt.py +176 -0
  36. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/configs.py +4 -3
  37. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/mappers.py +18 -6
  38. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_result.py +28 -20
  39. opik_optimizer-1.0.0/src/opik_optimizer/py.typed +0 -0
  40. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/reporting_utils.py +96 -46
  41. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/task_evaluator.py +12 -14
  42. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/utils.py +122 -37
  43. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0/src/opik_optimizer.egg-info}/PKG-INFO +8 -8
  44. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/SOURCES.txt +2 -0
  45. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/requires.txt +7 -7
  46. opik_optimizer-0.9.1/pyproject.toml +0 -3
  47. opik_optimizer-0.9.1/src/opik_optimizer/base_optimizer.py +0 -145
  48. opik_optimizer-0.9.1/src/opik_optimizer/datasets/tiny_test.py +0 -57
  49. opik_optimizer-0.9.1/src/opik_optimizer/evolutionary_optimizer/__init__.py +0 -1
  50. opik_optimizer-0.9.1/src/opik_optimizer/few_shot_bayesian_optimizer/reporting.py +0 -119
  51. opik_optimizer-0.9.1/src/opik_optimizer/meta_prompt_optimizer/reporting.py +0 -140
  52. opik_optimizer-0.9.1/src/opik_optimizer/optimization_config/chat_prompt.py +0 -106
  53. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/LICENSE +0 -0
  54. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/README.md +0 -0
  55. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/setup.cfg +0 -0
  56. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/data/hotpot-500.json +0 -0
  57. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/__init__.py +0 -0
  58. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/__init__.py +0 -0
  59. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/few_shot_bayesian_optimizer/__init__.py +0 -0
  60. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/meta_prompt_optimizer/__init__.py +0 -0
  61. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/__init__.py +0 -0
  62. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/dependency_links.txt +0 -0
  63. {opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/top_level.txt +0 -0
{opik_optimizer-0.9.1/src/opik_optimizer.egg-info → opik_optimizer-1.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opik_optimizer
-Version: 0.9.1
+Version: 1.0.0
 Summary: Agent optimization with Opik
 Home-page: https://github.com/comet-ml/opik
 Author: Comet ML
@@ -12,17 +12,17 @@ Classifier: Programming Language :: Python :: 3.10
 Requires-Python: >=3.9,<3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: opik>=1.7.17
-Requires-Dist: dspy<=2.6.24,>=2.6.18
-Requires-Dist: litellm
-Requires-Dist: tqdm
 Requires-Dist: datasets
+Requires-Dist: deap>=1.4.3
+Requires-Dist: diskcache
+Requires-Dist: hf_xet
+Requires-Dist: litellm
+Requires-Dist: opik>=1.7.17
 Requires-Dist: optuna
-Requires-Dist: pydantic
 Requires-Dist: pandas
-Requires-Dist: hf_xet
+Requires-Dist: pydantic
 Requires-Dist: pyrate-limiter
-Requires-Dist: deap>=1.4.3
+Requires-Dist: tqdm
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-conv; extra == "dev"
opik_optimizer-1.0.0/pyproject.toml (new file)

@@ -0,0 +1,11 @@
+[tool.mypy]
+follow_imports = "skip"
+ignore_missing_imports = true
+disallow_untyped_defs = true
+disallow_untyped_calls = true
+check_untyped_defs = true
+exclude = "src/opik_optimizer/mipro_optimizer/"
+
+
+[tool.uv]
+managed = false
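The new [tool.mypy] block turns on strict annotation checking for the whole package except the excluded mipro_optimizer/ tree, which accounts for the annotation churn in the hunks below (added return types, typed *args/**kwargs). A minimal sketch of what the strict flags demand of every function:

    from typing import Any


    # disallow_untyped_defs / disallow_untyped_calls: parameters and the return
    # type must all be annotated, even for pass-through wrappers.
    def wrapper(*args: Any, **kwargs: Any) -> None:
        ...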
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/setup.py

@@ -2,38 +2,35 @@ from setuptools import find_packages, setup
 
 setup(
     name="opik_optimizer",
-    version="0.9.1",
+    version="1.0.0",
     description="Agent optimization with Opik",
     author="Comet ML",
     author_email="support@comet.com",
     long_description=open("README.md", encoding="utf-8").read(),
-    long_description_content_type='text/markdown',
+    long_description_content_type="text/markdown",
     url="https://github.com/comet-ml/opik",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     package_data={
-        'opik_optimizer': ['data/*.json'],
+        "opik_optimizer": ["data/*.json"],
    },
     python_requires=">=3.9,<3.13",
     install_requires=[
-        "opik>=1.7.17",
-        "dspy>=2.6.18,<=2.6.24",
-        "litellm",
-        "tqdm",
         "datasets",
+        "deap>=1.4.3",
+        "diskcache",
+        "hf_xet",
+        "litellm",
+        "opik>=1.7.17",
         "optuna",
-        "pydantic",
         "pandas",
-        "hf_xet",
+        "pydantic",
         "pyrate-limiter",
-        "deap>=1.4.3",
+        "tqdm",
     ],
     # dev requirements
     extras_require={
-        "dev": [
-            "pytest",
-            "pytest-conv"
-        ],
+        "dev": ["pytest", "pytest-conv"],
     },
     classifiers=[
         "Development Status :: 3 - Alpha",
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/__init__.py

@@ -3,15 +3,18 @@ import logging
 
 from opik.evaluation.models.litellm import warning_filters
 
-from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
+from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import (
+    EvolutionaryOptimizer,
+)
 
 from . import datasets
+from .optimizable_agent import OptimizableAgent
+from .optimization_config.chat_prompt import ChatPrompt
 from .base_optimizer import BaseOptimizer
 from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
 from .logging_config import setup_logging
 from .meta_prompt_optimizer import MetaPromptOptimizer
 from .mipro_optimizer import MiproOptimizer
-from .optimization_config.chat_prompt import ChatPrompt
 from .optimization_config.configs import TaskConfig
 from .optimization_result import OptimizationResult
 
@@ -30,7 +33,8 @@ __all__ = [
     "MiproOptimizer",
     "EvolutionaryOptimizer",
     "OptimizationResult",
+    "OptimizableAgent",
     "setup_logging",
     "datasets",
-    "TaskConfig"
+    "TaskConfig",
 ]
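With 1.0.0, OptimizableAgent joins ChatPrompt in the package's top-level exports. A hypothetical end-to-end sketch of the new surface; the ChatPrompt keyword arguments and the MetaPromptOptimizer constructor are assumptions inferred from other hunks in this diff, not confirmed API:

    import opik
    from opik_optimizer import ChatPrompt, MetaPromptOptimizer

    # Assumed ChatPrompt signature; optimization_config/chat_prompt.py is not
    # shown in full in this diff.
    prompt = ChatPrompt(system="Answer the question.", user="{question}")

    # BaseOptimizer.__init__ (later in this diff) takes (model, verbose,
    # **model_kwargs); we assume MetaPromptOptimizer keeps that shape.
    optimizer = MetaPromptOptimizer(model="openai/gpt-4o-mini", temperature=0.1)

    def exact_match(dataset_item, llm_output):
        # Metric contract from BaseOptimizer.optimize_prompt's docstring:
        # (dataset_item, llm_output) -> score.
        return float(llm_output.strip() == dataset_item["answer"])

    dataset = opik.Opik().get_or_create_dataset("my_dataset")
    result = optimizer.optimize_prompt(prompt=prompt, dataset=dataset, metric=exact_match)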
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/_throttle.py

@@ -10,34 +10,34 @@ class RateLimiter:
     """
     Rate limiter that enforces a maximum number of calls across all threads using pyrate_limiter.
     """
+
     def __init__(self, max_calls_per_second: int):
         self.max_calls_per_second = max_calls_per_second
         rate = pyrate_limiter.Rate(max_calls_per_second, pyrate_limiter.Duration.SECOND)
 
         self.limiter = pyrate_limiter.Limiter(rate, raise_when_fail=False)
         self.bucket_key = "global_rate_limit"
-
+
     def acquire(self) -> None:
         while not self.limiter.try_acquire(self.bucket_key):
             time.sleep(0.01)
 
+
 def rate_limited(limiter: RateLimiter) -> Callable[[Callable], Callable]:
     """Decorator to rate limit a function using the provided limiter"""
 
     def decorator(func: Callable) -> Callable:
         @functools.wraps(func)
-        def wrapper(*args, **kwargs) -> Any:
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
             limiter.acquire()
             return func(*args, **kwargs)
+
         return wrapper
+
     return decorator
 
 
 def get_rate_limiter_for_current_opik_installation() -> RateLimiter:
     opik_config = opik.config.OpikConfig()
-    max_calls_per_second = (
-        10
-        if opik_config.is_cloud_installation
-        else 50
-    )
-    return RateLimiter(max_calls_per_second=max_calls_per_second)
+    max_calls_per_second = 10 if opik_config.is_cloud_installation else 50
+    return RateLimiter(max_calls_per_second=max_calls_per_second)
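The pieces above are enough to throttle any callable; a small usage sketch against the module as released:

    from opik_optimizer import _throttle

    # 10 calls/second against Opik cloud, 50 against a self-hosted installation.
    limiter = _throttle.get_rate_limiter_for_current_opik_installation()

    @_throttle.rate_limited(limiter)
    def fetch(i: int) -> int:
        return i

    for i in range(100):
        fetch(i)  # acquire() spins in 10 ms sleeps once the per-second budget is spent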
opik_optimizer-1.0.0/src/opik_optimizer/base_optimizer.py (new file)

@@ -0,0 +1,198 @@
+from typing import Any, Callable, Dict, List, Optional, Type
+
+import logging
+import time
+from abc import abstractmethod
+import random
+
+
+import litellm
+from opik.rest_api.core import ApiError
+from opik.api_objects import optimization
+from opik import Dataset
+from pydantic import BaseModel
+
+from . import _throttle, optimization_result
+from .cache_config import initialize_cache
+from .optimization_config import chat_prompt, mappers
+from .optimizable_agent import OptimizableAgent
+from .utils import create_litellm_agent_class
+from . import task_evaluator
+
+_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
+
+# Don't use unsupported params:
+litellm.drop_params = True
+
+# Set up logging:
+logger = logging.getLogger(__name__)
+
+
+class OptimizationRound(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+
+    round_number: int
+    current_prompt: "chat_prompt.ChatPrompt"
+    current_score: float
+    generated_prompts: Any
+    best_prompt: "chat_prompt.ChatPrompt"
+    best_score: float
+    improvement: float
+
+
+class BaseOptimizer:
+    def __init__(
+        self,
+        model: str,
+        verbose: int = 1,
+        **model_kwargs: Any,
+    ) -> None:
+        """
+        Base class for optimizers.
+
+        Args:
+            model: LiteLLM model name
+            verbose: Controls internal logging/progress bars (0=off, 1=on).
+            model_kwargs: additional args for model (eg, temperature)
+        """
+        self.model = model
+        self.reasoning_model = model
+        self.model_kwargs = model_kwargs
+        self.verbose = verbose
+        self._history: List[OptimizationRound] = []
+        self.experiment_config = None
+        self.llm_call_counter = 0
+
+        # Initialize shared cache
+        initialize_cache()
+
+    @abstractmethod
+    def optimize_prompt(
+        self,
+        prompt: "chat_prompt.ChatPrompt",
+        dataset: Dataset,
+        metric: Callable,
+        experiment_config: Optional[Dict] = None,
+        **kwargs: Any,
+    ) -> optimization_result.OptimizationResult:
+        """
+        Optimize a prompt.
+
+        Args:
+            dataset: Opik dataset name, or Opik dataset
+            metric: A metric function, this function should have two arguments:
+                dataset_item and llm_output
+            prompt: the prompt to optimize
+            input_key: input field of dataset
+            output_key: output field of dataset
+            experiment_config: Optional configuration for the experiment
+            **kwargs: Additional arguments for optimization
+        """
+        pass
+
+    def get_history(self) -> List[OptimizationRound]:
+        """
+        Get the optimization history.
+
+        Returns:
+            List[Dict[str, Any]]: List of optimization rounds with their details
+        """
+        return self._history
+
+    def _add_to_history(self, round_data: OptimizationRound) -> None:
+        """
+        Add a round to the optimization history.
+
+        Args:
+            round_data: Dictionary containing round details
+        """
+        self._history.append(round_data)
+
+    def update_optimization(
+        self, optimization: optimization.Optimization, status: str
+    ) -> None:
+        """
+        Update the optimization status
+        """
+        # FIXME: remove when a solution is added to opik's optimization.update method
+        count = 0
+        while count < 3:
+            try:
+                optimization.update(status="completed")
+                break
+            except ApiError:
+                count += 1
+                time.sleep(5)
+        if count == 3:
+            logger.warning("Unable to update optimization status; continuing...")
+
+    def evaluate_prompt(
+        self,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: Dataset,
+        metric: Callable,
+        n_threads: int,
+        verbose: int = 1,
+        dataset_item_ids: Optional[List[str]] = None,
+        experiment_config: Optional[Dict] = None,
+        n_samples: Optional[int] = None,
+        seed: Optional[int] = None,
+        agent_class: Optional[Type[OptimizableAgent]] = None,
+    ) -> float:
+        random.seed(seed)
+
+        if prompt.model is None:
+            prompt.model = self.model
+        if prompt.model_kwargs is None:
+            prompt.model_kwargs = self.model_kwargs
+
+        self.agent_class: Type[OptimizableAgent]
+
+        if agent_class is None:
+            self.agent_class = create_litellm_agent_class(prompt)
+        else:
+            self.agent_class = agent_class
+
+        agent = self.agent_class(prompt)
+
+        def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, str]:
+            messages = prompt.get_messages(dataset_item)
+            raw_model_output = agent.invoke(messages)
+            cleaned_model_output = raw_model_output.strip()
+            result = {
+                mappers.EVALUATED_LLM_TASK_OUTPUT: cleaned_model_output,
+            }
+            return result
+
+        experiment_config = experiment_config or {}
+        experiment_config["project_name"] = self.__class__.__name__
+        experiment_config = {
+            **experiment_config,
+            **{
+                "agent_class": self.agent_class.__name__,
+                "agent_config": prompt.to_dict(),
+                "metric": metric.__name__,
+                "dataset": dataset.name,
+                "configuration": {"prompt": (prompt.get_messages() if prompt else [])},
+            },
+        }
+
+        if n_samples is not None:
+            if dataset_item_ids is not None:
+                raise Exception("Can't use n_samples and dataset_item_ids")
+
+            all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
+            dataset_item_ids = random.sample(all_ids, n_samples)
+
+        score = task_evaluator.evaluate(
+            dataset=dataset,
+            dataset_item_ids=dataset_item_ids,
+            metric=metric,
+            evaluated_task=llm_task,
+            num_threads=n_threads,
+            project_name=self.agent_class.project_name,
+            experiment_config=experiment_config,
+            optimization_id=None,
+            verbose=verbose,
+        )
+        return score
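evaluate_prompt is the one concrete entry point on the new BaseOptimizer. A sketch of driving it directly, with the same caveat that the ChatPrompt constructor arguments are an assumption:

    from typing import Any, Dict

    import opik
    from opik_optimizer import ChatPrompt
    from opik_optimizer.base_optimizer import BaseOptimizer

    def exact_match(dataset_item: Dict[str, Any], llm_output: str) -> float:
        return float(llm_output.strip() == dataset_item["answer"].strip())

    # BaseOptimizer does not inherit from abc.ABC, so @abstractmethod does not
    # block direct instantiation; concrete optimizers remain the intended entry point.
    optimizer = BaseOptimizer(model="openai/gpt-4o-mini")
    prompt = ChatPrompt(system="Answer tersely.", user="{question}")  # assumed kwargs

    score = optimizer.evaluate_prompt(
        prompt=prompt,
        dataset=opik.Opik().get_or_create_dataset("gsm8k_test"),
        metric=exact_match,
        n_threads=4,
        n_samples=5,  # mutually exclusive with dataset_item_ids, per the check above
        seed=42,
    )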
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/cache_config.py

@@ -13,12 +13,14 @@ CACHE_CONFIG = {
     "disk_cache_dir": CACHE_DIR,
 }
 
-def initialize_cache():
+
+def initialize_cache() -> Cache:
     """Initialize the LiteLLM cache with custom configuration."""
     litellm.cache = Cache(**CACHE_CONFIG)
     return litellm.cache
 
-def clear_cache():
+
+def clear_cache() -> None:
     """Clear the LiteLLM cache."""
     if litellm.cache:
-        litellm.cache.clear()
+        litellm.cache.clear()
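Both helpers now carry return annotations; usage is unchanged:

    from opik_optimizer.cache_config import clear_cache, initialize_cache

    cache = initialize_cache()  # installs the disk-backed cache on litellm.cache
    # ... run evaluations; identical completions are then served from disk ...
    clear_cache()  # empty the cache between experiments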
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ai2_arc.py

@@ -1,8 +1,7 @@
 import opik
 
-def ai2_arc(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def ai2_arc(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the AI2 ARC dataset.
     """
@@ -11,12 +10,14 @@ def ai2_arc(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
@@ -24,19 +25,20 @@ def ai2_arc(
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
         hf_dataset = ds.load_dataset(
-            "ai2_arc", "ARC-Challenge",
-            streaming=True, download_config=download_config
+            "ai2_arc", "ARC-Challenge", streaming=True, download_config=download_config
         )
-
+
         data = []
         for i, item in enumerate(hf_dataset["train"]):
             if i >= nb_items:
                 break
-            data.append({
-                "question": item["question"],
-                "answer": item["answerKey"],
-                "choices": item["choices"],
-            })
+            data.append(
+                {
+                    "question": item["question"],
+                    "answer": item["answerKey"],
+                    "choices": item["choices"],
+                }
+            )
         ds.enable_progress_bar()
 
         dataset.insert(data)
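Every dataset loader in this release follows the pattern above: get-or-create the Opik dataset, return it when fully populated, raise on partial population, and otherwise stream the HuggingFace source with progress bars suppressed. A usage sketch, importing by module path since the contents of datasets/__init__.py are not shown in this diff:

    from opik_optimizer.datasets.ai2_arc import ai2_arc

    # test_mode=True targets a small test dataset (named with a _test suffix in
    # the other loaders shown here) instead of the full 300 items; repeated
    # calls return the already-populated dataset.
    dataset = ai2_arc(test_mode=True)
    print(len(dataset.get_items()))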
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/cnn_dailymail.py

@@ -1,8 +1,7 @@
 import opik
 
-def cnn_dailymail(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def cnn_dailymail(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 100 samples of the CNN Daily Mail dataset.
     """
@@ -11,30 +10,35 @@ def cnn_dailymail(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
-
+
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset("cnn_dailymail", "3.0.0", streaming=True, download_config=download_config)
-
+        hf_dataset = ds.load_dataset(
+            "cnn_dailymail", "3.0.0", streaming=True, download_config=download_config
+        )
+
         data = []
         for i, item in enumerate(hf_dataset["validation"]):
             if i >= nb_items:
                 break
-            data.append({
-                "article": item["article"],
-                "highlights": item["highlights"],
-            })
+            data.append(
+                {
+                    "article": item["article"],
+                    "highlights": item["highlights"],
+                }
+            )
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
-
+
     return dataset
-
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/election_questions.py

@@ -1,33 +1,32 @@
 import opik
 
 
-def election_questions(
-    test_mode: bool = False
-) -> opik.Dataset:
+def election_questions(test_mode: bool = False) -> opik.Dataset:
     dataset_name = "election_questions" if not test_mode else "election_questions_test"
     nb_items = 300 if not test_mode else 5
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset("Anthropic/election_questions", download_config=download_config)
-
+        hf_dataset = ds.load_dataset(
+            "Anthropic/election_questions", download_config=download_config
+        )
+
         data = [
-            {
-                "question": item["question"],
-                "label": item["label"]
-            }
+            {"question": item["question"], "label": item["label"]}
             for item in hf_dataset["test"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/gsm8k.py

@@ -1,8 +1,7 @@
 import opik
 
-def gsm8k(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def gsm8k(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the GSM8K dataset.
     """
@@ -11,28 +10,34 @@ def gsm8k(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset("gsm8k", "main", streaming=True, download_config=download_config)
-
+        hf_dataset = ds.load_dataset(
+            "gsm8k", "main", streaming=True, download_config=download_config
+        )
+
         data = []
         for i, item in enumerate(hf_dataset["train"]):
             if i >= nb_items:
                 break
-            data.append({
-                "question": item["question"],
-                "answer": item["answer"],
-            })
+            data.append(
+                {
+                    "question": item["question"],
+                    "answer": item["answer"],
+                }
+            )
         ds.enable_progress_bar()
 
         dataset.insert(data)
{opik_optimizer-0.9.1 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/halu_eval.py

@@ -1,8 +1,7 @@
 import opik
 
-def halu_eval_300(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def halu_eval_300(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the HaluEval dataset.
     """
@@ -11,12 +10,14 @@ def halu_eval_300(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import pandas as pd