opik-optimizer 0.9.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opik_optimizer-0.9.2/src/opik_optimizer.egg-info → opik_optimizer-1.0.0}/PKG-INFO +8 -8
- opik_optimizer-1.0.0/pyproject.toml +11 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/setup.py +11 -14
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/__init__.py +7 -3
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/_throttle.py +8 -8
- opik_optimizer-1.0.0/src/opik_optimizer/base_optimizer.py +198 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/cache_config.py +5 -3
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ai2_arc.py +15 -13
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/cnn_dailymail.py +19 -15
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/election_questions.py +10 -11
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/gsm8k.py +16 -11
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/halu_eval.py +6 -5
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/hotpot_qa.py +17 -16
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/medhallu.py +10 -7
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/rag_hallucinations.py +11 -8
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer-1.0.0/src/opik_optimizer/datasets/tiny_test.py +53 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/truthful_qa.py +18 -12
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/cache.py +6 -6
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer-1.0.0/src/opik_optimizer/evolutionary_optimizer/__init__.py +3 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
- opik_optimizer-1.0.0/src/opik_optimizer/few_shot_bayesian_optimizer/reporting.py +170 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/logging_config.py +19 -15
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
- opik_optimizer-1.0.0/src/opik_optimizer/meta_prompt_optimizer/reporting.py +214 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/__init__.py +2 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/_lm.py +38 -9
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer-1.0.0/src/opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer-1.0.0/src/opik_optimizer/optimization_config/chat_prompt.py +176 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/configs.py +4 -3
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/mappers.py +18 -6
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_result.py +22 -13
- opik_optimizer-1.0.0/src/opik_optimizer/py.typed +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/reporting_utils.py +89 -58
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/task_evaluator.py +12 -14
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/utils.py +117 -14
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0/src/opik_optimizer.egg-info}/PKG-INFO +8 -8
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/SOURCES.txt +2 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/requires.txt +7 -7
- opik_optimizer-0.9.2/pyproject.toml +0 -3
- opik_optimizer-0.9.2/src/opik_optimizer/base_optimizer.py +0 -145
- opik_optimizer-0.9.2/src/opik_optimizer/datasets/tiny_test.py +0 -57
- opik_optimizer-0.9.2/src/opik_optimizer/evolutionary_optimizer/__init__.py +0 -1
- opik_optimizer-0.9.2/src/opik_optimizer/few_shot_bayesian_optimizer/reporting.py +0 -119
- opik_optimizer-0.9.2/src/opik_optimizer/meta_prompt_optimizer/reporting.py +0 -139
- opik_optimizer-0.9.2/src/opik_optimizer/optimization_config/chat_prompt.py +0 -106
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/LICENSE +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/README.md +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/setup.cfg +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/data/hotpot-500.json +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/__init__.py +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/demo/__init__.py +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/few_shot_bayesian_optimizer/__init__.py +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/meta_prompt_optimizer/__init__.py +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/optimization_config/__init__.py +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/dependency_links.txt +0 -0
- {opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer.egg-info/top_level.txt +0 -0
{opik_optimizer-0.9.2/src/opik_optimizer.egg-info → opik_optimizer-1.0.0}/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opik_optimizer
-Version: 0.9.2
+Version: 1.0.0
 Summary: Agent optimization with Opik
 Home-page: https://github.com/comet-ml/opik
 Author: Comet ML
@@ -12,17 +12,17 @@ Classifier: Programming Language :: Python :: 3.10
 Requires-Python: >=3.9,<3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: opik>=1.7.17
-Requires-Dist: dspy<=2.6.24,>=2.6.18
-Requires-Dist: litellm
-Requires-Dist: tqdm
 Requires-Dist: datasets
+Requires-Dist: deap>=1.4.3
+Requires-Dist: diskcache
+Requires-Dist: hf_xet
+Requires-Dist: litellm
+Requires-Dist: opik>=1.7.17
 Requires-Dist: optuna
-Requires-Dist: pydantic
 Requires-Dist: pandas
-Requires-Dist:
+Requires-Dist: pydantic
 Requires-Dist: pyrate-limiter
-Requires-Dist:
+Requires-Dist: tqdm
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-conv; extra == "dev"
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/setup.py RENAMED
@@ -2,38 +2,35 @@ from setuptools import find_packages, setup
 
 setup(
     name="opik_optimizer",
-    version="0.9.2",
+    version="1.0.0",
     description="Agent optimization with Opik",
     author="Comet ML",
     author_email="support@comet.com",
     long_description=open("README.md", encoding="utf-8").read(),
-    long_description_content_type=
+    long_description_content_type="text/markdown",
     url="https://github.com/comet-ml/opik",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     package_data={
-
+        "opik_optimizer": ["data/*.json"],
     },
     python_requires=">=3.9,<3.13",
     install_requires=[
-        "opik>=1.7.17",
-        "dspy>=2.6.18,<=2.6.24",
-        "litellm",
-        "tqdm",
         "datasets",
+        "deap>=1.4.3",
+        "diskcache",
+        "hf_xet",
+        "litellm",
+        "opik>=1.7.17",
         "optuna",
-        "pydantic",
         "pandas",
-        "
+        "pydantic",
         "pyrate-limiter",
-        "
+        "tqdm",
     ],
     # dev requirements
     extras_require={
-        "dev": [
-            "pytest",
-            "pytest-conv"
-        ],
+        "dev": ["pytest", "pytest-conv"],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
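Taken together with the PKG-INFO hunk above, the dependency set changes in 1.0.0: dspy is dropped, while deap>=1.4.3, diskcache, and hf_xet are added. A quick standard-library sketch to confirm this against an installed copy (illustrative, not part of the diff):

import importlib.metadata as md

print(md.version("opik_optimizer"))        # expected: 1.0.0
for requirement in md.requires("opik_optimizer") or []:
    print(requirement)                     # datasets, deap>=1.4.3, diskcache, hf_xet, ...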
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/__init__.py RENAMED
@@ -3,15 +3,18 @@ import logging
 
 from opik.evaluation.models.litellm import warning_filters
 
-from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
+from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import (
+    EvolutionaryOptimizer,
+)
 
 from . import datasets
+from .optimizable_agent import OptimizableAgent
+from .optimization_config.chat_prompt import ChatPrompt
 from .base_optimizer import BaseOptimizer
 from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
 from .logging_config import setup_logging
 from .meta_prompt_optimizer import MetaPromptOptimizer
 from .mipro_optimizer import MiproOptimizer
-from .optimization_config.chat_prompt import ChatPrompt
 from .optimization_config.configs import TaskConfig
 from .optimization_result import OptimizationResult
 
@@ -30,7 +33,8 @@ __all__ = [
     "MiproOptimizer",
     "EvolutionaryOptimizer",
     "OptimizationResult",
+    "OptimizableAgent",
     "setup_logging",
     "datasets",
-    "TaskConfig"
+    "TaskConfig",
 ]
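The re-exports above define the 1.0.0 top-level API. A minimal sketch of how the newly exported ChatPrompt and an optimizer compose (the model name, metric body, and ChatPrompt fields are illustrative assumptions, not taken from this diff):

from opik_optimizer import ChatPrompt, MetaPromptOptimizer, datasets

prompt = ChatPrompt(system="Answer the question.", user="{question}")  # fields assumed

def exact_match(dataset_item, llm_output):
    # Metric contract per base_optimizer below: (dataset_item, llm_output) -> score.
    return float(llm_output.strip() == dataset_item["answer"])

optimizer = MetaPromptOptimizer(model="openai/gpt-4o-mini")  # assumed model name
result = optimizer.optimize_prompt(
    prompt=prompt, dataset=datasets.gsm8k(test_mode=True), metric=exact_match
)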
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/_throttle.py RENAMED
@@ -10,34 +10,34 @@ class RateLimiter:
     """
     Rate limiter that enforces a maximum number of calls across all threads using pyrate_limiter.
     """
+
     def __init__(self, max_calls_per_second: int):
         self.max_calls_per_second = max_calls_per_second
         rate = pyrate_limiter.Rate(max_calls_per_second, pyrate_limiter.Duration.SECOND)
 
         self.limiter = pyrate_limiter.Limiter(rate, raise_when_fail=False)
         self.bucket_key = "global_rate_limit"
-
+
     def acquire(self) -> None:
         while not self.limiter.try_acquire(self.bucket_key):
             time.sleep(0.01)
 
+
 def rate_limited(limiter: RateLimiter) -> Callable[[Callable], Callable]:
     """Decorator to rate limit a function using the provided limiter"""
 
     def decorator(func: Callable) -> Callable:
         @functools.wraps(func)
-        def wrapper(*args, **kwargs) -> Any:
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
             limiter.acquire()
             return func(*args, **kwargs)
+
         return wrapper
+
     return decorator
 
 
 def get_rate_limiter_for_current_opik_installation() -> RateLimiter:
     opik_config = opik.config.OpikConfig()
-    max_calls_per_second = (
-        10
-        if opik_config.is_cloud_installation
-        else 50
-    )
-    return RateLimiter(max_calls_per_second=max_calls_per_second)
+    max_calls_per_second = 10 if opik_config.is_cloud_installation else 50
+    return RateLimiter(max_calls_per_second=max_calls_per_second)
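For orientation, the decorator shown above wraps any callable so each invocation first waits on the shared limiter; a small sketch (the decorated function is hypothetical):

from opik_optimizer import _throttle

_limiter = _throttle.get_rate_limiter_for_current_opik_installation()

@_throttle.rate_limited(_limiter)
def call_backend(payload: str) -> str:
    # Blocks in acquire() until a slot is free: 10 calls/s on cloud installs, 50/s otherwise.
    return payload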
opik_optimizer-1.0.0/src/opik_optimizer/base_optimizer.py ADDED
@@ -0,0 +1,198 @@
+from typing import Any, Callable, Dict, List, Optional, Type
+
+import logging
+import time
+from abc import abstractmethod
+import random
+
+
+import litellm
+from opik.rest_api.core import ApiError
+from opik.api_objects import optimization
+from opik import Dataset
+from pydantic import BaseModel
+
+from . import _throttle, optimization_result
+from .cache_config import initialize_cache
+from .optimization_config import chat_prompt, mappers
+from .optimizable_agent import OptimizableAgent
+from .utils import create_litellm_agent_class
+from . import task_evaluator
+
+_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
+
+# Don't use unsupported params:
+litellm.drop_params = True
+
+# Set up logging:
+logger = logging.getLogger(__name__)
+
+
+class OptimizationRound(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+
+    round_number: int
+    current_prompt: "chat_prompt.ChatPrompt"
+    current_score: float
+    generated_prompts: Any
+    best_prompt: "chat_prompt.ChatPrompt"
+    best_score: float
+    improvement: float
+
+
+class BaseOptimizer:
+    def __init__(
+        self,
+        model: str,
+        verbose: int = 1,
+        **model_kwargs: Any,
+    ) -> None:
+        """
+        Base class for optimizers.
+
+        Args:
+            model: LiteLLM model name
+            verbose: Controls internal logging/progress bars (0=off, 1=on).
+            model_kwargs: additional args for model (eg, temperature)
+        """
+        self.model = model
+        self.reasoning_model = model
+        self.model_kwargs = model_kwargs
+        self.verbose = verbose
+        self._history: List[OptimizationRound] = []
+        self.experiment_config = None
+        self.llm_call_counter = 0
+
+        # Initialize shared cache
+        initialize_cache()
+
+    @abstractmethod
+    def optimize_prompt(
+        self,
+        prompt: "chat_prompt.ChatPrompt",
+        dataset: Dataset,
+        metric: Callable,
+        experiment_config: Optional[Dict] = None,
+        **kwargs: Any,
+    ) -> optimization_result.OptimizationResult:
+        """
+        Optimize a prompt.
+
+        Args:
+            dataset: Opik dataset name, or Opik dataset
+            metric: A metric function, this function should have two arguments:
+                dataset_item and llm_output
+            prompt: the prompt to optimize
+            input_key: input field of dataset
+            output_key: output field of dataset
+            experiment_config: Optional configuration for the experiment
+            **kwargs: Additional arguments for optimization
+        """
+        pass
+
+    def get_history(self) -> List[OptimizationRound]:
+        """
+        Get the optimization history.
+
+        Returns:
+            List[Dict[str, Any]]: List of optimization rounds with their details
+        """
+        return self._history
+
+    def _add_to_history(self, round_data: OptimizationRound) -> None:
+        """
+        Add a round to the optimization history.
+
+        Args:
+            round_data: Dictionary containing round details
+        """
+        self._history.append(round_data)
+
+    def update_optimization(
+        self, optimization: optimization.Optimization, status: str
+    ) -> None:
+        """
+        Update the optimization status
+        """
+        # FIXME: remove when a solution is added to opik's optimization.update method
+        count = 0
+        while count < 3:
+            try:
+                optimization.update(status="completed")
+                break
+            except ApiError:
+                count += 1
+                time.sleep(5)
+        if count == 3:
+            logger.warning("Unable to update optimization status; continuing...")
+
+    def evaluate_prompt(
+        self,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: Dataset,
+        metric: Callable,
+        n_threads: int,
+        verbose: int = 1,
+        dataset_item_ids: Optional[List[str]] = None,
+        experiment_config: Optional[Dict] = None,
+        n_samples: Optional[int] = None,
+        seed: Optional[int] = None,
+        agent_class: Optional[Type[OptimizableAgent]] = None,
+    ) -> float:
+        random.seed(seed)
+
+        if prompt.model is None:
+            prompt.model = self.model
+        if prompt.model_kwargs is None:
+            prompt.model_kwargs = self.model_kwargs
+
+        self.agent_class: Type[OptimizableAgent]
+
+        if agent_class is None:
+            self.agent_class = create_litellm_agent_class(prompt)
+        else:
+            self.agent_class = agent_class
+
+        agent = self.agent_class(prompt)
+
+        def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, str]:
+            messages = prompt.get_messages(dataset_item)
+            raw_model_output = agent.invoke(messages)
+            cleaned_model_output = raw_model_output.strip()
+            result = {
+                mappers.EVALUATED_LLM_TASK_OUTPUT: cleaned_model_output,
+            }
+            return result
+
+        experiment_config = experiment_config or {}
+        experiment_config["project_name"] = self.__class__.__name__
+        experiment_config = {
+            **experiment_config,
+            **{
+                "agent_class": self.agent_class.__name__,
+                "agent_config": prompt.to_dict(),
+                "metric": metric.__name__,
+                "dataset": dataset.name,
+                "configuration": {"prompt": (prompt.get_messages() if prompt else [])},
+            },
+        }
+
+        if n_samples is not None:
+            if dataset_item_ids is not None:
+                raise Exception("Can't use n_samples and dataset_item_ids")
+
+            all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
+            dataset_item_ids = random.sample(all_ids, n_samples)
+
+        score = task_evaluator.evaluate(
+            dataset=dataset,
+            dataset_item_ids=dataset_item_ids,
+            metric=metric,
+            evaluated_task=llm_task,
+            num_threads=n_threads,
+            project_name=self.agent_class.project_name,
+            experiment_config=experiment_config,
+            optimization_id=None,
+            verbose=verbose,
+        )
+        return score
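The class above is the contract every optimizer in this release implements: optimize_prompt is abstract, and evaluate_prompt supplies the shared scoring loop. A schematic subclass, for orientation only (not shipped in the package):

from typing import Any, Callable, Dict, Optional

from opik import Dataset
from opik_optimizer.base_optimizer import BaseOptimizer
from opik_optimizer.optimization_config import chat_prompt


class SingleShotOptimizer(BaseOptimizer):
    """Hypothetical optimizer: scores the incoming prompt once and stops."""

    def optimize_prompt(
        self,
        prompt: chat_prompt.ChatPrompt,
        dataset: Dataset,
        metric: Callable,
        experiment_config: Optional[Dict] = None,
        **kwargs: Any,
    ):
        # Reuse the shared evaluation loop defined on BaseOptimizer.
        score = self.evaluate_prompt(
            prompt=prompt, dataset=dataset, metric=metric, n_threads=8
        )
        ...  # package a full optimization_result.OptimizationResult here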
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/cache_config.py RENAMED
@@ -13,12 +13,14 @@ CACHE_CONFIG = {
     "disk_cache_dir": CACHE_DIR,
 }
 
-def initialize_cache() -> Cache:
+
+def initialize_cache() -> Cache:
     """Initialize the LiteLLM cache with custom configuration."""
     litellm.cache = Cache(**CACHE_CONFIG)
     return litellm.cache
 
-def clear_cache() -> None:
+
+def clear_cache() -> None:
     """Clear the LiteLLM cache."""
     if litellm.cache:
-        litellm.cache.clear()
+        litellm.cache.clear()
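Both helpers mutate the process-global litellm.cache; the intended call pattern, as a sketch:

from opik_optimizer.cache_config import initialize_cache, clear_cache

cache = initialize_cache()   # installs the disk-backed cache configured above
# ... repeated LiteLLM completions are now served from the CACHE_DIR disk cache ...
clear_cache()                # evicts entries, if a cache is installed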
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/ai2_arc.py RENAMED
@@ -1,8 +1,7 @@
 import opik
 
-def ai2_arc(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def ai2_arc(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the AI2 ARC dataset.
     """
@@ -11,12 +10,14 @@ def ai2_arc(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
@@ -24,19 +25,20 @@ def ai2_arc(
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
         hf_dataset = ds.load_dataset(
-            "ai2_arc", "ARC-Challenge",
-            streaming=True, download_config=download_config
+            "ai2_arc", "ARC-Challenge", streaming=True, download_config=download_config
         )
-
+
         data = []
         for i, item in enumerate(hf_dataset["train"]):
             if i >= nb_items:
                 break
-            data.append(
-                {"question": item["question"],
-                 "answer": item["answerKey"],
-                 "choices": item["choices"]}
-            )
+            data.append(
+                {
+                    "question": item["question"],
+                    "answer": item["answerKey"],
+                    "choices": item["choices"],
+                }
+            )
         ds.enable_progress_bar()
 
         dataset.insert(data)
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/cnn_dailymail.py RENAMED
@@ -1,8 +1,7 @@
 import opik
 
-def cnn_dailymail(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def cnn_dailymail(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 100 samples of the CNN Daily Mail dataset.
     """
@@ -11,30 +10,35 @@ def cnn_dailymail(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
-
+
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset(
-            "cnn_dailymail", "3.0.0", streaming=True, download_config=download_config)
+        hf_dataset = ds.load_dataset(
+            "cnn_dailymail", "3.0.0", streaming=True, download_config=download_config
+        )
+
         data = []
         for i, item in enumerate(hf_dataset["validation"]):
             if i >= nb_items:
                 break
-            data.append(
-                {"article": item["article"],
-                 "highlights": item["highlights"]}
-            )
+            data.append(
+                {
+                    "article": item["article"],
+                    "highlights": item["highlights"],
+                }
+            )
         ds.enable_progress_bar()
-
+
         dataset.insert(data)
-
+
     return dataset
-
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/election_questions.py RENAMED
@@ -1,33 +1,32 @@
 import opik
 
 
-def election_questions(
-    test_mode: bool = False
-) -> opik.Dataset:
+def election_questions(test_mode: bool = False) -> opik.Dataset:
     dataset_name = "election_questions" if not test_mode else "election_questions_test"
     nb_items = 300 if not test_mode else 5
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset(
-            "Anthropic/election_questions", download_config=download_config)
+        hf_dataset = ds.load_dataset(
+            "Anthropic/election_questions", download_config=download_config
+        )
+
         data = [
-            {
-                "question": item["question"],
-                "label": item["label"]
-            }
+            {"question": item["question"], "label": item["label"]}
             for item in hf_dataset["test"].select(range(nb_items))
         ]
         ds.enable_progress_bar()
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/gsm8k.py RENAMED
@@ -1,8 +1,7 @@
 import opik
 
-def gsm8k(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def gsm8k(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the GSM8K dataset.
     """
@@ -11,28 +10,34 @@ def gsm8k(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import datasets as ds
 
         # Load data from file and insert into the dataset
         download_config = ds.DownloadConfig(download_desc=False, disable_tqdm=True)
         ds.disable_progress_bar()
-        hf_dataset = ds.load_dataset(
-            "gsm8k", "main", streaming=True, download_config=download_config)
+        hf_dataset = ds.load_dataset(
+            "gsm8k", "main", streaming=True, download_config=download_config
+        )
+
         data = []
         for i, item in enumerate(hf_dataset["train"]):
             if i >= nb_items:
                 break
-            data.append(
-                {"question": item["question"],
-                 "answer": item["answer"]}
-            )
+            data.append(
+                {
+                    "question": item["question"],
+                    "answer": item["answer"],
+                }
+            )
         ds.enable_progress_bar()
 
         dataset.insert(data)
{opik_optimizer-0.9.2 → opik_optimizer-1.0.0}/src/opik_optimizer/datasets/halu_eval.py RENAMED
@@ -1,8 +1,7 @@
 import opik
 
-def halu_eval_300(
-    test_mode: bool = False
-) -> opik.Dataset:
+
+def halu_eval_300(test_mode: bool = False) -> opik.Dataset:
     """
     Dataset containing the first 300 samples of the HaluEval dataset.
     """
@@ -11,12 +10,14 @@ def halu_eval_300(
 
     client = opik.Opik()
     dataset = client.get_or_create_dataset(dataset_name)
-
+
     items = dataset.get_items()
     if len(items) == nb_items:
         return dataset
     elif len(items) != 0:
-        raise ValueError(f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it.")
+        raise ValueError(
+            f"Dataset {dataset_name} contains {len(items)} items, expected {nb_items}. We recommend deleting the dataset and re-creating it."
+        )
     elif len(items) == 0:
         import pandas as pd
 
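All of the dataset builders in this diff (ai2_arc, cnn_dailymail, election_questions, gsm8k, halu_eval_300, and the rest of the datasets/ directory) now share one shape: take only test_mode: bool = False, get-or-create the Opik dataset, verify the item count, and backfill from Hugging Face on first use. Usage sketch (item counts follow the docstrings above):

from opik_optimizer import datasets

small = datasets.gsm8k(test_mode=True)   # small fixture variant for tests
full = datasets.gsm8k()                  # first 300 GSM8K training samples
print(len(full.get_items()))             # 300 once the backfill has run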