opik-optimizer 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer-0.7.0/LICENSE +21 -0
- opik_optimizer-0.7.0/PKG-INFO +35 -0
- opik_optimizer-0.7.0/README.md +133 -0
- opik_optimizer-0.7.0/setup.cfg +4 -0
- opik_optimizer-0.7.0/setup.py +38 -0
- opik_optimizer-0.7.0/src/opik_optimizer/__init__.py +65 -0
- opik_optimizer-0.7.0/src/opik_optimizer/_throttle.py +43 -0
- opik_optimizer-0.7.0/src/opik_optimizer/base_optimizer.py +240 -0
- opik_optimizer-0.7.0/src/opik_optimizer/cache_config.py +24 -0
- opik_optimizer-0.7.0/src/opik_optimizer/demo/__init__.py +7 -0
- opik_optimizer-0.7.0/src/opik_optimizer/demo/cache.py +112 -0
- opik_optimizer-0.7.0/src/opik_optimizer/demo/datasets.py +656 -0
- opik_optimizer-0.7.0/src/opik_optimizer/few_shot_bayesian_optimizer/__init__.py +5 -0
- opik_optimizer-0.7.0/src/opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +408 -0
- opik_optimizer-0.7.0/src/opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +91 -0
- opik_optimizer-0.7.0/src/opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +80 -0
- opik_optimizer-0.7.0/src/opik_optimizer/integrations/__init__.py +0 -0
- opik_optimizer-0.7.0/src/opik_optimizer/logging_config.py +69 -0
- opik_optimizer-0.7.0/src/opik_optimizer/meta_prompt_optimizer.py +1100 -0
- opik_optimizer-0.7.0/src/opik_optimizer/mipro_optimizer/__init__.py +1 -0
- opik_optimizer-0.7.0/src/opik_optimizer/mipro_optimizer/_lm.py +394 -0
- opik_optimizer-0.7.0/src/opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1058 -0
- opik_optimizer-0.7.0/src/opik_optimizer/mipro_optimizer/mipro_optimizer.py +395 -0
- opik_optimizer-0.7.0/src/opik_optimizer/mipro_optimizer/utils.py +107 -0
- opik_optimizer-0.7.0/src/opik_optimizer/optimization_config/__init__.py +0 -0
- opik_optimizer-0.7.0/src/opik_optimizer/optimization_config/configs.py +35 -0
- opik_optimizer-0.7.0/src/opik_optimizer/optimization_config/mappers.py +49 -0
- opik_optimizer-0.7.0/src/opik_optimizer/optimization_result.py +211 -0
- opik_optimizer-0.7.0/src/opik_optimizer/task_evaluator.py +102 -0
- opik_optimizer-0.7.0/src/opik_optimizer/utils.py +132 -0
- opik_optimizer-0.7.0/src/opik_optimizer.egg-info/PKG-INFO +35 -0
- opik_optimizer-0.7.0/src/opik_optimizer.egg-info/SOURCES.txt +41 -0
- opik_optimizer-0.7.0/src/opik_optimizer.egg-info/dependency_links.txt +1 -0
- opik_optimizer-0.7.0/src/opik_optimizer.egg-info/requires.txt +14 -0
- opik_optimizer-0.7.0/src/opik_optimizer.egg-info/top_level.txt +1 -0
- opik_optimizer-0.7.0/tests/test_base_optimizer.py +92 -0
- opik_optimizer-0.7.0/tests/test_example.py +3 -0
- opik_optimizer-0.7.0/tests/test_few_shot_bayesian_optimizer.py +256 -0
- opik_optimizer-0.7.0/tests/test_mappers.py +51 -0
- opik_optimizer-0.7.0/tests/test_optimization_dsl.py +164 -0
- opik_optimizer-0.7.0/tests/test_optimization_result.py +291 -0
- opik_optimizer-0.7.0/tests/test_task_evaluator.py +177 -0
- opik_optimizer-0.7.0/tests/test_utils.py +65 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Comet
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,35 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: opik_optimizer
|
3
|
+
Version: 0.7.0
|
4
|
+
Summary: Agent optimization with Opik
|
5
|
+
Home-page: https://github.com/comet-ml/opik
|
6
|
+
Author: Comet ML
|
7
|
+
Author-email: info@comet.ml
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
9
|
+
Classifier: Intended Audience :: Developers
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Requires-Python: >=3.9
|
13
|
+
License-File: LICENSE
|
14
|
+
Requires-Dist: opik>=1.7.17
|
15
|
+
Requires-Dist: dspy<3,>=2.6.18
|
16
|
+
Requires-Dist: litellm
|
17
|
+
Requires-Dist: tqdm
|
18
|
+
Requires-Dist: datasets
|
19
|
+
Requires-Dist: optuna
|
20
|
+
Requires-Dist: pydantic
|
21
|
+
Requires-Dist: pandas
|
22
|
+
Requires-Dist: hf_xet
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: adalflow; extra == "dev"
|
25
|
+
Requires-Dist: pytest; extra == "dev"
|
26
|
+
Requires-Dist: pytest-conv; extra == "dev"
|
27
|
+
Dynamic: author
|
28
|
+
Dynamic: author-email
|
29
|
+
Dynamic: classifier
|
30
|
+
Dynamic: home-page
|
31
|
+
Dynamic: license-file
|
32
|
+
Dynamic: provides-extra
|
33
|
+
Dynamic: requires-dist
|
34
|
+
Dynamic: requires-python
|
35
|
+
Dynamic: summary
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Opik Optimizer
|
2
|
+
|
3
|
+
The Opik Optimizer can refine your prompts to get better performance
|
4
|
+
from your LLMs. You can use a variety of algorithms, including:
|
5
|
+
|
6
|
+
* FewShotBayesianOptimizer
|
7
|
+
* MiproOptimizer
|
8
|
+
* MetaPromptOptimizer
|
9
|
+
|
10
|
+
## Quickstart
|
11
|
+
|
12
|
+
|
13
|
+
[Open Quickstart Notebook in Colab](https://colab.research.google.com/github/comet-ml/opik/blob/main/sdks/opik_optimizer/notebooks/OpikOptimizerIntro.ipynb)
|
14
|
+
|
15
|
+
|
16
|
+
## Setup
|
17
|
+
|
18
|
+
1. Configure Opik:
|
19
|
+
```bash
|
20
|
+
# Install the Opik SDK (provides the `opik` CLI)
|
21
|
+
pip install opik
|
22
|
+
|
23
|
+
# Configure your API key
|
24
|
+
opik configure
|
25
|
+
# When prompted, enter your Opik API key
|
26
|
+
```
|
27
|
+
|
28
|
+
2. Set up your environment variables:
|
29
|
+
```bash
|
30
|
+
# OpenAI API key for LLM access
|
31
|
+
export OPENAI_API_KEY=your_openai_api_key
|
32
|
+
```
|
33
|
+
|
34
|
+
3. Install the package:
|
35
|
+
```bash
|
36
|
+
pip install git+https://github.com/comet-ml/opik#subdirectory=sdks/opik_optimizer
|
37
|
+
```
|
38
|
+
|
39
|
+
You'll need:
|
40
|
+
|
41
|
+
1. An LLM model name
|
42
|
+
2. An Opik Dataset (or Opik Dataset name)
|
43
|
+
3. An Opik Metric (possibly a custom one)
|
44
|
+
4. A starting prompt (string)
|
45
|
+
|
46
|
+
## Example
|
47
|
+
|
48
|
+
We have prepared some sample datasets for testing:
|
49
|
+
|
50
|
+
* "tiny-test"
|
51
|
+
* "halu-eval-300"
|
52
|
+
* "hotpot-300"
|
53
|
+
|
54
|
+
You can see how to use those below:
|
55
|
+
|
56
|
+
```python
|
57
|
+
from opik.evaluation.metrics import LevenshteinRatio
|
58
|
+
from opik_optimizer.few_shot_bayesian_optimizer import FewShotBayesianOptimizer
|
59
|
+
from opik_optimizer.demo import get_or_create_dataset
|
60
|
+
|
61
|
+
from opik_optimizer import (
|
62
|
+
OptimizationConfig,
|
63
|
+
MetricConfig,
|
64
|
+
TaskConfig,
|
65
|
+
from_dataset_field,
|
66
|
+
from_llm_response_text,
|
67
|
+
)
|
68
|
+
|
69
|
+
hot_pot_dataset = get_or_create_dataset("hotpot-300")
|
70
|
+
|
71
|
+
# For chat prompts instruction doesn't need to contain input parameters from dataset examples.
|
72
|
+
prompt_instruction = """
|
73
|
+
Answer the question.
|
74
|
+
"""
|
75
|
+
|
76
|
+
initial_prompt_no_examples = [
|
77
|
+
{"role": "system", "content": prompt_instruction},
|
78
|
+
{"role": "user", "content": "{{question}}"},
|
79
|
+
]
|
80
|
+
|
81
|
+
optimizer = FewShotBayesianOptimizer(
|
82
|
+
model="gpt-4o-mini",
|
83
|
+
project_name="optimize-few-shot-bayesian-hotpot",
|
84
|
+
min_examples=3,
|
85
|
+
max_examples=8,
|
86
|
+
n_threads=16,
|
87
|
+
seed=42,
|
88
|
+
)
|
89
|
+
|
90
|
+
optimization_config = OptimizationConfig(
|
91
|
+
dataset=hot_pot_dataset,
|
92
|
+
objective=MetricConfig(
|
93
|
+
metric=LevenshteinRatio(),
|
94
|
+
inputs={
|
95
|
+
"output": from_llm_response_text(),
|
96
|
+
"reference": from_dataset_field(name="answer"),
|
97
|
+
},
|
98
|
+
),
|
99
|
+
task=TaskConfig(
|
100
|
+
instruction_prompt=prompt_instruction,
|
101
|
+
input_dataset_fields=["question"],
|
102
|
+
output_dataset_field="answer",
|
103
|
+
use_chat_prompt=True,
|
104
|
+
),
|
105
|
+
)
|
106
|
+
|
107
|
+
result = optimizer.optimize_prompt(optimization_config, n_trials=10)
|
108
|
+
print(result)
|
109
|
+
```
|
110
|
+
|
111
|
+
More examples can be found in the `scripts` folder.
|
112
|
+
|
113
|
+
## Installation
|
114
|
+
|
115
|
+
```bash
|
116
|
+
pip install git+https://github.com/comet-ml/opik#subdirectory=sdks/opik_optimizer
|
117
|
+
```
|
118
|
+
|
119
|
+
## Development
|
120
|
+
|
121
|
+
To use the Opik Optimizer from source:
|
122
|
+
|
123
|
+
```bash
|
124
|
+
git clone git@github.com:comet-ml/opik
|
125
|
+
cd opik/sdks/opik_optimizer
|
126
|
+
pip install -e .
|
127
|
+
```
|
128
|
+
|
129
|
+
## Requirements
|
130
|
+
|
131
|
+
- Python 3.10+
|
132
|
+
- Opik API key
|
133
|
+
- OpenAI API key (or other LLM provider)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
from setuptools import setup, find_packages
|
2
|
+
|
3
|
+
# Distribution metadata for the opik_optimizer package. Sources live under
# ``src/`` (see ``package_dir`` / ``find_packages``).
setup(
    name="opik_optimizer",
    version="0.7.0",
    description="Agent optimization with Opik",
    author="Comet ML",
    author_email="info@comet.ml",
    url="https://github.com/comet-ml/opik",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    python_requires=">=3.9",
    install_requires=[
        "opik>=1.7.17",
        "dspy>=2.6.18,<3",
        "litellm",
        "tqdm",
        "datasets",
        "optuna",
        "pydantic",
        "pandas",
        "hf_xet",
    ],
    # dev requirements
    extras_require={
        "dev": [
            "adalflow",
            "pytest",
            # The pytest coverage plugin is published as "pytest-cov"
            # (previously misspelled here as "pytest-conv").
            "pytest-cov",
        ],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.10",
    ],
)
|
@@ -0,0 +1,65 @@
|
|
1
|
+
import importlib.metadata
|
2
|
+
import logging
|
3
|
+
from .logging_config import setup_logging
|
4
|
+
|
5
|
+
# Version string read from the installed distribution's metadata.
__version__ = importlib.metadata.version("opik_optimizer")

# Using WARNING as a sensible default to avoid flooding users with INFO/DEBUG
setup_logging(level=logging.WARNING)
|
9
|
+
|
10
|
+
|
11
|
+
# Lazy imports to avoid circular dependencies
|
12
|
+
def __getattr__(name):
    """Module-level attribute hook that imports public names on first access.

    Each supported name triggers the import of the submodule that defines it
    only when the attribute is actually requested.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The requested class or function.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name == "MiproOptimizer":
        from .mipro_optimizer import MiproOptimizer as resolved

        return resolved

    if name == "BaseOptimizer":
        from .base_optimizer import BaseOptimizer as resolved

        return resolved

    if name == "MetaPromptOptimizer":
        from .meta_prompt_optimizer import MetaPromptOptimizer as resolved

        return resolved

    if name == "FewShotBayesianOptimizer":
        from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer as resolved

        return resolved

    if name in ("MetricConfig", "OptimizationConfig", "TaskConfig"):
        from .optimization_config.configs import (
            MetricConfig,
            OptimizationConfig,
            TaskConfig,
        )

        config_exports = {
            "MetricConfig": MetricConfig,
            "OptimizationConfig": OptimizationConfig,
            "TaskConfig": TaskConfig,
        }
        return config_exports[name]

    if name in ("from_dataset_field", "from_llm_response_text"):
        from .optimization_config.mappers import (
            from_dataset_field,
            from_llm_response_text,
        )

        mapper_exports = {
            "from_dataset_field": from_dataset_field,
            "from_llm_response_text": from_llm_response_text,
        }
        return mapper_exports[name]

    raise AttributeError(f"module 'opik_optimizer' has no attribute '{name}'")
|
45
|
+
|
46
|
+
|
47
|
+
from opik.evaluation.models.litellm import warning_filters
|
48
|
+
|
49
|
+
warning_filters.add_warning_filters()
|
50
|
+
|
51
|
+
from .optimization_result import OptimizationResult
|
52
|
+
|
53
|
+
# Public API of the package. "OptimizationResult" and "setup_logging" are
# imported directly in this module; the remaining names are provided by the
# module-level __getattr__ defined in this file.
__all__ = [
    "BaseOptimizer",
    "FewShotBayesianOptimizer",
    "MetaPromptOptimizer",
    "MiproOptimizer",
    "MetricConfig",
    "OptimizationConfig",
    "TaskConfig",
    "from_dataset_field",
    "from_llm_response_text",
    "OptimizationResult",
    "setup_logging",
]
|
@@ -0,0 +1,43 @@
|
|
1
|
+
import threading
|
2
|
+
import time
|
3
|
+
import queue
|
4
|
+
from functools import wraps
|
5
|
+
|
6
|
+
class RateLimiter:
    """
    Rate limiter that enforces a maximum number of calls across all threads.

    Consecutive calls to :meth:`acquire` are spaced at least
    ``1 / max_calls_per_second`` seconds apart. A single lock serialises the
    callers, so the spacing holds no matter how many threads share the limiter.
    """

    def __init__(self, max_calls_per_second):
        """
        Args:
            max_calls_per_second: Maximum allowed call rate; must be > 0.

        Raises:
            ValueError: If ``max_calls_per_second`` is zero or negative.
        """
        # A zero rate used to raise ZeroDivisionError and a negative rate
        # silently disabled throttling; reject both explicitly.
        if max_calls_per_second <= 0:
            raise ValueError("max_calls_per_second must be a positive number")

        self.max_calls_per_second = max_calls_per_second
        self.interval = 1.0 / max_calls_per_second  # Time between allowed calls
        # -inf guarantees the very first call is never delayed, whatever the
        # monotonic clock's starting value is.
        self.last_call_time = float("-inf")
        self.lock = threading.Lock()

    def acquire(self):
        """
        Wait until a call is allowed according to the global rate limit.
        Returns immediately if the call is allowed, otherwise blocks until it's time.
        """
        # The sleep happens while holding the lock on purpose: waiting callers
        # queue up behind it, which is what enforces the spacing globally.
        with self.lock:
            # Use the monotonic clock so wall-clock adjustments (NTP, DST,
            # manual changes) can neither stall nor bypass the limiter.
            remaining = self.interval - (time.monotonic() - self.last_call_time)
            if remaining > 0:
                time.sleep(remaining)

            # Update the last call time (after potential sleep)
            self.last_call_time = time.monotonic()
|
33
|
+
|
34
|
+
def rate_limited(limiter):
    """Build a decorator that throttles a function through ``limiter``.

    Args:
        limiter: Object exposing an ``acquire()`` method; it is called once
            before every invocation of the decorated function.

    Returns:
        A decorator that wraps a function and preserves its metadata.
    """

    def decorator(func):
        @wraps(func)
        def throttled(*call_args, **call_kwargs):
            # Wait for the limiter's permission, then delegate unchanged.
            limiter.acquire()
            return func(*call_args, **call_kwargs)

        return throttled

    return decorator
|
43
|
+
|
@@ -0,0 +1,240 @@
|
|
1
|
+
from typing import Optional, Union, List, Dict, Any
|
2
|
+
import opik
|
3
|
+
import logging
|
4
|
+
import time
|
5
|
+
|
6
|
+
import litellm
|
7
|
+
from opik.evaluation import metrics
|
8
|
+
from opik.opik_context import get_current_span_data
|
9
|
+
from opik.rest_api.core import ApiError
|
10
|
+
|
11
|
+
from pydantic import BaseModel
|
12
|
+
from ._throttle import RateLimiter, rate_limited
|
13
|
+
from .cache_config import initialize_cache
|
14
|
+
from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
|
15
|
+
from .optimization_config.configs import TaskConfig, MetricConfig
|
16
|
+
|
17
|
+
# Module-level limiter created with max_calls_per_second=15; it is the limiter
# passed to @rate_limited on BaseOptimizer._call_model.
limiter = RateLimiter(max_calls_per_second=15)

# Don't use unsupported params:
litellm.drop_params = True

# Set up logging:
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
class OptimizationRound(BaseModel):
    """Pydantic model describing the values recorded for one optimization round."""

    # Sequence number of the round.
    # NOTE(review): whether numbering starts at 0 or 1 is not visible here.
    round_number: int
    # Prompt and score the round worked from.
    current_prompt: str
    current_score: float
    # Candidate prompts produced in the round, one dict per candidate.
    # NOTE(review): the dict schema is defined by the producing optimizer.
    generated_prompts: List[Dict[str, Any]]
    # Best prompt and its score.
    # NOTE(review): presumably best-so-far after this round; confirm with callers.
    best_prompt: str
    best_score: float
    # Score improvement for the round.
    # NOTE(review): absolute vs. relative is not shown here.
    improvement: float
|
34
|
+
|
35
|
+
|
36
|
+
class BaseOptimizer:
    """Shared state, history tracking and LLM-call helper for prompt optimizers."""

    def __init__(self, model: str, project_name: Optional[str] = None, **model_kwargs):
        """
        Base class for optimizers.

        Args:
           model: LiteLLM model name
           project_name: Opik project name
           model_kwargs: additional args for model (eg, temperature)
        """
        self.model = model
        # The reasoning model starts out identical to the main model.
        self.reasoning_model = model
        self.model_kwargs = model_kwargs
        self.project_name = project_name
        self._history = []
        self.experiment_config = None
        self.llm_call_counter = 0

        # Initialize shared cache
        initialize_cache()

    def optimize_prompt(
        self,
        dataset: Union[str, opik.Dataset],
        metric_config: MetricConfig,
        task_config: TaskConfig,
        prompt: str,
        input_key: str,
        output_key: str,
        experiment_config: Optional[Dict] = None,
        **kwargs,
    ):
        """
        Optimize a prompt.

        The base implementation only records its arguments on the instance.

        Args:
           dataset: Opik dataset name, or Opik dataset
           metric_config: instance of a MetricConfig
           task_config: instance of a TaskConfig
           prompt: the prompt to optimize
           input_key: input field of dataset
           output_key: output field of dataset
           experiment_config: Optional configuration for the experiment
           **kwargs: Additional arguments for optimization
        """
        self.dataset = dataset
        # Previously assigned the undefined name `metric` (NameError) and never
        # stored task_config; mirror the attributes set by evaluate_prompt.
        self.metric_config = metric_config
        self.task_config = task_config
        self.prompt = prompt
        self.input_key = input_key
        self.output_key = output_key
        self.experiment_config = experiment_config

    def evaluate_prompt(
        self,
        dataset: Union[str, opik.Dataset],
        metric_config: MetricConfig,
        prompt: str,
        input_key: str,
        output_key: str,
        n_samples: int = 10,
        task_config: Optional[TaskConfig] = None,
        dataset_item_ids: Optional[List[str]] = None,
        experiment_config: Optional[Dict] = None,
        **kwargs,
    ) -> float:
        """
        Evaluate a prompt.

        The base implementation only records its arguments on the instance.

        Args:
           dataset: Opik dataset name, or Opik dataset
           metric_config: instance of a MetricConfig
           task_config: instance of a TaskConfig
           prompt: the prompt to evaluate
           input_key: input field of dataset
           output_key: output field of dataset
           n_samples: number of items to test in the dataset
           dataset_item_ids: Optional list of dataset item IDs to evaluate
           experiment_config: Optional configuration for the experiment
           **kwargs: Additional arguments for evaluation

        Returns:
            float: The evaluation score
        """
        self.dataset = dataset
        self.metric_config = metric_config
        self.task_config = task_config
        self.prompt = prompt
        self.input_key = input_key
        self.output_key = output_key
        self.experiment_config = experiment_config
        return 0.0  # Base implementation returns 0

    def get_history(self) -> List[Dict[str, Any]]:
        """
        Get the optimization history.

        Returns:
            List[Dict[str, Any]]: List of optimization rounds with their details
        """
        return self._history

    def _add_to_history(self, round_data: Dict[str, Any]):
        """
        Add a round to the optimization history.

        Args:
            round_data: Dictionary containing round details
        """
        self._history.append(round_data)

    @rate_limited(limiter)
    def _call_model(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        is_reasoning: bool = False,
    ) -> str:
        """
        Send a single-turn chat completion through LiteLLM and return its text.

        Args:
            prompt: Content of the user message.
            system_prompt: Optional system message; a generic assistant prompt
                is used when omitted or empty.
            is_reasoning: Use ``self.reasoning_model`` instead of ``self.model``.

        Returns:
            The stripped content of the first choice in the response.

        Raises:
            Exception: Any error raised during the call is logged and re-raised.
        """
        model = self.reasoning_model if is_reasoning else self.model
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
            logger.debug(f"Using custom system prompt: {system_prompt[:100]}...")
        else:
            messages.append(
                {"role": "system", "content": "You are a helpful assistant."}
            )

        messages.append({"role": "user", "content": prompt})
        logger.debug(f"Calling model {model} with prompt: {prompt[:100]}...")

        # Work on a copy so per-call values never leak into self.model_kwargs.
        api_params = self.model_kwargs.copy()
        api_params.update(
            {
                "model": model,
                "messages": messages,
                # Ensure required params like 'temperature', 'max_tokens' are present
                # Defaults added here for safety, though usually set in __init__ kwargs
                "temperature": api_params.get("temperature", 0.3),
                "max_tokens": api_params.get("max_tokens", 1000),
            }
        )

        # Attempt to add Opik monitoring if available; this is best-effort and
        # must never prevent the model call itself.
        try:
            api_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(
                api_params
            )
            logger.debug("Opik monitoring hooks added to LiteLLM params.")
        except Exception as e:
            logger.warning(f"Could not add Opik monitoring to LiteLLM params: {e}")

        logger.debug(
            f"Final API params (excluding messages): { {k: v for k, v in api_params.items() if k != 'messages'} }"
        )

        # Increment Counter
        self.llm_call_counter += 1
        logger.debug(f"LLM Call Count: {self.llm_call_counter}")

        # Every log line below used the undefined name `model_to_use`, which
        # turned even successful calls into a NameError; use `model` instead.
        try:
            response = litellm.completion(**api_params)
            model_output = response.choices[0].message.content.strip()
            logger.debug(f"Model response from {model}: {model_output[:100]}...")
            return model_output
        except litellm.exceptions.RateLimitError as e:
            logger.error(f"LiteLLM Rate Limit Error for model {model}: {e}")
            # Consider adding retry logic here with tenacity
            raise
        except litellm.exceptions.APIConnectionError as e:
            logger.error(f"LiteLLM API Connection Error for model {model}: {e}")
            # Consider adding retry logic here
            raise
        except litellm.exceptions.ContextWindowExceededError as e:
            logger.error(
                f"LiteLLM Context Window Exceeded Error for model {model}. Prompt length: {len(prompt)}. Details: {e}"
            )
            raise
        except litellm.exceptions.APIError as e:  # Catch broader API errors
            logger.error(f"LiteLLM API Error for model {model}: {e}")
            raise
        except Exception as e:
            # Catch any other unexpected errors
            logger.error(
                f"Unexpected error during model call to {model}: {type(e).__name__} - {e}"
            )
            raise

    def update_optimization(self, optimization, status: str) -> None:
        """
        Update the optimization status, retrying on API errors.

        Args:
            optimization: Object exposing ``update(status=...)``.
            status: Status value to send (previously ignored in favour of a
                hard-coded "completed").
        """
        # FIXME: remove when a solution is added to opik's optimization.update method
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                optimization.update(status=status)
                return
            except ApiError:
                # Back off before retrying; no point sleeping after the last try.
                if attempt < max_attempts:
                    time.sleep(5)
        logger.warning("Unable to update optimization status; continuing...")
|
@@ -0,0 +1,24 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
import litellm
|
4
|
+
from litellm.caching import Cache
|
5
|
+
|
6
|
+
# Configure cache directory
# "~" is expanded with os.path.expanduser; the directory (and any missing
# parents) is created when this module is imported.
CACHE_DIR = os.path.expanduser("~/.litellm_cache")
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)

# Configure cache settings
# These entries are passed as keyword arguments to litellm's Cache in
# initialize_cache().
CACHE_CONFIG = {
    "type": "disk",
    "disk_cache_dir": CACHE_DIR,
}
|
15
|
+
|
16
|
+
def initialize_cache():
    """Create a LiteLLM cache from ``CACHE_CONFIG`` and install it globally.

    Returns:
        The cache object that was assigned to ``litellm.cache``.
    """
    disk_cache = Cache(**CACHE_CONFIG)
    litellm.cache = disk_cache
    return disk_cache
|
20
|
+
|
21
|
+
def clear_cache():
    """Clear the globally installed LiteLLM cache; do nothing if none is set."""
    active_cache = litellm.cache
    if not active_cache:
        return
    # NOTE(review): confirm that the installed litellm Cache exposes clear().
    active_cache.clear()
|