opik-optimizer 0.8.1__py3-none-any.whl → 0.9.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +15 -26
- opik_optimizer/base_optimizer.py +28 -44
- opik_optimizer/datasets/__init__.py +6 -7
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +742 -726
- opik_optimizer/evolutionary_optimizer/reporting.py +246 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +296 -194
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +119 -0
- opik_optimizer/meta_prompt_optimizer/__init__.py +5 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +816 -0
- opik_optimizer/meta_prompt_optimizer/reporting.py +140 -0
- opik_optimizer/mipro_optimizer/__init__.py +1 -1
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +12 -20
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +32 -52
- opik_optimizer/mipro_optimizer/utils.py +1 -23
- opik_optimizer/optimization_config/chat_prompt.py +106 -0
- opik_optimizer/optimization_config/configs.py +2 -21
- opik_optimizer/optimization_config/mappers.py +1 -1
- opik_optimizer/optimization_result.py +57 -85
- opik_optimizer/reporting_utils.py +180 -0
- opik_optimizer/task_evaluator.py +33 -25
- opik_optimizer/utils.py +187 -3
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/METADATA +15 -31
- opik_optimizer-0.9.0rc0.dist-info/RECORD +48 -0
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/WHEEL +1 -1
- opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +0 -91
- opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +0 -80
- opik_optimizer/integrations/__init__.py +0 -0
- opik_optimizer/meta_prompt_optimizer.py +0 -1151
- opik_optimizer-0.8.1.dist-info/RECORD +0 -45
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/top_level.txt +0 -0
opik_optimizer/__init__.py
CHANGED
@@ -1,47 +1,36 @@
 import importlib.metadata
 import logging
-from .logging_config import setup_logging
 
-
+from opik.evaluation.models.litellm import warning_filters
 
-
-setup_logging(level=logging.WARNING)
+from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
 
-
-from .mipro_optimizer import MiproOptimizer
+from . import datasets
 from .base_optimizer import BaseOptimizer
-from .meta_prompt_optimizer import MetaPromptOptimizer
 from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
-from .
-
-
-
-
-from .optimization_config.
-    from_dataset_field,
-    from_llm_response_text,
-)
+from .logging_config import setup_logging
+from .meta_prompt_optimizer import MetaPromptOptimizer
+from .mipro_optimizer import MiproOptimizer
+from .optimization_config.chat_prompt import ChatPrompt
+from .optimization_result import OptimizationResult
+from .optimization_config.configs import TaskConfig
 
-
-from . import datasets
+__version__ = importlib.metadata.version("opik_optimizer")
 
-
+# Using WARNING as a sensible default to avoid flooding users with INFO/DEBUG
+setup_logging(level=logging.WARNING)
 
-
-from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
+warning_filters.add_warning_filters()
 
 __all__ = [
     "BaseOptimizer",
+    "ChatPrompt",
     "FewShotBayesianOptimizer",
     "MetaPromptOptimizer",
     "MiproOptimizer",
     "EvolutionaryOptimizer",
-    "MetricConfig",
-    "OptimizationConfig",
-    "TaskConfig",
-    "from_dataset_field",
-    "from_llm_response_text",
     "OptimizationResult",
     "setup_logging",
    "datasets",
+    "TaskConfig"
 ]
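Reconstructed, this hunk reshapes the package's public surface: imports are alphabetized, __version__ is now read via importlib.metadata, litellm warning filters are installed at import time, ChatPrompt joins __all__, and MetricConfig, OptimizationConfig, from_dataset_field, and from_llm_response_text drop out while TaskConfig survives. A minimal sketch of imports against 0.9.0rc0, grounded only in the new __all__ above (the ChatPrompt constructor arguments are an assumption, since optimization_config/chat_prompt.py itself is not shown in this diff):

# Sketch only: names grounded in the new __all__ above.
from opik_optimizer import ChatPrompt, TaskConfig  # ChatPrompt is new in 0.9.0rc0

# Assumption: ChatPrompt wraps chat-style messages in place of the old
# plain-string prompt; its actual constructor lives in the (unshown)
# optimization_config/chat_prompt.py.
prompt = ChatPrompt(messages=[{"role": "user", "content": "{question}"}])

# Gone from the top-level namespace, so these 0.8.1 imports now fail:
# from opik_optimizer import MetricConfig, OptimizationConfig
# from opik_optimizer import from_dataset_field, from_llm_response_text

Note that importing the package now calls warning_filters.add_warning_filters() as a side effect, so litellm warnings are filtered for all downstream code.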
opik_optimizer/base_optimizer.py
CHANGED
@@ -1,16 +1,16 @@
-from typing import Optional, Union, List, Dict, Any
-import opik
 import logging
 import time
+from abc import abstractmethod
+from typing import Any, Callable, Dict, List, Optional
 
 import litellm
-
+import opik
 from opik.rest_api.core import ApiError
-
 from pydantic import BaseModel
+
+from . import _throttle, optimization_result
 from .cache_config import initialize_cache
-from
-from .optimization_config.configs import TaskConfig, MetricConfig
+from .optimization_config import chat_prompt
 
 _limiter = _throttle.get_rate_limiter_for_current_opik_installation()
 
@@ -22,11 +22,13 @@ logger = logging.getLogger(__name__)
 
 
 class OptimizationRound(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+
     round_number: int
-    current_prompt:
+    current_prompt: "chat_prompt.ChatPrompt"
     current_score: float
-    generated_prompts:
-    best_prompt:
+    generated_prompts: Any
+    best_prompt: "chat_prompt.ChatPrompt"
     best_score: float
     improvement: float
 
@@ -54,46 +56,37 @@ class BaseOptimizer:
         # Initialize shared cache
         initialize_cache()
 
+    @abstractmethod
     def optimize_prompt(
         self,
-
-
-
-        prompt: str,
-        input_key: str,
-        output_key: str,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: opik.Dataset,
+        metrics: List[Callable],
         experiment_config: Optional[Dict] = None,
         **kwargs,
-    ):
+    ) -> optimization_result.OptimizationResult:
         """
         Optimize a prompt.
 
         Args:
             dataset: Opik dataset name, or Opik dataset
-
-
+            metrics: A list of metric functions, these functions should have two arguments:
+                dataset_item and llm_output
             prompt: the prompt to optimize
             input_key: input field of dataset
             output_key: output field of dataset
             experiment_config: Optional configuration for the experiment
             **kwargs: Additional arguments for optimization
         """
-
-        self.metric = metric
-        self.prompt = prompt
-        self.input_key = input_key
-        self.output_key = output_key
-        self.experiment_config = experiment_config
+        pass
 
+    @abstractmethod
     def evaluate_prompt(
         self,
-
-
-
-
-        output_key: str,
-        n_samples: int = 10,
-        task_config: Optional[TaskConfig] = None,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: opik.Dataset,
+        metrics: List[Callable],
+        n_samples: Optional[int] = None,
         dataset_item_ids: Optional[List[str]] = None,
         experiment_config: Optional[Dict] = None,
         **kwargs,
@@ -102,12 +95,10 @@ class BaseOptimizer:
         Evaluate a prompt.
 
         Args:
-            dataset: Opik dataset name, or Opik dataset
-            metric_config: instance of a MetricConfig
-            task_config: instance of a TaskConfig
             prompt: the prompt to evaluate
-
-
+            dataset: Opik dataset name, or Opik dataset
+            metrics: A list of metric functions, these functions should have two arguments:
+                dataset_item and llm_output
             n_samples: number of items to test in the dataset
             dataset_item_ids: Optional list of dataset item IDs to evaluate
             experiment_config: Optional configuration for the experiment
@@ -116,14 +107,7 @@ class BaseOptimizer:
         Returns:
             float: The evaluation score
         """
-
-        self.metric_config = metric_config
-        self.task_config = task_config
-        self.prompt = prompt
-        self.input_key = input_key
-        self.output_key = output_key
-        self.experiment_config = experiment_config
-        return 0.0  # Base implementation returns 0
+        pass
 
     def get_history(self) -> List[Dict[str, Any]]:
         """
opik_optimizer/datasets/__init__.py
CHANGED
@@ -1,15 +1,14 @@
-from .hotpot_qa import hotpot_300, hotpot_500
-from .halu_eval import halu_eval_300
-from .tiny_test import tiny_test
-from .gsm8k import gsm8k
 from .ai2_arc import ai2_arc
-from .truthful_qa import truthful_qa
 from .cnn_dailymail import cnn_dailymail
-from .ragbench import ragbench_sentence_relevance
 from .election_questions import election_questions
+from .gsm8k import gsm8k
+from .halu_eval import halu_eval_300
+from .hotpot_qa import hotpot_300, hotpot_500
 from .medhallu import medhallu
 from .rag_hallucinations import rag_hallucinations
-
+from .ragbench import ragbench_sentence_relevance
+from .tiny_test import tiny_test
+from .truthful_qa import truthful_qa
 
 __all__ = [
     "hotpot_300",