opik-optimizer 0.9.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +7 -3
- opik_optimizer/_throttle.py +8 -8
- opik_optimizer/base_optimizer.py +98 -45
- opik_optimizer/cache_config.py +5 -3
- opik_optimizer/datasets/ai2_arc.py +15 -13
- opik_optimizer/datasets/cnn_dailymail.py +19 -15
- opik_optimizer/datasets/election_questions.py +10 -11
- opik_optimizer/datasets/gsm8k.py +16 -11
- opik_optimizer/datasets/halu_eval.py +6 -5
- opik_optimizer/datasets/hotpot_qa.py +17 -16
- opik_optimizer/datasets/medhallu.py +10 -7
- opik_optimizer/datasets/rag_hallucinations.py +11 -8
- opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer/datasets/tiny_test.py +33 -37
- opik_optimizer/datasets/truthful_qa.py +18 -12
- opik_optimizer/demo/cache.py +6 -6
- opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +748 -437
- opik_optimizer/evolutionary_optimizer/reporting.py +155 -76
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +291 -181
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
- opik_optimizer/logging_config.py +19 -15
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +234 -138
- opik_optimizer/meta_prompt_optimizer/reporting.py +121 -47
- opik_optimizer/mipro_optimizer/__init__.py +2 -0
- opik_optimizer/mipro_optimizer/_lm.py +41 -9
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +135 -67
- opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer/optimization_config/chat_prompt.py +143 -73
- opik_optimizer/optimization_config/configs.py +4 -3
- opik_optimizer/optimization_config/mappers.py +18 -6
- opik_optimizer/optimization_result.py +28 -20
- opik_optimizer/py.typed +0 -0
- opik_optimizer/reporting_utils.py +96 -46
- opik_optimizer/task_evaluator.py +12 -14
- opik_optimizer/utils.py +122 -37
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
- opik_optimizer-1.0.0.dist-info/RECORD +50 -0
- opik_optimizer-0.9.1.dist-info/RECORD +0 -48
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,106 +1,176 @@
|
|
1
|
-
from typing import Any, Dict, List,
|
1
|
+
from typing import Any, Dict, List, Optional, Union, Callable
|
2
|
+
|
3
|
+
import copy
|
2
4
|
|
3
5
|
from pydantic import BaseModel, Field
|
4
6
|
|
7
|
+
from opik import track
|
8
|
+
|
5
9
|
|
6
10
|
class Tool(BaseModel):
    """Pydantic model describing a tool by name, description and parameters.

    All three fields are required (declared with ``...`` as the default).
    """

    # Identifier of the tool.
    name: str = Field(
        ...,
        description="Name of the tool",
    )
    # Free-text explanation of the tool.
    description: str = Field(
        ...,
        description="Description of the tool",
    )
    # JSON Schema for the tool's inputs, kept as a plain dictionary.
    parameters: Dict[str, Any] = Field(
        ...,
        description="JSON Schema defining the input parameters for the tool",
    )
|
19
16
|
|
17
|
+
|
20
18
|
class ChatPrompt:
    """
    The ChatPrompt lies at the core of Opik Optimizer. It is
    either a series of messages, or a system and/or user prompt.

    The ChatPrompt must make reference to at least one field
    in the associated dataset when used with optimizations.

    Args:
        name: label stored on the prompt (defaults to "chat-prompt")
        system: the system prompt
        user: the user prompt; contains {input-dataset-field}, if given
        messages: a list of dictionaries with role/content, with
            a content containing {input-dataset-field}
        tools: tool definitions, stored as given
        function_map: mapping of name to callable; callables that do not
            expose ``__wrapped__`` are wrapped with ``track(type="tool")``
        model: stored as given
        invoke: stored as given
        project_name: stored as given
        **model_kwargs: any extra keyword arguments, stored as a dict

    Raises:
        ValueError: if none of `system`, `user`, `messages` is given, if
            `messages` is combined with `system` or `user`, or if any of
            them has the wrong type/shape.
    """

    def __init__(
        self,
        name: str = "chat-prompt",
        system: Optional[str] = None,
        user: Optional[str] = None,
        messages: Optional[List[Dict[str, str]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        function_map: Optional[Dict[str, Callable]] = None,
        model: Optional[str] = None,
        invoke: Optional[Callable] = None,
        project_name: Optional[str] = "Default Project",
        **model_kwargs: Any,
    ) -> None:
        if system is None and user is None and messages is None:
            raise ValueError(
                "At least one of `system`, `user`, or `messages` must be provided"
            )

        if user is not None and messages is not None:
            raise ValueError("`user` and `messages` cannot be provided together")

        if system is not None and messages is not None:
            raise ValueError("`system` and `messages` cannot be provided together")

        if system is not None and not isinstance(system, str):
            raise ValueError("`system` must be a string")

        if user is not None and not isinstance(user, str):
            raise ValueError("`user` must be a string")

        if messages is not None:
            if not isinstance(messages, list):
                raise ValueError("`messages` must be a list")
            for message in messages:
                if not isinstance(message, dict):
                    raise ValueError("`messages` must be a dictionary")
                if "role" not in message or "content" not in message:
                    raise ValueError(
                        "`message` must have 'role' and 'content' keys."
                    )

        self.name = name
        self.system = system
        self.user = user
        self.messages = messages
        # All of the rest are just for the ChatPrompt LLM
        # These are used from the prompt as controls:
        self.tools = tools
        if function_map:
            # Values that already expose `__wrapped__` are kept as they are
            # (so a copied prompt is not wrapped twice); everything else is
            # wrapped with the tool tracker.
            self.function_map = {
                key: (
                    value
                    if hasattr(value, "__wrapped__")
                    else track(type="tool")(value)
                )
                for key, value in function_map.items()
            }
        else:
            self.function_map = {}
        # These are used for the LiteLLMAgent class:
        self.model = model
        self.model_kwargs = model_kwargs
        self.invoke = invoke
        self.project_name = project_name

    def get_messages(
        self,
        dataset_item: Optional[Dict[str, str]] = None,
    ) -> List[Dict[str, str]]:
        """Return the prompt as a message list with placeholders filled in.

        Every ``{key}`` occurrence in the string content of any message
        (regardless of role) is replaced by ``str(dataset_item[key])``.

        Args:
            dataset_item: mapping of placeholder name to value; when empty
                or None the messages are returned without substitution.

        Returns:
            A fresh list of message dictionaries; the prompt itself is
            never modified.
        """
        # This is a copy, so we can alter the messages:
        messages = self._standardize_prompts()

        if dataset_item:
            for key, value in dataset_item.items():
                label = "{" + key + "}"
                replacement = str(value)
                for message in messages:
                    content = message["content"]
                    # Messages installed through set_messages() may carry
                    # non-string content; leave those untouched instead of
                    # failing on the membership test.
                    if isinstance(content, str) and label in content:
                        message["content"] = content.replace(label, replacement)
        return messages

    def _standardize_prompts(self, **kwargs: Any) -> List[Dict[str, str]]:
        """Build a deep-copied message list: system, then messages, then user.

        ``**kwargs`` is accepted but not used.
        """
        standardize_messages: List[Dict[str, str]] = []

        if self.system is not None:
            standardize_messages.append({"role": "system", "content": self.system})

        if self.messages is not None:
            for message in self.messages:
                standardize_messages.append(message)

        if self.user is not None:
            standardize_messages.append({"role": "user", "content": self.user})

        # Deep copy so callers can mutate the result without touching
        # the dictionaries held in self.messages.
        return copy.deepcopy(standardize_messages)

    def to_dict(self) -> Dict[str, Union[str, List[Dict[str, str]]]]:
        """Convert ChatPrompt to a dictionary for JSON serialization.

        Only the keys "system", "user" and "messages" are emitted, and
        only for the attributes that are not None.

        Returns:
            Dict containing the serializable representation of this ChatPrompt
        """
        retval: Dict[str, Union[str, List[Dict[str, str]]]] = {}
        if self.system is not None:
            retval["system"] = self.system
        if self.user is not None:
            retval["user"] = self.user
        if self.messages is not None:
            retval["messages"] = self.messages
        return retval

    def copy(self) -> "ChatPrompt":
        """Return an independent ChatPrompt carrying the same configuration.

        Messages and tools are deep-copied; name, model, invoke,
        project_name and the extra model keyword arguments are carried
        over so the copy is configured like the original.
        """
        return ChatPrompt(
            name=self.name,
            system=self.system,
            user=self.user,
            messages=copy.deepcopy(self.messages),
            tools=copy.deepcopy(self.tools),
            function_map=self.function_map,
            model=self.model,
            invoke=self.invoke,
            project_name=self.project_name,
            **self.model_kwargs,
        )

    def set_messages(self, messages: List[Dict[str, Any]]) -> None:
        """Replace the prompt content with a deep copy of ``messages``.

        ``system`` and ``user`` are cleared so that the message list is
        the only content source.
        """
        self.system = None
        self.user = None
        self.messages = copy.deepcopy(messages)

    @classmethod
    def model_validate(
        cls,
        obj: Any,
        *,
        strict: Optional[bool] = None,
        from_attributes: Optional[bool] = None,
        context: Optional[Any] = None,
        by_alias: Optional[bool] = None,
        by_name: Optional[bool] = None,
    ) -> "ChatPrompt":
        """Custom validation method to handle nested objects during deserialization.

        Reads "system", "user" and "messages" from ``obj`` (the keys written
        by ``to_dict``). The legacy "prompt" key is accepted as a fallback
        for "user". The keyword-only arguments are accepted but not used.
        """
        user = obj.get("user", None)
        if user is None:
            # Older payloads stored the user prompt under "prompt".
            user = obj.get("prompt", None)
        return ChatPrompt(
            system=obj.get("system", None),
            user=user,
            messages=obj.get("messages", None),
        )
|
106
|
-
|
@@ -1,15 +1,16 @@
|
|
1
1
|
"""Module containing configuration classes for optimization."""
|
2
2
|
|
3
|
-
from typing import Any,
|
3
|
+
from typing import Any, List
|
4
4
|
|
5
5
|
import pydantic
|
6
6
|
|
7
7
|
|
8
8
|
class TaskConfig(pydantic.BaseModel):
|
9
9
|
"""Configuration for a prompt task."""
|
10
|
+
|
10
11
|
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
|
11
|
-
|
12
|
-
instruction_prompt:
|
12
|
+
|
13
|
+
instruction_prompt: str
|
13
14
|
use_chat_prompt: bool = False
|
14
15
|
input_dataset_fields: List[str]
|
15
16
|
output_dataset_field: str
|
@@ -2,16 +2,21 @@ from typing import Dict, Callable, Optional, Any, Union
|
|
2
2
|
|
3
3
|
EVALUATED_LLM_TASK_OUTPUT = "llm_output"
|
4
4
|
|
5
|
+
|
5
6
|
class Mapper:
|
6
7
|
"""Base class for mapping functions that transform data between different formats."""
|
7
|
-
|
8
|
-
def __init__(
|
8
|
+
|
9
|
+
def __init__(
|
10
|
+
self,
|
11
|
+
name: Optional[str] = None,
|
12
|
+
transform: Optional[Callable[[Any], Any]] = None,
|
13
|
+
):
|
9
14
|
if name is not None and transform is not None:
|
10
15
|
raise ValueError("Only one of name or transform can be provided")
|
11
|
-
|
16
|
+
|
12
17
|
self.name = name
|
13
18
|
self.transform = transform
|
14
|
-
|
19
|
+
|
15
20
|
def __call__(self, data: Any) -> Any:
|
16
21
|
if self.transform is not None:
|
17
22
|
return self.transform(data)
|
@@ -19,7 +24,12 @@ class Mapper:
|
|
19
24
|
return data[self.name]
|
20
25
|
return data
|
21
26
|
|
22
|
-
|
27
|
+
|
28
|
+
def from_dataset_field(
|
29
|
+
*,
|
30
|
+
name: Optional[str] = None,
|
31
|
+
transform: Optional[Callable[[Dict[str, Any]], Any]] = None,
|
32
|
+
) -> Union[str, Callable[[Dict[str, Any]], Any]]:
|
23
33
|
if name is not None and transform is not None:
|
24
34
|
raise ValueError("Only one of name or transform can be provided")
|
25
35
|
|
@@ -36,7 +46,9 @@ def from_llm_response_text() -> str:
|
|
36
46
|
return EVALUATED_LLM_TASK_OUTPUT
|
37
47
|
|
38
48
|
|
39
|
-
def from_agent_output(
|
49
|
+
def from_agent_output(
|
50
|
+
*, name: Optional[str] = None, transform: Optional[Callable[[Any], Any]] = None
|
51
|
+
) -> Union[str, Callable[[Any], Any]]:
|
40
52
|
if name is not None and transform is not None:
|
41
53
|
raise ValueError("Only one of name or transform can be provided")
|
42
54
|
|
@@ -1,22 +1,29 @@
|
|
1
1
|
"""Module containing the OptimizationResult class."""
|
2
2
|
|
3
|
-
from typing import Any, Dict, List,
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
4
|
|
5
5
|
import pydantic
|
6
6
|
import rich
|
7
7
|
|
8
|
-
from .reporting_utils import get_console
|
8
|
+
from .reporting_utils import get_console, get_link_text
|
9
9
|
|
10
10
|
|
11
11
|
class OptimizationResult(pydantic.BaseModel):
|
12
12
|
"""Result oan optimization run."""
|
13
13
|
|
14
14
|
optimizer: str = "Optimizer"
|
15
|
-
|
16
|
-
prompt: List[Dict[
|
15
|
+
|
16
|
+
prompt: List[Dict[str, str]]
|
17
17
|
score: float
|
18
18
|
metric_name: str
|
19
|
-
|
19
|
+
|
20
|
+
optimization_id: Optional[str] = None
|
21
|
+
dataset_id: Optional[str] = None
|
22
|
+
|
23
|
+
# Initial score
|
24
|
+
initial_prompt: Optional[List[Dict[str, str]]] = None
|
25
|
+
initial_score: Optional[float] = None
|
26
|
+
|
20
27
|
details: Dict[str, Any] = pydantic.Field(default_factory=dict)
|
21
28
|
history: List[Dict[str, Any]] = []
|
22
29
|
llm_calls: Optional[int] = None
|
@@ -25,15 +32,15 @@ class OptimizationResult(pydantic.BaseModel):
|
|
25
32
|
demonstrations: Optional[List[Dict[str, Any]]] = None
|
26
33
|
mipro_prompt: Optional[str] = None
|
27
34
|
tool_prompts: Optional[Dict[str, str]] = None
|
28
|
-
|
35
|
+
|
29
36
|
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
|
30
37
|
|
31
|
-
def model_dump(self, *kargs, **kwargs) -> Dict[str, Any]:
|
38
|
+
def model_dump(self, *kargs: Any, **kwargs: Any) -> Dict[str, Any]:
|
32
39
|
return super().model_dump(*kargs, **kwargs)
|
33
40
|
|
34
41
|
def _calculate_improvement_str(self) -> str:
|
35
42
|
"""Helper to calculate improvement percentage string."""
|
36
|
-
initial_s = self.
|
43
|
+
initial_s = self.initial_score
|
37
44
|
final_s = self.score
|
38
45
|
|
39
46
|
# Check if initial score exists and is a number
|
@@ -60,7 +67,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
60
67
|
"""Provides a clean, well-formatted plain-text summary."""
|
61
68
|
separator = "=" * 80
|
62
69
|
rounds_ran = len(self.details.get("rounds", []))
|
63
|
-
initial_score = self.
|
70
|
+
initial_score = self.initial_score
|
64
71
|
initial_score_str = (
|
65
72
|
f"{initial_score:.4f}" if isinstance(initial_score, (int, float)) else "N/A"
|
66
73
|
)
|
@@ -74,7 +81,6 @@ class OptimizationResult(pydantic.BaseModel):
|
|
74
81
|
.replace("[dim]", "")
|
75
82
|
.replace("[/dim]", "")
|
76
83
|
)
|
77
|
-
stopped_early = self.details.get("stopped_early", "N/A")
|
78
84
|
|
79
85
|
model_name = self.details.get("model", "N/A")
|
80
86
|
temp = self.details.get("temperature")
|
@@ -101,7 +107,6 @@ class OptimizationResult(pydantic.BaseModel):
|
|
101
107
|
f"Final Best Score: {final_score_str}",
|
102
108
|
f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
|
103
109
|
f"Rounds Completed: {rounds_ran}",
|
104
|
-
f"Stopped Early: {stopped_early}",
|
105
110
|
"\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
|
106
111
|
"--------------------------------------------------------------------------------",
|
107
112
|
f"{final_prompt_display}",
|
@@ -114,18 +119,15 @@ class OptimizationResult(pydantic.BaseModel):
|
|
114
119
|
"""Provides a rich, formatted output for terminals supporting Rich."""
|
115
120
|
improvement_str = self._calculate_improvement_str()
|
116
121
|
rounds_ran = len(self.details.get("rounds", []))
|
117
|
-
initial_score = self.
|
122
|
+
initial_score = self.initial_score
|
118
123
|
initial_score_str = (
|
119
124
|
f"{initial_score:.4f}"
|
120
125
|
if isinstance(initial_score, (int, float))
|
121
126
|
else "[dim]N/A[/dim]"
|
122
127
|
)
|
123
128
|
final_score_str = f"{self.score:.4f}"
|
124
|
-
stopped_early = self.details.get("stopped_early", "N/A")
|
125
129
|
|
126
130
|
model_name = self.details.get("model", "[dim]N/A[/dim]")
|
127
|
-
temp = self.details.get("temperature")
|
128
|
-
temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "[dim]N/A[/dim]"
|
129
131
|
|
130
132
|
table = rich.table.Table.grid(padding=(0, 1))
|
131
133
|
table.add_column(style="dim")
|
@@ -135,13 +137,21 @@ class OptimizationResult(pydantic.BaseModel):
|
|
135
137
|
"Optimizer:",
|
136
138
|
f"[bold]{self.optimizer}[/bold]",
|
137
139
|
)
|
138
|
-
table.add_row("Model Used:", f"{model_name}
|
140
|
+
table.add_row("Model Used:", f"{model_name}")
|
139
141
|
table.add_row("Metric Evaluated:", f"[bold]{self.metric_name}[/bold]")
|
140
142
|
table.add_row("Initial Score:", initial_score_str)
|
141
143
|
table.add_row("Final Best Score:", f"[bold cyan]{final_score_str}[/bold cyan]")
|
142
144
|
table.add_row("Total Improvement:", improvement_str)
|
143
145
|
table.add_row("Rounds Completed:", str(rounds_ran))
|
144
|
-
table.add_row(
|
146
|
+
table.add_row(
|
147
|
+
"Optimization run link:",
|
148
|
+
get_link_text(
|
149
|
+
pre_text="",
|
150
|
+
link_text="Open in Opik Dashboard",
|
151
|
+
dataset_id=self.dataset_id,
|
152
|
+
optimization_id=self.optimization_id,
|
153
|
+
),
|
154
|
+
)
|
145
155
|
|
146
156
|
# Display Chat Structure if available
|
147
157
|
panel_title = "[bold]Final Optimized Prompt[/bold]"
|
@@ -168,9 +178,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
168
178
|
except Exception:
|
169
179
|
# Fallback to simple text prompt
|
170
180
|
prompt_renderable = rich.text.Text(str(self.prompt or ""), overflow="fold")
|
171
|
-
panel_title = (
|
172
|
-
"[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
|
173
|
-
)
|
181
|
+
panel_title = "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
|
174
182
|
|
175
183
|
prompt_panel = rich.panel.Panel(
|
176
184
|
prompt_renderable, title=panel_title, border_style="blue", padding=(1, 2)
|
opik_optimizer/py.typed
ADDED
File without changes
|