opik-optimizer 0.9.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. opik_optimizer/__init__.py +7 -3
  2. opik_optimizer/_throttle.py +8 -8
  3. opik_optimizer/base_optimizer.py +98 -45
  4. opik_optimizer/cache_config.py +5 -3
  5. opik_optimizer/datasets/ai2_arc.py +15 -13
  6. opik_optimizer/datasets/cnn_dailymail.py +19 -15
  7. opik_optimizer/datasets/election_questions.py +10 -11
  8. opik_optimizer/datasets/gsm8k.py +16 -11
  9. opik_optimizer/datasets/halu_eval.py +6 -5
  10. opik_optimizer/datasets/hotpot_qa.py +17 -16
  11. opik_optimizer/datasets/medhallu.py +10 -7
  12. opik_optimizer/datasets/rag_hallucinations.py +11 -8
  13. opik_optimizer/datasets/ragbench.py +17 -9
  14. opik_optimizer/datasets/tiny_test.py +33 -37
  15. opik_optimizer/datasets/truthful_qa.py +18 -12
  16. opik_optimizer/demo/cache.py +6 -6
  17. opik_optimizer/demo/datasets.py +3 -7
  18. opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
  19. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +748 -437
  20. opik_optimizer/evolutionary_optimizer/reporting.py +155 -76
  21. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +291 -181
  22. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
  23. opik_optimizer/logging_config.py +19 -15
  24. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +234 -138
  25. opik_optimizer/meta_prompt_optimizer/reporting.py +121 -47
  26. opik_optimizer/mipro_optimizer/__init__.py +2 -0
  27. opik_optimizer/mipro_optimizer/_lm.py +41 -9
  28. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
  29. opik_optimizer/mipro_optimizer/mipro_optimizer.py +135 -67
  30. opik_optimizer/mipro_optimizer/utils.py +5 -2
  31. opik_optimizer/optimizable_agent.py +179 -0
  32. opik_optimizer/optimization_config/chat_prompt.py +143 -73
  33. opik_optimizer/optimization_config/configs.py +4 -3
  34. opik_optimizer/optimization_config/mappers.py +18 -6
  35. opik_optimizer/optimization_result.py +28 -20
  36. opik_optimizer/py.typed +0 -0
  37. opik_optimizer/reporting_utils.py +96 -46
  38. opik_optimizer/task_evaluator.py +12 -14
  39. opik_optimizer/utils.py +122 -37
  40. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
  41. opik_optimizer-1.0.0.dist-info/RECORD +50 -0
  42. opik_optimizer-0.9.1.dist-info/RECORD +0 -48
  43. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
  44. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
  45. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,106 +1,176 @@
1
- from typing import Any, Dict, List, Literal, Optional
1
+ from typing import Any, Dict, List, Optional, Union, Callable
2
+
3
+ import copy
2
4
 
3
5
  from pydantic import BaseModel, Field
4
6
 
7
+ from opik import track
8
+
5
9
 
6
10
  class Tool(BaseModel):
7
- name: str =Field(
8
- ...,
9
- description="Name of the tool"
10
- )
11
- description: str = Field(
12
- ...,
13
- description="Description of the tool"
14
- )
11
+ name: str = Field(..., description="Name of the tool")
12
+ description: str = Field(..., description="Description of the tool")
15
13
  parameters: Dict[str, Any] = Field(
16
- ...,
17
- description="JSON Schema defining the input parameters for the tool"
14
+ ..., description="JSON Schema defining the input parameters for the tool"
18
15
  )
19
16
 
17
+
20
18
  class ChatPrompt:
21
- system: str
22
- prompt: str
23
- messages: List[Dict[Literal["role", "content"], str]]
19
+ """
20
+ The ChatPrompt lies at the core of Opik Optimizer. It is
21
+ either a series of messages, or a system and/or user prompt.
22
+
23
+ The ChatPrompt must make reference to at least one field
24
+ in the associated dataset when used with optimizations.
25
+
26
+ Args:
27
+ system: the system prompt
28
+ user: the user prompt; contains {input-dataset-field}, if given
29
+ messages: a list of dictionaries with role/content, with
30
+ a content containing {input-dataset-field}
31
+ """
24
32
 
25
33
  def __init__(
26
34
  self,
35
+ name: str = "chat-prompt",
27
36
  system: Optional[str] = None,
28
- prompt: Optional[str] = None,
29
- messages: Optional[List[Dict[Literal["role", "content"], str]]] = None,
30
- tools: Optional[List[Tool]] = None
31
- ):
37
+ user: Optional[str] = None,
38
+ messages: Optional[List[Dict[str, str]]] = None,
39
+ tools: Optional[List[Dict[str, Any]]] = None,
40
+ function_map: Optional[Dict[str, Callable]] = None,
41
+ model: Optional[str] = None,
42
+ invoke: Optional[Callable] = None,
43
+ project_name: Optional[str] = "Default Project",
44
+ **model_kwargs: Any,
45
+ ) -> None:
46
+ if system is None and user is None and messages is None:
47
+ raise ValueError(
48
+ "At least one of `system`, `user`, or `messages` must be provided"
49
+ )
50
+
51
+ if user is not None and messages is not None:
52
+ raise ValueError("`user` and `messages` cannot be provided together")
53
+
54
+ if system is not None and messages is not None:
55
+ raise ValueError("`system` and `messages` cannot be provided together")
56
+
57
+ if system is not None and not isinstance(system, str):
58
+ raise ValueError("`system` must be a string")
59
+
60
+ if user is not None and not isinstance(user, str):
61
+ raise ValueError("`user` must be a string")
62
+
63
+ if messages is not None:
64
+ if not isinstance(messages, list):
65
+ raise ValueError("`messages` must be a list")
66
+ else:
67
+ for message in messages:
68
+ if not isinstance(message, dict):
69
+ raise ValueError("`messages` must be a dictionary")
70
+ elif "role" not in message or "content" not in message:
71
+ raise ValueError(
72
+ "`message` must have 'role' and 'content' keys."
73
+ )
74
+ self.name = name
32
75
  self.system = system
33
- self.prompt = prompt
76
+ self.user = user
34
77
  self.messages = messages
78
+ # All of the rest are just for the ChatPrompt LLM
79
+ # These are used from the prompt as controls:
80
+ self.tools = tools
81
+ if function_map:
82
+ self.function_map = {
83
+ key: (
84
+ value
85
+ if hasattr(value, "__wrapped__")
86
+ else track(type="tool")(value)
87
+ )
88
+ for key, value in function_map.items()
89
+ }
90
+ else:
91
+ self.function_map = {}
92
+ # These are used for the LiteLLMAgent class:
93
+ self.model = model
94
+ self.model_kwargs = model_kwargs
95
+ self.invoke = invoke
96
+ self.project_name = project_name
35
97
 
36
- self.formatted_messages = self._standardize_prompts()
37
-
38
- def _standardize_prompts(
39
- self, **kwargs: Any
40
- ) -> List[Dict[Literal["role", "content"], str]]:
41
- if (self.system is None and self.prompt is None and self.messages is None):
42
- raise ValueError(
43
- "At least one of `system`, `prompt` or `messages` must be provided"
44
- )
98
+ def get_messages(
99
+ self,
100
+ dataset_item: Optional[Dict[str, str]] = None,
101
+ ) -> List[Dict[str, str]]:
102
+ # This is a copy, so we can alter the messages:
103
+ messages = self._standardize_prompts()
45
104
 
46
- if (self.prompt is not None and self.messages is not None):
47
- raise ValueError(
48
- "`prompt` and `messages` cannot be provided together"
49
- )
50
-
51
- if (self.system is not None and not isinstance(self.system, str)):
52
- raise ValueError(
53
- "`system` must be a string"
54
- )
55
-
56
- if (self.prompt is not None and not isinstance(self.prompt, str)):
57
- raise ValueError(
58
- "`prompt` must be a string"
59
- )
105
+ if dataset_item:
106
+ for key, value in dataset_item.items():
107
+ for message in messages:
108
+ # Replace the placeholder in every message's content, regardless of role:
109
+ label = "{" + key + "}"
110
+ if label in message["content"]:
111
+ message["content"] = message["content"].replace(
112
+ label, str(value)
113
+ )
114
+ return messages
60
115
 
61
- if (self.messages is not None and not isinstance(self.messages, list)):
62
- raise ValueError(
63
- "`messages` must be a list"
64
- )
116
+ def _standardize_prompts(self, **kwargs: Any) -> List[Dict[str, str]]:
117
+ standardize_messages: List[Dict[str, str]] = []
65
118
 
66
- standardize_messages = []
67
-
68
- if (self.system is not None):
119
+ if self.system is not None:
69
120
  standardize_messages.append({"role": "system", "content": self.system})
70
-
71
- if (self.prompt is not None):
72
- standardize_messages.append({"role": "user", "content": self.prompt})
73
-
74
- if (self.messages is not None):
121
+
122
+ if self.messages is not None:
75
123
  for message in self.messages:
76
124
  standardize_messages.append(message)
77
-
78
- return standardize_messages
79
125
 
80
- def format(self, **kwargs: Any) -> str:
81
- return self.prompt.format(**kwargs)
126
+ if self.user is not None:
127
+ standardize_messages.append({"role": "user", "content": self.user})
82
128
 
83
- def to_dict(self) -> Dict[str, Any]:
129
+ return copy.deepcopy(standardize_messages)
130
+
131
+ def to_dict(self) -> Dict[str, Union[str, List[Dict[str, str]]]]:
84
132
  """Convert ChatPrompt to a dictionary for JSON serialization.
85
-
133
+
86
134
  Returns:
87
135
  Dict containing the serializable representation of this ChatPrompt
88
136
  """
89
- return {
90
- "system": self.system,
91
- "prompt": self.prompt,
92
- "messages": self.messages,
93
- "formatted_messages": self.formatted_messages
94
- }
137
+ retval: Dict[str, Union[str, List[Dict[str, str]]]] = {}
138
+ if self.system is not None:
139
+ retval["system"] = self.system
140
+ if self.user is not None:
141
+ retval["user"] = self.user
142
+ if self.messages is not None:
143
+ retval["messages"] = self.messages
144
+ return retval
145
+
146
+ def copy(self) -> "ChatPrompt":
147
+ return ChatPrompt(
148
+ system=self.system,
149
+ user=self.user,
150
+ messages=copy.deepcopy(self.messages),
151
+ tools=self.tools,
152
+ function_map=self.function_map,
153
+ )
154
+
155
+ def set_messages(self, messages: List[Dict[str, Any]]) -> None:
156
+ self.system = None
157
+ self.user = None
158
+ self.messages = copy.deepcopy(messages)
95
159
 
96
160
  @classmethod
97
- def model_validate(cls, obj: Any, *, strict: Optional[bool] = None, from_attributes: Optional[bool] = None,
98
- context: Optional[Any] = None, by_alias: Optional[bool] = None, by_name: Optional[bool] = None) -> 'ChatPrompt':
161
+ def model_validate(
162
+ cls,
163
+ obj: Any,
164
+ *,
165
+ strict: Optional[bool] = None,
166
+ from_attributes: Optional[bool] = None,
167
+ context: Optional[Any] = None,
168
+ by_alias: Optional[bool] = None,
169
+ by_name: Optional[bool] = None,
170
+ ) -> "ChatPrompt":
99
171
  """Custom validation method to handle nested objects during deserialization."""
100
172
  return ChatPrompt(
101
- system=obj.get('system', None),
102
- prompt=obj.get('prompt', None),
103
- messages=obj.get('messages', None),
104
-
173
+ system=obj.get("system", None),
174
+ prompt=obj.get("prompt", None),
175
+ messages=obj.get("messages", None),
105
176
  )
106
-
@@ -1,15 +1,16 @@
1
1
  """Module containing configuration classes for optimization."""
2
2
 
3
- from typing import Any, Dict, List, Literal, Union
3
+ from typing import Any, List
4
4
 
5
5
  import pydantic
6
6
 
7
7
 
8
8
  class TaskConfig(pydantic.BaseModel):
9
9
  """Configuration for a prompt task."""
10
+
10
11
  model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
11
-
12
- instruction_prompt: Union[str, List[Dict[Literal["role", "content"], str]]]
12
+
13
+ instruction_prompt: str
13
14
  use_chat_prompt: bool = False
14
15
  input_dataset_fields: List[str]
15
16
  output_dataset_field: str
@@ -2,16 +2,21 @@ from typing import Dict, Callable, Optional, Any, Union
2
2
 
3
3
  EVALUATED_LLM_TASK_OUTPUT = "llm_output"
4
4
 
5
+
5
6
  class Mapper:
6
7
  """Base class for mapping functions that transform data between different formats."""
7
-
8
- def __init__(self, name: Optional[str] = None, transform: Optional[Callable[[Any], Any]] = None):
8
+
9
+ def __init__(
10
+ self,
11
+ name: Optional[str] = None,
12
+ transform: Optional[Callable[[Any], Any]] = None,
13
+ ):
9
14
  if name is not None and transform is not None:
10
15
  raise ValueError("Only one of name or transform can be provided")
11
-
16
+
12
17
  self.name = name
13
18
  self.transform = transform
14
-
19
+
15
20
  def __call__(self, data: Any) -> Any:
16
21
  if self.transform is not None:
17
22
  return self.transform(data)
@@ -19,7 +24,12 @@ class Mapper:
19
24
  return data[self.name]
20
25
  return data
21
26
 
22
- def from_dataset_field(*, name: str = None, transform: Optional[Callable[[Dict[str, Any]], Any]] = None) -> Union[str, Callable[[Dict[str, Any]], Any]]:
27
+
28
+ def from_dataset_field(
29
+ *,
30
+ name: Optional[str] = None,
31
+ transform: Optional[Callable[[Dict[str, Any]], Any]] = None,
32
+ ) -> Union[str, Callable[[Dict[str, Any]], Any]]:
23
33
  if name is not None and transform is not None:
24
34
  raise ValueError("Only one of name or transform can be provided")
25
35
 
@@ -36,7 +46,9 @@ def from_llm_response_text() -> str:
36
46
  return EVALUATED_LLM_TASK_OUTPUT
37
47
 
38
48
 
39
- def from_agent_output(*, name: str = None, transform: Optional[Callable[[Any], Any]] = None) -> Union[str, Callable[[Any], Any]]:
49
+ def from_agent_output(
50
+ *, name: Optional[str] = None, transform: Optional[Callable[[Any], Any]] = None
51
+ ) -> Union[str, Callable[[Any], Any]]:
40
52
  if name is not None and transform is not None:
41
53
  raise ValueError("Only one of name or transform can be provided")
42
54
 
@@ -1,22 +1,29 @@
1
1
  """Module containing the OptimizationResult class."""
2
2
 
3
- from typing import Any, Dict, List, Literal, Optional
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  import pydantic
6
6
  import rich
7
7
 
8
- from .reporting_utils import get_console
8
+ from .reporting_utils import get_console, get_link_text
9
9
 
10
10
 
11
11
  class OptimizationResult(pydantic.BaseModel):
12
12
  """Result of an optimization run."""
13
13
 
14
14
  optimizer: str = "Optimizer"
15
-
16
- prompt: List[Dict[Literal["role", "content"], str]]
15
+
16
+ prompt: List[Dict[str, str]]
17
17
  score: float
18
18
  metric_name: str
19
-
19
+
20
+ optimization_id: Optional[str] = None
21
+ dataset_id: Optional[str] = None
22
+
23
+ # Initial score
24
+ initial_prompt: Optional[List[Dict[str, str]]] = None
25
+ initial_score: Optional[float] = None
26
+
20
27
  details: Dict[str, Any] = pydantic.Field(default_factory=dict)
21
28
  history: List[Dict[str, Any]] = []
22
29
  llm_calls: Optional[int] = None
@@ -25,15 +32,15 @@ class OptimizationResult(pydantic.BaseModel):
25
32
  demonstrations: Optional[List[Dict[str, Any]]] = None
26
33
  mipro_prompt: Optional[str] = None
27
34
  tool_prompts: Optional[Dict[str, str]] = None
28
-
35
+
29
36
  model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
30
37
 
31
- def model_dump(self, *kargs, **kwargs) -> Dict[str, Any]:
38
+ def model_dump(self, *kargs: Any, **kwargs: Any) -> Dict[str, Any]:
32
39
  return super().model_dump(*kargs, **kwargs)
33
40
 
34
41
  def _calculate_improvement_str(self) -> str:
35
42
  """Helper to calculate improvement percentage string."""
36
- initial_s = self.details.get("initial_score")
43
+ initial_s = self.initial_score
37
44
  final_s = self.score
38
45
 
39
46
  # Check if initial score exists and is a number
@@ -60,7 +67,7 @@ class OptimizationResult(pydantic.BaseModel):
60
67
  """Provides a clean, well-formatted plain-text summary."""
61
68
  separator = "=" * 80
62
69
  rounds_ran = len(self.details.get("rounds", []))
63
- initial_score = self.details.get("initial_score")
70
+ initial_score = self.initial_score
64
71
  initial_score_str = (
65
72
  f"{initial_score:.4f}" if isinstance(initial_score, (int, float)) else "N/A"
66
73
  )
@@ -74,7 +81,6 @@ class OptimizationResult(pydantic.BaseModel):
74
81
  .replace("[dim]", "")
75
82
  .replace("[/dim]", "")
76
83
  )
77
- stopped_early = self.details.get("stopped_early", "N/A")
78
84
 
79
85
  model_name = self.details.get("model", "N/A")
80
86
  temp = self.details.get("temperature")
@@ -101,7 +107,6 @@ class OptimizationResult(pydantic.BaseModel):
101
107
  f"Final Best Score: {final_score_str}",
102
108
  f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
103
109
  f"Rounds Completed: {rounds_ran}",
104
- f"Stopped Early: {stopped_early}",
105
110
  "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
106
111
  "--------------------------------------------------------------------------------",
107
112
  f"{final_prompt_display}",
@@ -114,18 +119,15 @@ class OptimizationResult(pydantic.BaseModel):
114
119
  """Provides a rich, formatted output for terminals supporting Rich."""
115
120
  improvement_str = self._calculate_improvement_str()
116
121
  rounds_ran = len(self.details.get("rounds", []))
117
- initial_score = self.details.get("initial_score")
122
+ initial_score = self.initial_score
118
123
  initial_score_str = (
119
124
  f"{initial_score:.4f}"
120
125
  if isinstance(initial_score, (int, float))
121
126
  else "[dim]N/A[/dim]"
122
127
  )
123
128
  final_score_str = f"{self.score:.4f}"
124
- stopped_early = self.details.get("stopped_early", "N/A")
125
129
 
126
130
  model_name = self.details.get("model", "[dim]N/A[/dim]")
127
- temp = self.details.get("temperature")
128
- temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "[dim]N/A[/dim]"
129
131
 
130
132
  table = rich.table.Table.grid(padding=(0, 1))
131
133
  table.add_column(style="dim")
@@ -135,13 +137,21 @@ class OptimizationResult(pydantic.BaseModel):
135
137
  "Optimizer:",
136
138
  f"[bold]{self.optimizer}[/bold]",
137
139
  )
138
- table.add_row("Model Used:", f"{model_name} ([dim]Temp:[/dim] {temp_str})")
140
+ table.add_row("Model Used:", f"{model_name}")
139
141
  table.add_row("Metric Evaluated:", f"[bold]{self.metric_name}[/bold]")
140
142
  table.add_row("Initial Score:", initial_score_str)
141
143
  table.add_row("Final Best Score:", f"[bold cyan]{final_score_str}[/bold cyan]")
142
144
  table.add_row("Total Improvement:", improvement_str)
143
145
  table.add_row("Rounds Completed:", str(rounds_ran))
144
- table.add_row("Stopped Early:", str(stopped_early))
146
+ table.add_row(
147
+ "Optimization run link:",
148
+ get_link_text(
149
+ pre_text="",
150
+ link_text="Open in Opik Dashboard",
151
+ dataset_id=self.dataset_id,
152
+ optimization_id=self.optimization_id,
153
+ ),
154
+ )
145
155
 
146
156
  # Display Chat Structure if available
147
157
  panel_title = "[bold]Final Optimized Prompt[/bold]"
@@ -168,9 +178,7 @@ class OptimizationResult(pydantic.BaseModel):
168
178
  except Exception:
169
179
  # Fallback to simple text prompt
170
180
  prompt_renderable = rich.text.Text(str(self.prompt or ""), overflow="fold")
171
- panel_title = (
172
- "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
173
- )
181
+ panel_title = "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
174
182
 
175
183
  prompt_panel = rich.panel.Panel(
176
184
  prompt_renderable, title=panel_title, border_style="blue", padding=(1, 2)
File without changes