opik-optimizer 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (33)
  1. opik_optimizer/__init__.py +15 -26
  2. opik_optimizer/base_optimizer.py +28 -44
  3. opik_optimizer/data/hotpot-500.json +501 -1001
  4. opik_optimizer/datasets/__init__.py +6 -7
  5. opik_optimizer/datasets/hotpot_qa.py +2 -1
  6. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +742 -726
  7. opik_optimizer/evolutionary_optimizer/reporting.py +246 -0
  8. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +297 -193
  9. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +119 -0
  10. opik_optimizer/meta_prompt_optimizer/__init__.py +5 -0
  11. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +816 -0
  12. opik_optimizer/meta_prompt_optimizer/reporting.py +140 -0
  13. opik_optimizer/mipro_optimizer/__init__.py +1 -1
  14. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +12 -20
  15. opik_optimizer/mipro_optimizer/mipro_optimizer.py +32 -52
  16. opik_optimizer/mipro_optimizer/utils.py +1 -23
  17. opik_optimizer/optimization_config/chat_prompt.py +106 -0
  18. opik_optimizer/optimization_config/configs.py +2 -21
  19. opik_optimizer/optimization_config/mappers.py +1 -1
  20. opik_optimizer/optimization_result.py +57 -85
  21. opik_optimizer/reporting_utils.py +180 -0
  22. opik_optimizer/task_evaluator.py +41 -26
  23. opik_optimizer/utils.py +187 -3
  24. {opik_optimizer-0.8.0.dist-info → opik_optimizer-0.9.0.dist-info}/METADATA +15 -31
  25. opik_optimizer-0.9.0.dist-info/RECORD +48 -0
  26. {opik_optimizer-0.8.0.dist-info → opik_optimizer-0.9.0.dist-info}/WHEEL +1 -1
  27. opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +0 -91
  28. opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +0 -80
  29. opik_optimizer/integrations/__init__.py +0 -0
  30. opik_optimizer/meta_prompt_optimizer.py +0 -1151
  31. opik_optimizer-0.8.0.dist-info/RECORD +0 -45
  32. {opik_optimizer-0.8.0.dist-info → opik_optimizer-0.9.0.dist-info}/licenses/LICENSE +0 -0
  33. {opik_optimizer-0.8.0.dist-info → opik_optimizer-0.9.0.dist-info}/top_level.txt +0 -0
opik_optimizer/optimization_result.py

@@ -1,47 +1,36 @@
  """Module containing the OptimizationResult class."""

- from typing import Dict, List, Any, Optional, Union, Literal
+ from typing import Any, Dict, List, Literal, Optional
+
  import pydantic
- from opik.evaluation.metrics import BaseMetric
- from pydantic import BaseModel, Field
- from .base_optimizer import OptimizationRound # Adjust import as necessary
  import rich

- class OptimizationStep(BaseModel):
-     """Represents a single step or trial in an optimization process."""
-     step: int
-     score: Optional[float] = None
-     prompt: Optional[Union[str, List[Dict[str, str]]]] = None
-     parameters: Optional[Dict[str, Any]] = None
-     timestamp: Optional[str] = None
-     # Add other relevant details per step if needed
+ from .reporting_utils import get_console


  class OptimizationResult(pydantic.BaseModel):
-     """Result of an optimization run."""
+     """Result oan optimization run."""

-     prompt: Union[str, List[Dict[Literal["role", "content"], str]]]
+     optimizer: str = "Optimizer"
+
+     prompt: List[Dict[Literal["role", "content"], str]]
      score: float
      metric_name: str
-     metadata: Dict[str, Any] = pydantic.Field(
-         default_factory=dict
-     ) # Default empty dict
-     details: Dict[str, Any] = pydantic.Field(default_factory=dict) # Default empty dict
-     best_prompt: Optional[str] = None
-     best_score: Optional[float] = None
-     best_metric_name: Optional[str] = None
-     best_details: Optional[Dict[str, Any]] = None
-     all_results: Optional[List[Dict[str, Any]]] = None
+
+     details: Dict[str, Any] = pydantic.Field(default_factory=dict)
      history: List[Dict[str, Any]] = []
-     metric: Optional[BaseMetric] = None
-     demonstrations: Optional[List[Dict[str, Any]]] = None
-     optimizer: str = "Optimizer"
-     tool_prompts: Optional[Dict[str, str]] = None
-     opik_metadata: Optional[Dict[str, Any]] = None
      llm_calls: Optional[int] = None

+     # MIPRO specific
+     demonstrations: Optional[List[Dict[str, Any]]] = None
+     mipro_prompt: Optional[str] = None
+     tool_prompts: Optional[Dict[str, str]] = None
+
      model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

+     def model_dump(self, *kargs, **kwargs) -> Dict[str, Any]:
+         return super().model_dump(*kargs, **kwargs)
+
      def _calculate_improvement_str(self) -> str:
          """Helper to calculate improvement percentage string."""
          initial_s = self.details.get("initial_score")
@@ -91,24 +80,19 @@ class OptimizationResult(pydantic.BaseModel):
          temp = self.details.get("temperature")
          temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "N/A"

-         final_prompt_display = self.prompt
-         if self.details.get("prompt_type") == "chat" and self.details.get(
-             "chat_messages"
-         ):
-             try:
-                 chat_display = "\n".join(
-                     [
-                         f" {msg.get('role', 'unknown')}: {str(msg.get('content', ''))[:150]}..."
-                         for msg in self.details["chat_messages"]
-                     ]
-                 )
-                 final_prompt_display = f"Instruction:\n {self.prompt}\nFew-Shot Examples (Chat Structure):\n{chat_display}"
-             except Exception:
-                 pass
+         try:
+             final_prompt_display = "\n".join(
+                 [
+                     f" {msg.get('role', 'unknown')}: {str(msg.get('content', ''))[:150]}..."
+                     for msg in self.prompt
+                 ]
+             )
+         except Exception:
+             final_prompt_display = str(self.prompt)

          output = [
              f"\n{separator}",
-             f"OPTIMIZATION COMPLETE",
+             "OPTIMIZATION COMPLETE",
              f"{separator}",
              f"Optimizer: {self.optimizer}",
              f"Model Used: {model_name} (Temp: {temp_str})",
@@ -118,10 +102,10 @@ class OptimizationResult(pydantic.BaseModel):
              f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
              f"Rounds Completed: {rounds_ran}",
              f"Stopped Early: {stopped_early}",
-             f"\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
-             f"--------------------------------------------------------------------------------",
+             "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
+             "--------------------------------------------------------------------------------",
              f"{final_prompt_display}",
-             f"--------------------------------------------------------------------------------",
+             "--------------------------------------------------------------------------------",
              f"{separator}",
          ]
          return "\n".join(output)
@@ -160,43 +144,33 @@ class OptimizationResult(pydantic.BaseModel):
          table.add_row("Stopped Early:", str(stopped_early))

          # Display Chat Structure if available
-         prompt_renderable: Any = rich.text.Text(
-             self.prompt or "", overflow="fold"
-         ) # Default to text
-         panel_title = "[bold]Final Optimized Prompt (Instruction)[/bold]"
-
-         if self.details.get("prompt_type") == "chat" and self.details.get(
-             "chat_messages"
-         ):
-             panel_title = "[bold]Final Optimized Prompt (Chat Structure)[/bold]"
-             try:
-                 chat_group_items = [
-                     f"[dim]Instruction:[/dim] [i]{self.prompt}[/i]\n---"
-                 ]
-                 for msg in self.details["chat_messages"]:
-                     role = msg.get("role", "unknown")
-                     content = str(msg.get("content", ""))
-                     role_style = (
-                         "bold green"
-                         if role == "user"
-                         else (
-                             "bold blue"
-                             if role == "assistant"
-                             else ("bold magenta" if role == "system" else "")
-                         )
-                     )
-                     chat_group_items.append(
-                         f"[{role_style}]{role.capitalize()}:[/] {content}"
-                     )
-                 chat_group_items.append("---") # Separator
-                 prompt_renderable = rich.console.Group(*chat_group_items)
-
-             except Exception:
-                 # Fallback to simple text prompt
-                 prompt_renderable = rich.text.Text(self.prompt or "", overflow="fold")
-                 panel_title = (
-                     "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
-                 )
+         panel_title = "[bold]Final Optimized Prompt[/bold]"
+         try:
+             chat_group_items = []
+             for msg in self.prompt:
+                 role = msg.get("role", "unknown")
+                 content = str(msg.get("content", ""))
+                 role_style = (
+                     "bold green"
+                     if role == "user"
+                     else (
+                         "bold blue"
+                         if role == "assistant"
+                         else ("bold magenta" if role == "system" else "")
+                     )
+                 )
+                 chat_group_items.append(
+                     f"[{role_style}]{role.capitalize()}:[/] {content}"
+                 )
+             chat_group_items.append("---") # Separator
+             prompt_renderable = rich.console.Group(*chat_group_items)
+
+         except Exception:
+             # Fallback to simple text prompt
+             prompt_renderable = rich.text.Text(str(self.prompt or ""), overflow="fold")
+             panel_title = (
+                 "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
+             )

          prompt_panel = rich.panel.Panel(
              prompt_renderable, title=panel_title, border_style="blue", padding=(1, 2)
@@ -212,11 +186,9 @@ class OptimizationResult(pydantic.BaseModel):
              padding=1,
          )

-     def model_dump(self) -> Dict[str, Any]:
-         return super().model_dump()
-
      def display(self) -> None:
          """
          Displays the OptimizationResult using rich formatting
          """
-         rich.print(self)
+         console = get_console()
+         console.print(self)
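Net effect of these hunks: in 0.9.0 the prompt field is always a list of chat messages and run metadata lives in a single details dict, replacing the 0.8.0 best_prompt/best_score/metadata fields. A minimal sketch of constructing a result under the new model; every field value below is illustrative, not taken from the package:

from opik_optimizer.optimization_result import OptimizationResult

result = OptimizationResult(
    optimizer="MetaPromptOptimizer",
    prompt=[
        {"role": "system", "content": "You are a concise QA assistant."},
        {"role": "user", "content": "{question}"},
    ],
    score=0.82,
    metric_name="levenshtein_ratio",
    details={"initial_score": 0.61},  # details keys are assumptions for this sketch
)

result.display()      # rich panels rendered via the new get_console() helper
result.model_dump()   # plain dict (model_dump now forwards *args/**kwargs)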
opik_optimizer/reporting_utils.py

@@ -0,0 +1,180 @@
+ import logging
+ from contextlib import contextmanager
+ from typing import Dict, List, Optional
+
+ import rich
+ from rich import box
+ from rich.console import Console, Group
+ from rich.panel import Panel
+ from rich.progress import track
+ from rich.text import Text
+
+ PANEL_WIDTH = 70
+
+ def get_console(*args, **kwargs):
+     console = Console(*args, **kwargs)
+     console.is_jupyter = False
+     return console
+
+ @contextmanager
+ def convert_tqdm_to_rich(description: Optional[str] = None, verbose: int = 1):
+     """Context manager to convert tqdm to rich."""
+     import opik.evaluation.engine.evaluation_tasks_executor
+
+     optimizer_logger = logging.getLogger('opik_optimizer')
+
+     def _tqdm_to_track(iterable, desc, disable, total):
+         disable = verbose == 0 or optimizer_logger.level > logging.INFO
+         return track(
+             iterable,
+             description=description or desc,
+             disable=disable,
+             total=total
+         )
+
+     original__tqdm = opik.evaluation.engine.evaluation_tasks_executor._tqdm
+     opik.evaluation.engine.evaluation_tasks_executor._tqdm = _tqdm_to_track
+
+
+     from opik.evaluation import report
+     report.display_experiment_results = lambda *args, **kwargs: None
+     report.display_experiment_link = lambda *args, **kwargs: None
+
+     try:
+         yield
+     finally:
+         opik.evaluation.engine.evaluation_tasks_executor._tqdm = original__tqdm
+
+
+
+ @contextmanager
+ def suppress_opik_logs():
+     """Suppress Opik startup logs by temporarily increasing the log level."""
+     # Optimizer log level
+     optimizer_logger = logging.getLogger('opik_optimizer')
+
+     # Get the Opik logger
+     opik_logger = logging.getLogger("opik.api_objects.opik_client")
+
+     # Store original log level
+     original_level = opik_logger.level
+
+     # Set log level to ERROR to suppress INFO messages
+     opik_logger.setLevel(optimizer_logger.level)
+
+     try:
+         yield
+     finally:
+         # Restore original log level
+         opik_logger.setLevel(original_level)
+
+ def display_messages(messages: List[Dict[str, str]], prefix: str = ""):
+     for i, msg in enumerate(messages):
+         panel = Panel(
+             Text(msg.get('content', ''), overflow="fold"),
+             title=f"{msg.get('role', 'message')}",
+             title_align="left",
+             border_style="dim",
+             width=PANEL_WIDTH,
+             padding=(1, 2),
+         )
+
+         # Capture the panel as rendered text with ANSI styles
+         console = get_console()
+         with console.capture() as capture:
+             console.print(panel)
+
+         # Retrieve the rendered string (with ANSI)
+         rendered_panel = capture.get()
+
+         # Prefix each line with '| ', preserving ANSI styles
+         for line in rendered_panel.splitlines():
+             console.print(Text(prefix) + Text.from_ansi(line))
+
+ def display_header(algorithm: str, verbose: int = 1):
+     if verbose < 1:
+         return
+
+     content = Text.assemble(
+         ("● ", "green"),
+         "Running Opik Evaluation - ",
+         (algorithm, "blue")
+     )
+
+     panel = Panel(
+         content,
+         box=box.ROUNDED,
+         width=PANEL_WIDTH
+     )
+
+     console = get_console()
+     console.print(panel)
+     console.print("\n")
+
+
+ def display_result(initial_score, best_score, best_prompt, verbose: int = 1):
+     if verbose < 1:
+         return
+
+     console = get_console()
+     console.print(Text("\n> Optimization complete\n"))
+
+     if best_score > initial_score:
+         if initial_score == 0:
+             content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f}", style="bold green")]
+         else:
+             perc_change = (best_score - initial_score) / initial_score
+             content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})", style="bold green")]
+     else:
+         content = [Text("Optimization trial did not find a better prompt than the initial one.", style="bold red")]
+
+     content.append(Text("\nOptimized prompt:"))
+     for i, msg in enumerate(best_prompt):
+         content.append(
+             Panel(
+                 Text(msg.get('content', ''), overflow="fold"),
+                 title=f"{msg.get('role', 'message')}",
+                 title_align="left",
+                 border_style="dim",
+                 width=PANEL_WIDTH,
+                 padding=(1, 2),
+             )
+         )

+     console.print(
+         Panel(
+             Group(*content),
+             title="Optimization results",
+             title_align="left",
+             border_style="green",
+             width=PANEL_WIDTH,
+             padding=(1, 2)
+         )
+     )
+
+
+ def display_configuration(messages: List[Dict[str, str]], optimizer_config: Dict[str, str], verbose: int = 1):
+     """Displays the LLM messages and optimizer configuration using Rich panels."""
+
+     if verbose < 1:
+         return
+
+     # Panel for Optimizer configuration
+     console = get_console()
+     console.print(Text("> Let's optimize the prompt:\n"))
+
+     display_messages(messages)
+
+     # Panel for configuration
+     console.print(Text(f"\nUsing {optimizer_config['optimizer']} with the parameters: "))
+
+     for key, value in optimizer_config.items():
+         if key == "optimizer": # Already displayed in the introductory text
+             continue
+         parameter_text = Text.assemble(
+             Text(f" - {key}: ", style="dim"),
+             Text(str(value), style="cyan")
+         )
+         console.print(parameter_text)
+
+     console.print("\n")
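The new reporting_utils module centralises console output for the optimizers. A sketch of how these helpers could be combined around an evaluation pass; the optimizer name, messages, and scores below are placeholders, not values from the package:

from opik_optimizer.reporting_utils import (
    convert_tqdm_to_rich,
    display_configuration,
    display_header,
    display_result,
    suppress_opik_logs,
)

messages = [
    {"role": "system", "content": "Answer the question."},
    {"role": "user", "content": "{question}"},
]

display_header("MetaPromptOptimizer")
display_configuration(messages, {"optimizer": "MetaPromptOptimizer", "n_threads": 4})

with suppress_opik_logs(), convert_tqdm_to_rich("Evaluating"):
    # ... run an opik evaluation pass here; tqdm bars are rendered as rich
    # `track` progress and the per-experiment result tables/links are silenced.
    pass

display_result(initial_score=0.61, best_score=0.82, best_prompt=messages)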
opik_optimizer/task_evaluator.py

@@ -1,17 +1,47 @@
- import opik
  import logging
  from typing import Any, Callable, Dict, List, Optional
- from opik_optimizer.optimization_config.configs import MetricConfig
- from opik.evaluation.metrics import score_result

+ import opik
  from opik.evaluation import evaluator as opik_evaluator
+ from opik.evaluation.metrics import base_metric, score_result

  logger = logging.getLogger(__name__)

+ def _create_metric_class(metric: Callable):
+     class MetricClass(base_metric.BaseMetric):
+         def __init__(self):
+             self.name = metric.__name__
+
+         def score(self, llm_output, **kwargs) -> score_result.ScoreResult:
+             try:
+                 metric_val = metric(dataset_item=kwargs, llm_output=llm_output)
+                 if isinstance(metric_val , score_result.ScoreResult):
+                     return score_result.ScoreResult(
+                         name = self.name,
+                         value = metric_val.value,
+                         scoring_failed=metric_val.scoring_failed,
+                         metadata=metric_val.metadata,
+                         reason=metric_val.reason
+                     )
+                 else:
+                     return score_result.ScoreResult(
+                         name = self.name,
+                         value = metric_val,
+                         scoring_failed=False
+                     )
+             except Exception:
+                 return score_result.ScoreResult(
+                     name = self.name,
+                     value = 0,
+                     scoring_failed=True
+                 )
+
+     return MetricClass()
+
  def evaluate(
      dataset: opik.Dataset,
      evaluated_task: Callable[[Dict[str, Any]], Dict[str, Any]],
-     metric_config: MetricConfig,
+     metric: Callable,
      num_threads: int,
      optimization_id: Optional[str] = None,
      dataset_item_ids: Optional[List[str]] = None,
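The `_create_metric_class` wrapper above fixes the new metric contract for 0.9.0: a plain callable that accepts `dataset_item` and `llm_output` keyword arguments and returns either a float or an opik `ScoreResult`. A sketch of a metric written against that contract; the `answer` field name is an assumption about the dataset schema:

from opik.evaluation.metrics import score_result

def exact_match(dataset_item, llm_output):
    # `dataset_item` receives the dataset row's fields as a dict of kwargs;
    # "answer" is an assumed column name for this sketch.
    expected = str(dataset_item.get("answer", "")).strip().lower()
    actual = str(llm_output).strip().lower()
    return score_result.ScoreResult(
        name="exact_match",
        value=1.0 if expected == actual else 0.0,
        scoring_failed=False,
    )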
@@ -25,7 +55,8 @@ def evaluate(

      Args:
          dataset: A list of dictionaries representing the dataset.
-         metric_config: The metric configuration to use for evaluation.
+         metric: A metric function, this function should have two arguments:
+             dataset_item and llm_output
          evaluated_task: A function that takes a dataset item dict as input and returns a dictionary with output(s).
          dataset_item_ids: Optional list of dataset item IDs to evaluate.
          project_name: Optional project name for evaluation.
@@ -38,7 +69,7 @@ def evaluate(
      Returns:
          float: The average score of the evaluated task.
      """
-     items = dataset.get_items(dataset_item_ids)
+     items = dataset.get_items(n_samples)
      if not items:
          print("[DEBUG] Empty dataset, returning 0.0")
          return 0.0
@@ -46,31 +77,16 @@ def evaluate(
      if dataset_item_ids:
          items = [item for item in items if item.get("id") in dataset_item_ids]

-     if n_samples:
-         items = items[:n_samples]
-
-     # TODO: move to debug logger
-     # print(f"[DEBUG] Starting evaluation with task: {evaluated_task}")
-     # print(f"[DEBUG] Items to evaluate: {items}")
-     # print(f"[DEBUG] Metric config inputs: {metric_config.inputs}")
-     # print(f"[DEBUG] Number of threads: {num_threads}")
-     # print(f"[DEBUG] Project name: {project_name}")
-
-     scoring_key_mapping = {
-         key: value if isinstance(value, str) else value.__name__
-         for key, value in metric_config.inputs.items()
-     }
-     scoring_key_mapping["output"] = "_llm_task_output"
-
+     eval_metrics = [_create_metric_class(metric)]
+
      if optimization_id is not None:
          result = opik_evaluator.evaluate_optimization_trial(
              optimization_id=optimization_id,
              dataset=dataset,
              task=evaluated_task,
              project_name=project_name,
-             scoring_key_mapping=scoring_key_mapping,
              dataset_item_ids=dataset_item_ids,
-             scoring_metrics=[metric_config.metric],
+             scoring_metrics=eval_metrics,
              task_threads=num_threads,
              nb_samples=n_samples,
              experiment_config=experiment_config,
@@ -81,9 +97,8 @@ def evaluate(
              dataset=dataset,
              task=evaluated_task,
              project_name=project_name,
-             scoring_key_mapping=scoring_key_mapping,
              dataset_item_ids=dataset_item_ids,
-             scoring_metrics=[metric_config.metric],
+             scoring_metrics=eval_metrics,
              task_threads=num_threads,
              nb_samples=n_samples,
              experiment_config=experiment_config,
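Callers of `task_evaluator.evaluate` now pass the metric callable directly instead of a `MetricConfig`, and sampling is delegated to `dataset.get_items(n_samples)`. A sketch of a call under the new signature; the dataset name and task body are placeholders, the `llm_output` task key is an assumption about how output reaches the metric wrapper, and the remaining keyword parameters are assumed to accept None:

import opik
from opik_optimizer import task_evaluator

def qa_task(dataset_item):
    # Placeholder task; a real optimizer renders its candidate prompt and
    # calls the model here. The "llm_output" key is an assumption about how
    # the task output is routed to the wrapper's `llm_output` argument.
    return {"llm_output": "example answer"}

def exact_match(dataset_item, llm_output):
    # Same contract as the metric sketched after the first task_evaluator hunk.
    return float(str(llm_output).strip() == str(dataset_item.get("answer", "")).strip())

client = opik.Opik()
dataset = client.get_dataset("hotpot-qa-sample")  # placeholder dataset name

avg_score = task_evaluator.evaluate(
    dataset=dataset,
    evaluated_task=qa_task,
    metric=exact_match,
    num_threads=4,
    n_samples=50,
    project_name="opik-optimizer-demo",
    experiment_config=None,
)
print(f"Average score: {avg_score:.4f}")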