opik-optimizer 0.9.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. opik_optimizer/__init__.py +7 -5
  2. opik_optimizer/_throttle.py +8 -8
  3. opik_optimizer/base_optimizer.py +98 -45
  4. opik_optimizer/cache_config.py +5 -3
  5. opik_optimizer/datasets/ai2_arc.py +15 -13
  6. opik_optimizer/datasets/cnn_dailymail.py +19 -15
  7. opik_optimizer/datasets/election_questions.py +10 -11
  8. opik_optimizer/datasets/gsm8k.py +16 -11
  9. opik_optimizer/datasets/halu_eval.py +6 -5
  10. opik_optimizer/datasets/hotpot_qa.py +17 -16
  11. opik_optimizer/datasets/medhallu.py +10 -7
  12. opik_optimizer/datasets/rag_hallucinations.py +11 -8
  13. opik_optimizer/datasets/ragbench.py +17 -9
  14. opik_optimizer/datasets/tiny_test.py +33 -37
  15. opik_optimizer/datasets/truthful_qa.py +18 -12
  16. opik_optimizer/demo/cache.py +6 -6
  17. opik_optimizer/demo/datasets.py +3 -7
  18. opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
  19. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
  20. opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
  21. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
  22. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
  23. opik_optimizer/logging_config.py +19 -15
  24. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
  25. opik_optimizer/meta_prompt_optimizer/reporting.py +121 -46
  26. opik_optimizer/mipro_optimizer/__init__.py +2 -0
  27. opik_optimizer/mipro_optimizer/_lm.py +38 -9
  28. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
  29. opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
  30. opik_optimizer/mipro_optimizer/utils.py +5 -2
  31. opik_optimizer/optimizable_agent.py +179 -0
  32. opik_optimizer/optimization_config/chat_prompt.py +143 -73
  33. opik_optimizer/optimization_config/configs.py +4 -3
  34. opik_optimizer/optimization_config/mappers.py +18 -6
  35. opik_optimizer/optimization_result.py +22 -13
  36. opik_optimizer/py.typed +0 -0
  37. opik_optimizer/reporting_utils.py +89 -58
  38. opik_optimizer/task_evaluator.py +12 -14
  39. opik_optimizer/utils.py +117 -14
  40. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/METADATA +8 -8
  41. opik_optimizer-1.0.1.dist-info/RECORD +50 -0
  42. opik_optimizer-0.9.2.dist-info/RECORD +0 -48
  43. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/WHEEL +0 -0
  44. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/licenses/LICENSE +0 -0
  45. {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  from contextlib import contextmanager
2
2
  from io import StringIO
3
- from typing import List
3
+ from typing import Any, List
4
4
 
5
5
  from rich.panel import Panel
6
6
  from rich.text import Text
@@ -21,19 +21,30 @@ console = get_console()
21
21
 
22
22
 
23
23
  @contextmanager
24
- def infer_output_style(verbose: int = 1):
24
+ def infer_output_style(verbose: int = 1) -> Any:
25
25
  class Reporter:
26
- def start_style_inference(self, output_style_prompt):
26
+ def start_style_inference(self) -> None:
27
27
  if verbose >= 1:
28
28
  console.print("> Infering the output style using the prompt:")
29
29
  console.print("│")
30
-
31
- def error(self, error_message):
30
+
31
+ def error(self, error_message: str) -> None:
32
32
  if verbose >= 1:
33
- console.print(Text("│ ").append(Text(f"Failed to infer output style: {error_message}", style="red")))
34
- console.print(Text("│ ").append(Text("Continuing with default style", style="dim")))
35
-
36
- def display_style_inference_prompt(self, output_style_prompt):
33
+ console.print(
34
+ Text("│ ").append(
35
+ Text(
36
+ f"Failed to infer output style: {error_message}",
37
+ style="red",
38
+ )
39
+ )
40
+ )
41
+ console.print(
42
+ Text("│ ").append(
43
+ Text("Continuing with default style", style="dim")
44
+ )
45
+ )
46
+
47
+ def display_style_inference_prompt(self, output_style_prompt: str) -> None:
37
48
  if verbose >= 1:
38
49
  panel = Panel(
39
50
  Text(output_style_prompt),
@@ -57,7 +68,7 @@ def infer_output_style(verbose: int = 1):
57
68
  console.print(prefixed)
58
69
  console.print(Text("│"))
59
70
 
60
- def success(self, output_style_prompt):
71
+ def success(self, output_style_prompt: str) -> None:
61
72
  if verbose >= 1:
62
73
  panel = Panel(
63
74
  Text(output_style_prompt),
@@ -67,7 +78,7 @@ def infer_output_style(verbose: int = 1):
67
78
  width=PANEL_WIDTH,
68
79
  padding=(1, 2),
69
80
  )
70
-
81
+
71
82
  # Capture the panel as rendered text with ANSI styles
72
83
  with console.capture() as capture:
73
84
  console.print(panel)
@@ -76,77 +87,102 @@ def infer_output_style(verbose: int = 1):
76
87
  rendered_panel = capture.get()
77
88
 
78
89
  # Prefix each line with '│ ', preserving ANSI styles
79
- prefixed_output = "\n".join(f"│ {line}" for line in rendered_panel.splitlines())
90
+ prefixed_output = "\n".join(
91
+ f"│ {line}" for line in rendered_panel.splitlines()
92
+ )
80
93
 
81
94
  # Print the prefixed output (will include colors)
82
95
  console.print(prefixed_output, highlight=False)
83
96
  console.print(Text(""))
84
-
97
+
85
98
  try:
86
99
  yield Reporter()
87
100
  finally:
88
101
  pass
89
102
 
103
+
90
104
  @contextmanager
91
- def initializing_population(verbose: int = 1):
105
+ def initializing_population(verbose: int = 1) -> Any:
92
106
  class Reporter:
93
- def start(self, population_size):
107
+ def start(self, population_size: int) -> None:
94
108
  if verbose >= 1:
95
- console.print(f"> Creating {population_size - 1} variations of the initial prompt")
109
+ console.print(
110
+ f"> Creating {population_size - 1} variations of the initial prompt"
111
+ )
96
112
  console.print("│")
97
-
98
- def start_fresh_prompts(self, num_fresh_starts):
99
- if verbose >= 1:
100
- console.print(f"│ Generating {num_fresh_starts} fresh prompts based on the task description.")
101
-
102
- def success_fresh_prompts(self, num_fresh_starts):
113
+
114
+ def start_fresh_prompts(self, num_fresh_starts: int) -> None:
103
115
  if verbose >= 1:
104
- console.print(Text("│ ").append(Text(f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.", style="dim green")))
105
- console.print("│")
106
-
107
- def failed_fresh_prompts(self, num_fresh_starts, error):
116
+ console.print(
117
+ f"│ Generating {num_fresh_starts} fresh prompts based on the task description."
118
+ )
119
+
120
+ def success_fresh_prompts(self, num_fresh_starts: int) -> None:
108
121
  if verbose >= 1:
109
- console.print(Text("│ ").append(Text(f"Failed to generate fresh prompts from LLM: {error}", style="dim red")))
122
+ console.print(
123
+ Text("│ ").append(
124
+ Text(
125
+ f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.",
126
+ style="dim green",
127
+ )
128
+ )
129
+ )
110
130
  console.print("│")
111
131
 
112
- def start_variations(self, num_variations):
132
+ def start_variations(self, num_variations: int) -> None:
113
133
  if verbose >= 1:
114
- console.print(f"│ Generating {num_variations} variations of the initial prompt.")
115
-
116
- def success_variations(self, num_variations):
134
+ console.print(
135
+ f"│ Generating {num_variations} variations of the initial prompt."
136
+ )
137
+
138
+ def success_variations(self, num_variations: int) -> None:
117
139
  if verbose >= 1:
118
- console.print(Text(f"│ Successfully generated {num_variations - 1} variations of the initial prompt).", style="dim green"))
140
+ console.print(
141
+ Text(
142
+ f"│ Successfully generated {num_variations - 1} variations of the initial prompt).",
143
+ style="dim green",
144
+ )
145
+ )
119
146
  console.print("│")
120
-
121
- def failed_variations(self, num_variations, error):
147
+
148
+ def failed_variations(self, num_variations: int, error: str) -> None:
122
149
  if verbose >= 1:
123
- console.print(Text(f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}", style="dim red"))
150
+ console.print(
151
+ Text(
152
+ f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}",
153
+ style="dim red",
154
+ )
155
+ )
124
156
  console.print("│")
125
-
126
- def end(self, population_prompts: List[chat_prompt.ChatPrompt]):
157
+
158
+ def end(self, population_prompts: List[chat_prompt.ChatPrompt]) -> None:
127
159
  if verbose >= 1:
128
- console.print(f"│ Successfully initialized population with {len(population_prompts)} prompts.")
160
+ console.print(
161
+ f"│ Successfully initialized population with {len(population_prompts)} prompts."
162
+ )
129
163
  console.print("")
130
-
131
-
164
+
132
165
  try:
133
166
  yield Reporter()
134
167
  finally:
135
168
  pass
136
169
 
170
+
137
171
  @contextmanager
138
- def baseline_performance(verbose: int = 1):
172
+ def baseline_performance(verbose: int = 1) -> Any:
139
173
  """Context manager to display messages during an evaluation phase."""
140
174
  # Entry point
141
175
  if verbose >= 1:
142
176
  console.print(Text("> First we will establish the baseline performance."))
143
-
177
+
144
178
  # Create a simple object with a method to set the score
145
179
  class Reporter:
146
- def set_score(self, s):
180
+ def set_score(self, s: float) -> None:
147
181
  if verbose >= 1:
148
- console.print(Text(f"\r Baseline score was: {s:.4f}.\n", style="green"))
149
-
182
+ console.print(
183
+ Text(f"\r Baseline score was: {s:.4f}.\n", style="green")
184
+ )
185
+
150
186
  # Use our log suppression context manager and yield the reporter
151
187
  with suppress_opik_logs():
152
188
  with convert_tqdm_to_rich(" Evaluation", verbose=verbose):
@@ -155,22 +191,27 @@ def baseline_performance(verbose: int = 1):
155
191
  finally:
156
192
  pass
157
193
 
194
+
158
195
  @contextmanager
159
- def evaluate_initial_population(verbose: int = 1):
196
+ def evaluate_initial_population(verbose: int = 1) -> Any:
160
197
  """Context manager to display messages during an evaluation phase."""
161
198
  # Entry point
162
199
  if verbose >= 1:
163
200
  console.print(Text("> Let's now evaluate the initial population"))
164
-
201
+
165
202
  # Create a simple object with a method to set the score
166
203
  class Reporter:
167
- def set_score(self, index, score, baseline_score):
204
+ def set_score(self, index: int, score: float, baseline_score: float) -> None:
168
205
  if verbose >= 1:
169
206
  if score >= baseline_score:
170
- console.print(Text(f"\r Prompt {index+1} score was: {score}.", style="green"))
207
+ console.print(
208
+ Text(f"\r Prompt {index+1} score was: {score}.", style="green")
209
+ )
171
210
  else:
172
- console.print(Text(f"\r Prompt {index+1} score was: {score}.", style="dim"))
173
-
211
+ console.print(
212
+ Text(f"\r Prompt {index+1} score was: {score}.", style="dim")
213
+ )
214
+
174
215
  # Use our log suppression context manager and yield the reporter
175
216
  with suppress_opik_logs():
176
217
  with convert_tqdm_to_rich("│ Evaluation", verbose=verbose):
@@ -180,38 +221,60 @@ def evaluate_initial_population(verbose: int = 1):
180
221
  if verbose >= 1:
181
222
  console.print("")
182
223
 
224
+
183
225
  @contextmanager
184
- def start_evolutionary_algo(verbose: int = 1):
226
+ def start_evolutionary_algo(verbose: int = 1) -> Any:
185
227
  """Context manager to display messages during an evolutionary algorithm phase."""
186
228
  # Entry point
187
229
  if verbose >= 1:
188
230
  console.print(Text("> Starting evolutionary algorithm optimization"))
189
-
231
+
190
232
  # Create a simple object with a method to set the score
191
233
  class Reporter:
192
- def start_gen(self, gen, num_gens):
234
+ def start_gen(self, gen: int, num_gens: int) -> None:
193
235
  if verbose >= 1:
194
236
  console.print(Text(f"│ Starting generation {gen} of {num_gens}"))
195
237
 
196
- def restart_population(self, restart_generation_nb):
238
+ def restart_population(self, restart_generation_nb: int) -> None:
197
239
  if verbose >= 1:
198
- console.print(Text(f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."))
199
-
200
- def performing_crossover(self):
240
+ console.print(
241
+ Text(
242
+ f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."
243
+ )
244
+ )
245
+
246
+ def performing_crossover(self) -> None:
201
247
  if verbose >= 1:
202
- console.print(Text("│ Performing crossover - Combining multiple prompts into a new one."))
203
-
204
- def performing_mutation(self):
248
+ console.print(
249
+ Text(
250
+ "│ Performing crossover - Combining multiple prompts into a new one."
251
+ )
252
+ )
253
+
254
+ def performing_mutation(self) -> None:
205
255
  if verbose >= 1:
206
- console.print(Text("│ Performing mutation - Altering prompts to improve their performance."))
207
-
208
- def performing_evaluation(self, num_prompts: int):
256
+ console.print(
257
+ Text(
258
+ "│ Performing mutation - Altering prompts to improve their performance."
259
+ )
260
+ )
261
+
262
+ def performing_evaluation(self, num_prompts: int) -> None:
209
263
  if verbose >= 1:
210
- console.print(Text(f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."))
211
-
212
- def performed_evaluation(self, prompt_idx: int, score: float):
264
+ console.print(
265
+ Text(
266
+ f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."
267
+ )
268
+ )
269
+
270
+ def performed_evaluation(self, prompt_idx: int, score: float) -> None:
213
271
  if verbose >= 1:
214
- console.print(Text(f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.", style="dim"))
272
+ console.print(
273
+ Text(
274
+ f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.",
275
+ style="dim",
276
+ )
277
+ )
215
278
 
216
279
  # Use our log suppression context manager and yield the reporter
217
280
  with suppress_opik_logs():
@@ -222,23 +285,41 @@ def start_evolutionary_algo(verbose: int = 1):
222
285
  if verbose >= 1:
223
286
  console.print("")
224
287
 
225
- def display_error(error_message, verbose: int = 1):
288
+
289
+ def display_error(error_message: str, verbose: int = 1) -> None:
226
290
  if verbose >= 1:
227
291
  console.print(Text("│ ").append(Text(error_message, style="dim red")))
228
292
 
229
- def display_success(message, verbose: int = 1):
293
+
294
+ def display_success(message: str, verbose: int = 1) -> None:
230
295
  if verbose >= 1:
231
296
  console.print(Text("│ ").append(Text(message, style="dim green")))
232
297
 
233
- def display_message(message, verbose: int = 1):
298
+
299
+ def display_message(message: str, verbose: int = 1) -> None:
234
300
  if verbose >= 1:
235
301
  console.print(Text("│ ").append(Text(message, style="dim")))
236
302
 
237
- def end_gen(generation_idx, best_gen_score, initial_primary_score, verbose: int = 1):
303
+
304
+ def end_gen(
305
+ generation_idx: int,
306
+ best_gen_score: float,
307
+ initial_primary_score: float,
308
+ verbose: int = 1,
309
+ ) -> None:
238
310
  if verbose >= 1:
239
311
  if best_gen_score >= initial_primary_score:
240
- console.print(Text(f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.", style="green"))
312
+ console.print(
313
+ Text(
314
+ f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.",
315
+ style="green",
316
+ )
317
+ )
241
318
  else:
242
- console.print(Text(f"│ Generation {generation_idx} completed. No improvement in this generation."))
319
+ console.print(
320
+ Text(
321
+ f"│ Generation {generation_idx} completed. No improvement in this generation."
322
+ )
323
+ )
243
324
 
244
325
  console.print("│")