opik-optimizer 0.9.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (45)
  1. opik_optimizer/__init__.py +7 -3
  2. opik_optimizer/_throttle.py +8 -8
  3. opik_optimizer/base_optimizer.py +98 -45
  4. opik_optimizer/cache_config.py +5 -3
  5. opik_optimizer/datasets/ai2_arc.py +15 -13
  6. opik_optimizer/datasets/cnn_dailymail.py +19 -15
  7. opik_optimizer/datasets/election_questions.py +10 -11
  8. opik_optimizer/datasets/gsm8k.py +16 -11
  9. opik_optimizer/datasets/halu_eval.py +6 -5
  10. opik_optimizer/datasets/hotpot_qa.py +17 -16
  11. opik_optimizer/datasets/medhallu.py +10 -7
  12. opik_optimizer/datasets/rag_hallucinations.py +11 -8
  13. opik_optimizer/datasets/ragbench.py +17 -9
  14. opik_optimizer/datasets/tiny_test.py +33 -37
  15. opik_optimizer/datasets/truthful_qa.py +18 -12
  16. opik_optimizer/demo/cache.py +6 -6
  17. opik_optimizer/demo/datasets.py +3 -7
  18. opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
  19. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +748 -437
  20. opik_optimizer/evolutionary_optimizer/reporting.py +155 -76
  21. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +291 -181
  22. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
  23. opik_optimizer/logging_config.py +19 -15
  24. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +234 -138
  25. opik_optimizer/meta_prompt_optimizer/reporting.py +121 -47
  26. opik_optimizer/mipro_optimizer/__init__.py +2 -0
  27. opik_optimizer/mipro_optimizer/_lm.py +41 -9
  28. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
  29. opik_optimizer/mipro_optimizer/mipro_optimizer.py +135 -67
  30. opik_optimizer/mipro_optimizer/utils.py +5 -2
  31. opik_optimizer/optimizable_agent.py +179 -0
  32. opik_optimizer/optimization_config/chat_prompt.py +143 -73
  33. opik_optimizer/optimization_config/configs.py +4 -3
  34. opik_optimizer/optimization_config/mappers.py +18 -6
  35. opik_optimizer/optimization_result.py +28 -20
  36. opik_optimizer/py.typed +0 -0
  37. opik_optimizer/reporting_utils.py +96 -46
  38. opik_optimizer/task_evaluator.py +12 -14
  39. opik_optimizer/utils.py +122 -37
  40. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
  41. opik_optimizer-1.0.0.dist-info/RECORD +50 -0
  42. opik_optimizer-0.9.1.dist-info/RECORD +0 -48
  43. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
  44. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
  45. {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,7 @@
  from contextlib import contextmanager
  from io import StringIO
- from typing import List
+ from typing import Any, List

- import rich
- from rich.console import Console
  from rich.panel import Panel
  from rich.text import Text

@@ -23,19 +21,30 @@ console = get_console()


  @contextmanager
- def infer_output_style(verbose: int = 1):
+ def infer_output_style(verbose: int = 1) -> Any:
  class Reporter:
- def start_style_inference(self, output_style_prompt):
+ def start_style_inference(self) -> None:
  if verbose >= 1:
  console.print("> Infering the output style using the prompt:")
  console.print("│")
-
- def error(self, error_message):
+
+ def error(self, error_message: str) -> None:
  if verbose >= 1:
- console.print(Text("│ ").append(Text(f"Failed to infer output style: {error_message}", style="red")))
- console.print(Text("│ ").append(Text("Continuing with default style", style="dim")))
-
- def display_style_inference_prompt(self, output_style_prompt):
+ console.print(
+ Text("│ ").append(
+ Text(
+ f"Failed to infer output style: {error_message}",
+ style="red",
+ )
+ )
+ )
+ console.print(
+ Text("│ ").append(
+ Text("Continuing with default style", style="dim")
+ )
+ )
+
+ def display_style_inference_prompt(self, output_style_prompt: str) -> None:
  if verbose >= 1:
  panel = Panel(
  Text(output_style_prompt),
@@ -59,7 +68,7 @@ def infer_output_style(verbose: int = 1):
  console.print(prefixed)
  console.print(Text("│"))

- def success(self, output_style_prompt):
+ def success(self, output_style_prompt: str) -> None:
  if verbose >= 1:
  panel = Panel(
  Text(output_style_prompt),
@@ -69,7 +78,7 @@ def infer_output_style(verbose: int = 1):
  width=PANEL_WIDTH,
  padding=(1, 2),
  )
-
+
  # Capture the panel as rendered text with ANSI styles
  with console.capture() as capture:
  console.print(panel)
@@ -78,77 +87,102 @@ def infer_output_style(verbose: int = 1):
  rendered_panel = capture.get()

  # Prefix each line with '│ ', preserving ANSI styles
- prefixed_output = "\n".join(f"│ {line}" for line in rendered_panel.splitlines())
+ prefixed_output = "\n".join(
+ f"│ {line}" for line in rendered_panel.splitlines()
+ )

  # Print the prefixed output (will include colors)
  console.print(prefixed_output, highlight=False)
  console.print(Text(""))
-
+
  try:
  yield Reporter()
  finally:
  pass

+
  @contextmanager
- def initializing_population(verbose: int = 1):
+ def initializing_population(verbose: int = 1) -> Any:
  class Reporter:
- def start(self, population_size):
+ def start(self, population_size: int) -> None:
  if verbose >= 1:
- console.print(f"> Creating {population_size - 1} variations of the initial prompt")
+ console.print(
+ f"> Creating {population_size - 1} variations of the initial prompt"
+ )
  console.print("│")
-
- def start_fresh_prompts(self, num_fresh_starts):
- if verbose >= 1:
- console.print(f"│ Generating {num_fresh_starts} fresh prompts based on the task description.")
-
- def success_fresh_prompts(self, num_fresh_starts):
+
+ def start_fresh_prompts(self, num_fresh_starts: int) -> None:
  if verbose >= 1:
- console.print(Text("│ ").append(Text(f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.", style="dim green")))
- console.print("│")
-
- def failed_fresh_prompts(self, num_fresh_starts, error):
+ console.print(
+ f"│ Generating {num_fresh_starts} fresh prompts based on the task description."
+ )
+
+ def success_fresh_prompts(self, num_fresh_starts: int) -> None:
  if verbose >= 1:
- console.print(Text("│ ").append(Text(f"Failed to generate fresh prompts from LLM: {error}", style="dim red")))
+ console.print(
+ Text("│ ").append(
+ Text(
+ f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.",
+ style="dim green",
+ )
+ )
+ )
  console.print("│")

- def start_variations(self, num_variations):
+ def start_variations(self, num_variations: int) -> None:
  if verbose >= 1:
- console.print(f"│ Generating {num_variations} variations of the initial prompt.")
-
- def success_variations(self, num_variations):
+ console.print(
+ f"│ Generating {num_variations} variations of the initial prompt."
+ )
+
+ def success_variations(self, num_variations: int) -> None:
  if verbose >= 1:
- console.print(Text(f"│ Successfully generated {num_variations - 1} variations of the initial prompt).", style="dim green"))
+ console.print(
+ Text(
+ f"│ Successfully generated {num_variations - 1} variations of the initial prompt).",
+ style="dim green",
+ )
+ )
  console.print("│")
-
- def failed_variations(self, num_variations, error):
+
+ def failed_variations(self, num_variations: int, error: str) -> None:
  if verbose >= 1:
- console.print(Text(f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}", style="dim red"))
+ console.print(
+ Text(
+ f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}",
+ style="dim red",
+ )
+ )
  console.print("│")
-
- def end(self, population_prompts: List[chat_prompt.ChatPrompt]):
+
+ def end(self, population_prompts: List[chat_prompt.ChatPrompt]) -> None:
  if verbose >= 1:
- console.print(f"│ Successfully initialized population with {len(population_prompts)} prompts.")
+ console.print(
+ f"│ Successfully initialized population with {len(population_prompts)} prompts."
+ )
  console.print("")
-
-
+
  try:
  yield Reporter()
  finally:
  pass

+
  @contextmanager
- def baseline_performance(verbose: int = 1):
+ def baseline_performance(verbose: int = 1) -> Any:
  """Context manager to display messages during an evaluation phase."""
  # Entry point
  if verbose >= 1:
  console.print(Text("> First we will establish the baseline performance."))
-
+
  # Create a simple object with a method to set the score
  class Reporter:
- def set_score(self, s):
+ def set_score(self, s: float) -> None:
  if verbose >= 1:
- console.print(Text(f"\r Baseline score was: {s:.4f}.\n", style="green"))
-
+ console.print(
+ Text(f"\r Baseline score was: {s:.4f}.\n", style="green")
+ )
+
  # Use our log suppression context manager and yield the reporter
  with suppress_opik_logs():
  with convert_tqdm_to_rich(" Evaluation", verbose=verbose):
@@ -157,22 +191,27 @@ def baseline_performance(verbose: int = 1):
  finally:
  pass

+
  @contextmanager
- def evaluate_initial_population(verbose: int = 1):
+ def evaluate_initial_population(verbose: int = 1) -> Any:
  """Context manager to display messages during an evaluation phase."""
  # Entry point
  if verbose >= 1:
  console.print(Text("> Let's now evaluate the initial population"))
-
+
  # Create a simple object with a method to set the score
  class Reporter:
- def set_score(self, index, score, baseline_score):
+ def set_score(self, index: int, score: float, baseline_score: float) -> None:
  if verbose >= 1:
  if score >= baseline_score:
- console.print(Text(f"\r Prompt {index+1} score was: {score}.", style="green"))
+ console.print(
+ Text(f"\r Prompt {index+1} score was: {score}.", style="green")
+ )
  else:
- console.print(Text(f"\r Prompt {index+1} score was: {score}.", style="dim"))
-
+ console.print(
+ Text(f"\r Prompt {index+1} score was: {score}.", style="dim")
+ )
+
  # Use our log suppression context manager and yield the reporter
  with suppress_opik_logs():
  with convert_tqdm_to_rich("│ Evaluation", verbose=verbose):
@@ -182,38 +221,60 @@ def evaluate_initial_population(verbose: int = 1):
  if verbose >= 1:
  console.print("")

+
  @contextmanager
- def start_evolutionary_algo(verbose: int = 1):
+ def start_evolutionary_algo(verbose: int = 1) -> Any:
  """Context manager to display messages during an evolutionary algorithm phase."""
  # Entry point
  if verbose >= 1:
  console.print(Text("> Starting evolutionary algorithm optimization"))
-
+
  # Create a simple object with a method to set the score
  class Reporter:
- def start_gen(self, gen, num_gens):
+ def start_gen(self, gen: int, num_gens: int) -> None:
  if verbose >= 1:
  console.print(Text(f"│ Starting generation {gen} of {num_gens}"))

- def restart_population(self, restart_generation_nb):
+ def restart_population(self, restart_generation_nb: int) -> None:
  if verbose >= 1:
- console.print(Text(f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."))
-
- def performing_crossover(self):
+ console.print(
+ Text(
+ f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."
+ )
+ )
+
+ def performing_crossover(self) -> None:
  if verbose >= 1:
- console.print(Text("│ Performing crossover - Combining multiple prompts into a new one."))
-
- def performing_mutation(self):
+ console.print(
+ Text(
+ "│ Performing crossover - Combining multiple prompts into a new one."
+ )
+ )
+
+ def performing_mutation(self) -> None:
  if verbose >= 1:
- console.print(Text("│ Performing mutation - Altering prompts to improve their performance."))
-
- def performing_evaluation(self, num_prompts: int):
+ console.print(
+ Text(
+ "│ Performing mutation - Altering prompts to improve their performance."
+ )
+ )
+
+ def performing_evaluation(self, num_prompts: int) -> None:
  if verbose >= 1:
- console.print(Text(f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."))
-
- def performed_evaluation(self, prompt_idx: int, score: float):
+ console.print(
+ Text(
+ f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."
+ )
+ )
+
+ def performed_evaluation(self, prompt_idx: int, score: float) -> None:
  if verbose >= 1:
- console.print(Text(f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.", style="dim"))
+ console.print(
+ Text(
+ f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.",
+ style="dim",
+ )
+ )

  # Use our log suppression context manager and yield the reporter
  with suppress_opik_logs():
@@ -224,23 +285,41 @@ def start_evolutionary_algo(verbose: int = 1):
  if verbose >= 1:
  console.print("")

- def display_error(error_message, verbose: int = 1):
+
+ def display_error(error_message: str, verbose: int = 1) -> None:
  if verbose >= 1:
  console.print(Text("│ ").append(Text(error_message, style="dim red")))

- def display_success(message, verbose: int = 1):
+
+ def display_success(message: str, verbose: int = 1) -> None:
  if verbose >= 1:
  console.print(Text("│ ").append(Text(message, style="dim green")))

- def display_message(message, verbose: int = 1):
+
+ def display_message(message: str, verbose: int = 1) -> None:
  if verbose >= 1:
  console.print(Text("│ ").append(Text(message, style="dim")))

- def end_gen(generation_idx, best_gen_score, initial_primary_score, verbose: int = 1):
+
+ def end_gen(
+ generation_idx: int,
+ best_gen_score: float,
+ initial_primary_score: float,
+ verbose: int = 1,
+ ) -> None:
  if verbose >= 1:
  if best_gen_score >= initial_primary_score:
- console.print(Text(f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.", style="green"))
+ console.print(
+ Text(
+ f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.",
+ style="green",
+ )
+ )
  else:
- console.print(Text(f"│ Generation {generation_idx} completed. No improvement in this generation."))
+ console.print(
+ Text(
+ f"│ Generation {generation_idx} completed. No improvement in this generation."
+ )
+ )

  console.print("│")