opik-optimizer 0.9.2__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
- opik_optimizer/__init__.py +7 -5
- opik_optimizer/_throttle.py +8 -8
- opik_optimizer/base_optimizer.py +98 -45
- opik_optimizer/cache_config.py +5 -3
- opik_optimizer/datasets/ai2_arc.py +15 -13
- opik_optimizer/datasets/cnn_dailymail.py +19 -15
- opik_optimizer/datasets/election_questions.py +10 -11
- opik_optimizer/datasets/gsm8k.py +16 -11
- opik_optimizer/datasets/halu_eval.py +6 -5
- opik_optimizer/datasets/hotpot_qa.py +17 -16
- opik_optimizer/datasets/medhallu.py +10 -7
- opik_optimizer/datasets/rag_hallucinations.py +11 -8
- opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer/datasets/tiny_test.py +33 -37
- opik_optimizer/datasets/truthful_qa.py +18 -12
- opik_optimizer/demo/cache.py +6 -6
- opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
- opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
- opik_optimizer/logging_config.py +19 -15
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
- opik_optimizer/meta_prompt_optimizer/reporting.py +121 -46
- opik_optimizer/mipro_optimizer/__init__.py +2 -0
- opik_optimizer/mipro_optimizer/_lm.py +38 -9
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
- opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer/optimization_config/chat_prompt.py +143 -73
- opik_optimizer/optimization_config/configs.py +4 -3
- opik_optimizer/optimization_config/mappers.py +18 -6
- opik_optimizer/optimization_result.py +22 -13
- opik_optimizer/py.typed +0 -0
- opik_optimizer/reporting_utils.py +89 -58
- opik_optimizer/task_evaluator.py +12 -14
- opik_optimizer/utils.py +117 -14
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/METADATA +8 -8
- opik_optimizer-1.0.1.dist-info/RECORD +50 -0
- opik_optimizer-0.9.2.dist-info/RECORD +0 -48
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/top_level.txt +0 -0
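Most of what follows (the diff of what appears to be `opik_optimizer/evolutionary_optimizer/reporting.py`) is additive: type annotations on the reporter helpers plus richer console output composed from `rich` `Text` objects. As a rough, self-contained illustration of the `Text("│ ").append(...)` idiom the new code leans on (plain `rich` calls, not opik-optimizer's own API):

```python
from rich.console import Console
from rich.text import Text

console = Console()

# Compose a prefixed, styled message the way the new reporting helpers do:
# a plain "│ " prefix with a styled body appended to it. Text.append()
# returns the same Text instance, so the result can be passed straight
# to console.print().
message = Text("│ ").append(Text("Continuing with default style", style="dim"))
console.print(message)
```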
@@ -1,6 +1,6 @@
 from contextlib import contextmanager
 from io import StringIO
-from typing import List
+from typing import Any, List

 from rich.panel import Panel
 from rich.text import Text
@@ -21,19 +21,30 @@ console = get_console()


 @contextmanager
-def infer_output_style(verbose: int = 1):
+def infer_output_style(verbose: int = 1) -> Any:
     class Reporter:
-        def start_style_inference(self):
+        def start_style_inference(self) -> None:
             if verbose >= 1:
                 console.print("> Infering the output style using the prompt:")
                 console.print("│")
-
-        def error(self, error_message):
+
+        def error(self, error_message: str) -> None:
             if verbose >= 1:
-                console.print(
-
-
-
+                console.print(
+                    Text("│ ").append(
+                        Text(
+                            f"Failed to infer output style: {error_message}",
+                            style="red",
+                        )
+                    )
+                )
+                console.print(
+                    Text("│ ").append(
+                        Text("Continuing with default style", style="dim")
+                    )
+                )
+
+        def display_style_inference_prompt(self, output_style_prompt: str) -> None:
             if verbose >= 1:
                 panel = Panel(
                     Text(output_style_prompt),
@@ -57,7 +68,7 @@ def infer_output_style(verbose: int = 1):
                 console.print(prefixed)
                 console.print(Text("│"))

-        def success(self, output_style_prompt):
+        def success(self, output_style_prompt: str) -> None:
             if verbose >= 1:
                 panel = Panel(
                     Text(output_style_prompt),
@@ -67,7 +78,7 @@ def infer_output_style(verbose: int = 1):
                     width=PANEL_WIDTH,
                     padding=(1, 2),
                 )
-
+
                 # Capture the panel as rendered text with ANSI styles
                 with console.capture() as capture:
                     console.print(panel)
@@ -76,77 +87,102 @@ def infer_output_style(verbose: int = 1):
                 rendered_panel = capture.get()

                 # Prefix each line with '│ ', preserving ANSI styles
-                prefixed_output = "\n".join(
+                prefixed_output = "\n".join(
+                    f"│ {line}" for line in rendered_panel.splitlines()
+                )

                 # Print the prefixed output (will include colors)
                 console.print(prefixed_output, highlight=False)
                 console.print(Text(""))
-
+
     try:
         yield Reporter()
     finally:
         pass

+
 @contextmanager
-def initializing_population(verbose: int = 1):
+def initializing_population(verbose: int = 1) -> Any:
     class Reporter:
-        def start(self, population_size):
+        def start(self, population_size: int) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    f"> Creating {population_size - 1} variations of the initial prompt"
+                )
                 console.print("│")
-
-        def start_fresh_prompts(self, num_fresh_starts):
-            if verbose >= 1:
-                console.print(f"│ Generating {num_fresh_starts} fresh prompts based on the task description.")
-
-        def success_fresh_prompts(self, num_fresh_starts):
+
+        def start_fresh_prompts(self, num_fresh_starts: int) -> None:
             if verbose >= 1:
-                console.print(
-
-
-
+                console.print(
+                    f"│ Generating {num_fresh_starts} fresh prompts based on the task description."
+                )
+
+        def success_fresh_prompts(self, num_fresh_starts: int) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    Text("│ ").append(
+                        Text(
+                            f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.",
+                            style="dim green",
+                        )
+                    )
+                )
                 console.print("│")

-        def start_variations(self, num_variations):
+        def start_variations(self, num_variations: int) -> None:
             if verbose >= 1:
-                console.print(
-
-
+                console.print(
+                    f"│ Generating {num_variations} variations of the initial prompt."
+                )
+
+        def success_variations(self, num_variations: int) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    Text(
+                        f"│ Successfully generated {num_variations - 1} variations of the initial prompt).",
+                        style="dim green",
+                    )
+                )
                 console.print("│")
-
-        def failed_variations(self, num_variations, error):
+
+        def failed_variations(self, num_variations: int, error: str) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    Text(
+                        f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}",
+                        style="dim red",
+                    )
+                )
                 console.print("│")
-
-        def end(self, population_prompts: List[chat_prompt.ChatPrompt]):
+
+        def end(self, population_prompts: List[chat_prompt.ChatPrompt]) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    f"│ Successfully initialized population with {len(population_prompts)} prompts."
+                )
                 console.print("")
-
-
+
     try:
         yield Reporter()
     finally:
         pass

+
 @contextmanager
-def baseline_performance(verbose: int = 1):
+def baseline_performance(verbose: int = 1) -> Any:
     """Context manager to display messages during an evaluation phase."""
     # Entry point
     if verbose >= 1:
         console.print(Text("> First we will establish the baseline performance."))
-
+
     # Create a simple object with a method to set the score
     class Reporter:
-        def set_score(self, s):
+        def set_score(self, s: float) -> None:
             if verbose >= 1:
-                console.print(
-
+                console.print(
+                    Text(f"\r Baseline score was: {s:.4f}.\n", style="green")
+                )
+
     # Use our log suppression context manager and yield the reporter
     with suppress_opik_logs():
         with convert_tqdm_to_rich(" Evaluation", verbose=verbose):
@@ -155,22 +191,27 @@ def baseline_performance(verbose: int = 1):
     finally:
         pass

+
 @contextmanager
-def evaluate_initial_population(verbose: int = 1):
+def evaluate_initial_population(verbose: int = 1) -> Any:
     """Context manager to display messages during an evaluation phase."""
     # Entry point
     if verbose >= 1:
         console.print(Text("> Let's now evaluate the initial population"))
-
+
     # Create a simple object with a method to set the score
     class Reporter:
-        def set_score(self, index, score, baseline_score):
+        def set_score(self, index: int, score: float, baseline_score: float) -> None:
             if verbose >= 1:
                 if score >= baseline_score:
-                    console.print(
+                    console.print(
+                        Text(f"\r Prompt {index+1} score was: {score}.", style="green")
+                    )
                 else:
-                    console.print(
-
+                    console.print(
+                        Text(f"\r Prompt {index+1} score was: {score}.", style="dim")
+                    )
+
     # Use our log suppression context manager and yield the reporter
     with suppress_opik_logs():
         with convert_tqdm_to_rich("│ Evaluation", verbose=verbose):
@@ -180,38 +221,60 @@ def evaluate_initial_population(verbose: int = 1):
     if verbose >= 1:
         console.print("")

+
 @contextmanager
-def start_evolutionary_algo(verbose: int = 1):
+def start_evolutionary_algo(verbose: int = 1) -> Any:
     """Context manager to display messages during an evolutionary algorithm phase."""
     # Entry point
     if verbose >= 1:
         console.print(Text("> Starting evolutionary algorithm optimization"))
-
+
     # Create a simple object with a method to set the score
     class Reporter:
-        def start_gen(self, gen, num_gens):
+        def start_gen(self, gen: int, num_gens: int) -> None:
             if verbose >= 1:
                 console.print(Text(f"│ Starting generation {gen} of {num_gens}"))

-        def restart_population(self, restart_generation_nb):
+        def restart_population(self, restart_generation_nb: int) -> None:
             if verbose >= 1:
-                console.print(
-
-
+                console.print(
+                    Text(
+                        f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."
+                    )
+                )
+
+        def performing_crossover(self) -> None:
             if verbose >= 1:
-                console.print(
-
-
+                console.print(
+                    Text(
+                        "│ Performing crossover - Combining multiple prompts into a new one."
+                    )
+                )
+
+        def performing_mutation(self) -> None:
             if verbose >= 1:
-                console.print(
-
-
+                console.print(
+                    Text(
+                        "│ Performing mutation - Altering prompts to improve their performance."
+                    )
+                )
+
+        def performing_evaluation(self, num_prompts: int) -> None:
             if verbose >= 1:
-                console.print(
-
-
+                console.print(
+                    Text(
+                        f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."
+                    )
+                )
+
+        def performed_evaluation(self, prompt_idx: int, score: float) -> None:
             if verbose >= 1:
-                console.print(
+                console.print(
+                    Text(
+                        f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.",
+                        style="dim",
+                    )
+                )

     # Use our log suppression context manager and yield the reporter
     with suppress_opik_logs():
@@ -222,23 +285,41 @@ def start_evolutionary_algo(verbose: int = 1):
     if verbose >= 1:
         console.print("")

-
+
+def display_error(error_message: str, verbose: int = 1) -> None:
     if verbose >= 1:
         console.print(Text("│ ").append(Text(error_message, style="dim red")))

-
+
+def display_success(message: str, verbose: int = 1) -> None:
     if verbose >= 1:
         console.print(Text("│ ").append(Text(message, style="dim green")))

-
+
+def display_message(message: str, verbose: int = 1) -> None:
     if verbose >= 1:
         console.print(Text("│ ").append(Text(message, style="dim")))

-
+
+def end_gen(
+    generation_idx: int,
+    best_gen_score: float,
+    initial_primary_score: float,
+    verbose: int = 1,
+) -> None:
     if verbose >= 1:
         if best_gen_score >= initial_primary_score:
-            console.print(
+            console.print(
+                Text(
+                    f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.",
+                    style="green",
+                )
+            )
         else:
-            console.print(
+            console.print(
+                Text(
+                    f"│ Generation {generation_idx} completed. No improvement in this generation."
+                )
+            )

         console.print("│")