opik-optimizer 0.9.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +7 -3
- opik_optimizer/_throttle.py +8 -8
- opik_optimizer/base_optimizer.py +98 -45
- opik_optimizer/cache_config.py +5 -3
- opik_optimizer/datasets/ai2_arc.py +15 -13
- opik_optimizer/datasets/cnn_dailymail.py +19 -15
- opik_optimizer/datasets/election_questions.py +10 -11
- opik_optimizer/datasets/gsm8k.py +16 -11
- opik_optimizer/datasets/halu_eval.py +6 -5
- opik_optimizer/datasets/hotpot_qa.py +17 -16
- opik_optimizer/datasets/medhallu.py +10 -7
- opik_optimizer/datasets/rag_hallucinations.py +11 -8
- opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer/datasets/tiny_test.py +33 -37
- opik_optimizer/datasets/truthful_qa.py +18 -12
- opik_optimizer/demo/cache.py +6 -6
- opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +748 -437
- opik_optimizer/evolutionary_optimizer/reporting.py +155 -76
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +291 -181
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
- opik_optimizer/logging_config.py +19 -15
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +234 -138
- opik_optimizer/meta_prompt_optimizer/reporting.py +121 -47
- opik_optimizer/mipro_optimizer/__init__.py +2 -0
- opik_optimizer/mipro_optimizer/_lm.py +41 -9
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +135 -67
- opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer/optimization_config/chat_prompt.py +143 -73
- opik_optimizer/optimization_config/configs.py +4 -3
- opik_optimizer/optimization_config/mappers.py +18 -6
- opik_optimizer/optimization_result.py +28 -20
- opik_optimizer/py.typed +0 -0
- opik_optimizer/reporting_utils.py +96 -46
- opik_optimizer/task_evaluator.py +12 -14
- opik_optimizer/utils.py +122 -37
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/METADATA +8 -8
- opik_optimizer-1.0.0.dist-info/RECORD +50 -0
- opik_optimizer-0.9.1.dist-info/RECORD +0 -48
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,7 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
2
|
from io import StringIO
|
3
|
-
from typing import List
|
3
|
+
from typing import Any, List
|
4
4
|
|
5
|
-
import rich
|
6
|
-
from rich.console import Console
|
7
5
|
from rich.panel import Panel
|
8
6
|
from rich.text import Text
|
9
7
|
|
@@ -23,19 +21,30 @@ console = get_console()
|
|
23
21
|
|
24
22
|
|
25
23
|
@contextmanager
|
26
|
-
def infer_output_style(verbose: int = 1):
|
24
|
+
def infer_output_style(verbose: int = 1) -> Any:
|
27
25
|
class Reporter:
|
28
|
-
def start_style_inference(self
|
26
|
+
def start_style_inference(self) -> None:
|
29
27
|
if verbose >= 1:
|
30
28
|
console.print("> Infering the output style using the prompt:")
|
31
29
|
console.print("│")
|
32
|
-
|
33
|
-
def error(self, error_message):
|
30
|
+
|
31
|
+
def error(self, error_message: str) -> None:
|
34
32
|
if verbose >= 1:
|
35
|
-
console.print(
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
console.print(
|
34
|
+
Text("│ ").append(
|
35
|
+
Text(
|
36
|
+
f"Failed to infer output style: {error_message}",
|
37
|
+
style="red",
|
38
|
+
)
|
39
|
+
)
|
40
|
+
)
|
41
|
+
console.print(
|
42
|
+
Text("│ ").append(
|
43
|
+
Text("Continuing with default style", style="dim")
|
44
|
+
)
|
45
|
+
)
|
46
|
+
|
47
|
+
def display_style_inference_prompt(self, output_style_prompt: str) -> None:
|
39
48
|
if verbose >= 1:
|
40
49
|
panel = Panel(
|
41
50
|
Text(output_style_prompt),
|
@@ -59,7 +68,7 @@ def infer_output_style(verbose: int = 1):
|
|
59
68
|
console.print(prefixed)
|
60
69
|
console.print(Text("│"))
|
61
70
|
|
62
|
-
def success(self, output_style_prompt):
|
71
|
+
def success(self, output_style_prompt: str) -> None:
|
63
72
|
if verbose >= 1:
|
64
73
|
panel = Panel(
|
65
74
|
Text(output_style_prompt),
|
@@ -69,7 +78,7 @@ def infer_output_style(verbose: int = 1):
|
|
69
78
|
width=PANEL_WIDTH,
|
70
79
|
padding=(1, 2),
|
71
80
|
)
|
72
|
-
|
81
|
+
|
73
82
|
# Capture the panel as rendered text with ANSI styles
|
74
83
|
with console.capture() as capture:
|
75
84
|
console.print(panel)
|
@@ -78,77 +87,102 @@ def infer_output_style(verbose: int = 1):
|
|
78
87
|
rendered_panel = capture.get()
|
79
88
|
|
80
89
|
# Prefix each line with '│ ', preserving ANSI styles
|
81
|
-
prefixed_output = "\n".join(
|
90
|
+
prefixed_output = "\n".join(
|
91
|
+
f"│ {line}" for line in rendered_panel.splitlines()
|
92
|
+
)
|
82
93
|
|
83
94
|
# Print the prefixed output (will include colors)
|
84
95
|
console.print(prefixed_output, highlight=False)
|
85
96
|
console.print(Text(""))
|
86
|
-
|
97
|
+
|
87
98
|
try:
|
88
99
|
yield Reporter()
|
89
100
|
finally:
|
90
101
|
pass
|
91
102
|
|
103
|
+
|
92
104
|
@contextmanager
|
93
|
-
def initializing_population(verbose: int = 1):
|
105
|
+
def initializing_population(verbose: int = 1) -> Any:
|
94
106
|
class Reporter:
|
95
|
-
def start(self, population_size):
|
107
|
+
def start(self, population_size: int) -> None:
|
96
108
|
if verbose >= 1:
|
97
|
-
console.print(
|
109
|
+
console.print(
|
110
|
+
f"> Creating {population_size - 1} variations of the initial prompt"
|
111
|
+
)
|
98
112
|
console.print("│")
|
99
|
-
|
100
|
-
def start_fresh_prompts(self, num_fresh_starts):
|
101
|
-
if verbose >= 1:
|
102
|
-
console.print(f"│ Generating {num_fresh_starts} fresh prompts based on the task description.")
|
103
|
-
|
104
|
-
def success_fresh_prompts(self, num_fresh_starts):
|
113
|
+
|
114
|
+
def start_fresh_prompts(self, num_fresh_starts: int) -> None:
|
105
115
|
if verbose >= 1:
|
106
|
-
console.print(
|
107
|
-
|
108
|
-
|
109
|
-
|
116
|
+
console.print(
|
117
|
+
f"│ Generating {num_fresh_starts} fresh prompts based on the task description."
|
118
|
+
)
|
119
|
+
|
120
|
+
def success_fresh_prompts(self, num_fresh_starts: int) -> None:
|
110
121
|
if verbose >= 1:
|
111
|
-
console.print(
|
122
|
+
console.print(
|
123
|
+
Text("│ ").append(
|
124
|
+
Text(
|
125
|
+
f"Successfully generated {num_fresh_starts} fresh prompts based on the task description.",
|
126
|
+
style="dim green",
|
127
|
+
)
|
128
|
+
)
|
129
|
+
)
|
112
130
|
console.print("│")
|
113
131
|
|
114
|
-
def start_variations(self, num_variations):
|
132
|
+
def start_variations(self, num_variations: int) -> None:
|
115
133
|
if verbose >= 1:
|
116
|
-
console.print(
|
117
|
-
|
118
|
-
|
134
|
+
console.print(
|
135
|
+
f"│ Generating {num_variations} variations of the initial prompt."
|
136
|
+
)
|
137
|
+
|
138
|
+
def success_variations(self, num_variations: int) -> None:
|
119
139
|
if verbose >= 1:
|
120
|
-
console.print(
|
140
|
+
console.print(
|
141
|
+
Text(
|
142
|
+
f"│ Successfully generated {num_variations - 1} variations of the initial prompt).",
|
143
|
+
style="dim green",
|
144
|
+
)
|
145
|
+
)
|
121
146
|
console.print("│")
|
122
|
-
|
123
|
-
def failed_variations(self, num_variations, error):
|
147
|
+
|
148
|
+
def failed_variations(self, num_variations: int, error: str) -> None:
|
124
149
|
if verbose >= 1:
|
125
|
-
console.print(
|
150
|
+
console.print(
|
151
|
+
Text(
|
152
|
+
f"│ Failed to generate {num_variations - 1} variations of the initial prompt: {error}",
|
153
|
+
style="dim red",
|
154
|
+
)
|
155
|
+
)
|
126
156
|
console.print("│")
|
127
|
-
|
128
|
-
def end(self, population_prompts: List[chat_prompt.ChatPrompt]):
|
157
|
+
|
158
|
+
def end(self, population_prompts: List[chat_prompt.ChatPrompt]) -> None:
|
129
159
|
if verbose >= 1:
|
130
|
-
console.print(
|
160
|
+
console.print(
|
161
|
+
f"│ Successfully initialized population with {len(population_prompts)} prompts."
|
162
|
+
)
|
131
163
|
console.print("")
|
132
|
-
|
133
|
-
|
164
|
+
|
134
165
|
try:
|
135
166
|
yield Reporter()
|
136
167
|
finally:
|
137
168
|
pass
|
138
169
|
|
170
|
+
|
139
171
|
@contextmanager
|
140
|
-
def baseline_performance(verbose: int = 1):
|
172
|
+
def baseline_performance(verbose: int = 1) -> Any:
|
141
173
|
"""Context manager to display messages during an evaluation phase."""
|
142
174
|
# Entry point
|
143
175
|
if verbose >= 1:
|
144
176
|
console.print(Text("> First we will establish the baseline performance."))
|
145
|
-
|
177
|
+
|
146
178
|
# Create a simple object with a method to set the score
|
147
179
|
class Reporter:
|
148
|
-
def set_score(self, s):
|
180
|
+
def set_score(self, s: float) -> None:
|
149
181
|
if verbose >= 1:
|
150
|
-
console.print(
|
151
|
-
|
182
|
+
console.print(
|
183
|
+
Text(f"\r Baseline score was: {s:.4f}.\n", style="green")
|
184
|
+
)
|
185
|
+
|
152
186
|
# Use our log suppression context manager and yield the reporter
|
153
187
|
with suppress_opik_logs():
|
154
188
|
with convert_tqdm_to_rich(" Evaluation", verbose=verbose):
|
@@ -157,22 +191,27 @@ def baseline_performance(verbose: int = 1):
|
|
157
191
|
finally:
|
158
192
|
pass
|
159
193
|
|
194
|
+
|
160
195
|
@contextmanager
|
161
|
-
def evaluate_initial_population(verbose: int = 1):
|
196
|
+
def evaluate_initial_population(verbose: int = 1) -> Any:
|
162
197
|
"""Context manager to display messages during an evaluation phase."""
|
163
198
|
# Entry point
|
164
199
|
if verbose >= 1:
|
165
200
|
console.print(Text("> Let's now evaluate the initial population"))
|
166
|
-
|
201
|
+
|
167
202
|
# Create a simple object with a method to set the score
|
168
203
|
class Reporter:
|
169
|
-
def set_score(self, index, score, baseline_score):
|
204
|
+
def set_score(self, index: int, score: float, baseline_score: float) -> None:
|
170
205
|
if verbose >= 1:
|
171
206
|
if score >= baseline_score:
|
172
|
-
console.print(
|
207
|
+
console.print(
|
208
|
+
Text(f"\r Prompt {index+1} score was: {score}.", style="green")
|
209
|
+
)
|
173
210
|
else:
|
174
|
-
console.print(
|
175
|
-
|
211
|
+
console.print(
|
212
|
+
Text(f"\r Prompt {index+1} score was: {score}.", style="dim")
|
213
|
+
)
|
214
|
+
|
176
215
|
# Use our log suppression context manager and yield the reporter
|
177
216
|
with suppress_opik_logs():
|
178
217
|
with convert_tqdm_to_rich("│ Evaluation", verbose=verbose):
|
@@ -182,38 +221,60 @@ def evaluate_initial_population(verbose: int = 1):
|
|
182
221
|
if verbose >= 1:
|
183
222
|
console.print("")
|
184
223
|
|
224
|
+
|
185
225
|
@contextmanager
|
186
|
-
def start_evolutionary_algo(verbose: int = 1):
|
226
|
+
def start_evolutionary_algo(verbose: int = 1) -> Any:
|
187
227
|
"""Context manager to display messages during an evolutionary algorithm phase."""
|
188
228
|
# Entry point
|
189
229
|
if verbose >= 1:
|
190
230
|
console.print(Text("> Starting evolutionary algorithm optimization"))
|
191
|
-
|
231
|
+
|
192
232
|
# Create a simple object with a method to set the score
|
193
233
|
class Reporter:
|
194
|
-
def start_gen(self, gen, num_gens):
|
234
|
+
def start_gen(self, gen: int, num_gens: int) -> None:
|
195
235
|
if verbose >= 1:
|
196
236
|
console.print(Text(f"│ Starting generation {gen} of {num_gens}"))
|
197
237
|
|
198
|
-
def restart_population(self, restart_generation_nb):
|
238
|
+
def restart_population(self, restart_generation_nb: int) -> None:
|
199
239
|
if verbose >= 1:
|
200
|
-
console.print(
|
201
|
-
|
202
|
-
|
240
|
+
console.print(
|
241
|
+
Text(
|
242
|
+
f"│ Re-creating the population as we have not made progress in {restart_generation_nb} generations."
|
243
|
+
)
|
244
|
+
)
|
245
|
+
|
246
|
+
def performing_crossover(self) -> None:
|
203
247
|
if verbose >= 1:
|
204
|
-
console.print(
|
205
|
-
|
206
|
-
|
248
|
+
console.print(
|
249
|
+
Text(
|
250
|
+
"│ Performing crossover - Combining multiple prompts into a new one."
|
251
|
+
)
|
252
|
+
)
|
253
|
+
|
254
|
+
def performing_mutation(self) -> None:
|
207
255
|
if verbose >= 1:
|
208
|
-
console.print(
|
209
|
-
|
210
|
-
|
256
|
+
console.print(
|
257
|
+
Text(
|
258
|
+
"│ Performing mutation - Altering prompts to improve their performance."
|
259
|
+
)
|
260
|
+
)
|
261
|
+
|
262
|
+
def performing_evaluation(self, num_prompts: int) -> None:
|
211
263
|
if verbose >= 1:
|
212
|
-
console.print(
|
213
|
-
|
214
|
-
|
264
|
+
console.print(
|
265
|
+
Text(
|
266
|
+
f"│ Performing evaluation - Assessing {num_prompts} prompts' performance."
|
267
|
+
)
|
268
|
+
)
|
269
|
+
|
270
|
+
def performed_evaluation(self, prompt_idx: int, score: float) -> None:
|
215
271
|
if verbose >= 1:
|
216
|
-
console.print(
|
272
|
+
console.print(
|
273
|
+
Text(
|
274
|
+
f"│ Performed evaluation for prompt {prompt_idx} - Score: {score:.4f}.",
|
275
|
+
style="dim",
|
276
|
+
)
|
277
|
+
)
|
217
278
|
|
218
279
|
# Use our log suppression context manager and yield the reporter
|
219
280
|
with suppress_opik_logs():
|
@@ -224,23 +285,41 @@ def start_evolutionary_algo(verbose: int = 1):
|
|
224
285
|
if verbose >= 1:
|
225
286
|
console.print("")
|
226
287
|
|
227
|
-
|
288
|
+
|
289
|
+
def display_error(error_message: str, verbose: int = 1) -> None:
|
228
290
|
if verbose >= 1:
|
229
291
|
console.print(Text("│ ").append(Text(error_message, style="dim red")))
|
230
292
|
|
231
|
-
|
293
|
+
|
294
|
+
def display_success(message: str, verbose: int = 1) -> None:
|
232
295
|
if verbose >= 1:
|
233
296
|
console.print(Text("│ ").append(Text(message, style="dim green")))
|
234
297
|
|
235
|
-
|
298
|
+
|
299
|
+
def display_message(message: str, verbose: int = 1) -> None:
|
236
300
|
if verbose >= 1:
|
237
301
|
console.print(Text("│ ").append(Text(message, style="dim")))
|
238
302
|
|
239
|
-
|
303
|
+
|
304
|
+
def end_gen(
|
305
|
+
generation_idx: int,
|
306
|
+
best_gen_score: float,
|
307
|
+
initial_primary_score: float,
|
308
|
+
verbose: int = 1,
|
309
|
+
) -> None:
|
240
310
|
if verbose >= 1:
|
241
311
|
if best_gen_score >= initial_primary_score:
|
242
|
-
console.print(
|
312
|
+
console.print(
|
313
|
+
Text(
|
314
|
+
f"│ Generation {generation_idx} completed. Found a new prompt with a score of {best_gen_score:.4f}.",
|
315
|
+
style="green",
|
316
|
+
)
|
317
|
+
)
|
243
318
|
else:
|
244
|
-
console.print(
|
319
|
+
console.print(
|
320
|
+
Text(
|
321
|
+
f"│ Generation {generation_idx} completed. No improvement in this generation."
|
322
|
+
)
|
323
|
+
)
|
245
324
|
|
246
325
|
console.print("│")
|