opik-optimizer 2.1.3__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +0 -2
- opik_optimizer/base_optimizer.py +313 -144
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
- opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
- opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
- opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +345 -201
- opik_optimizer/gepa_optimizer/reporting.py +291 -22
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
- opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
- opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
- opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
- opik_optimizer/mipro_optimizer/__init__.py +2 -2
- opik_optimizer/mipro_optimizer/_lm.py +4 -4
- opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
- opik_optimizer/mipro_optimizer/utils.py +1 -0
- opik_optimizer/optimizable_agent.py +7 -4
- opik_optimizer/optimization_config/chat_prompt.py +7 -10
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
- opik_optimizer/parameter_optimizer/reporting.py +148 -0
- opik_optimizer/reporting_utils.py +60 -15
- opik_optimizer/utils/__init__.py +3 -0
- opik_optimizer/utils/candidate_utils.py +52 -0
- opik_optimizer/utils/core.py +35 -2
- opik_optimizer/utils/prompt_segments.py +1 -2
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/METADATA +2 -3
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/RECORD +36 -36
- opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/top_level.txt +0 -0
|
@@ -1,33 +1,125 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from numbers import Number
|
|
1
3
|
from contextlib import contextmanager
|
|
2
4
|
from typing import Any
|
|
3
5
|
|
|
4
6
|
from rich.table import Table
|
|
5
7
|
from rich.text import Text
|
|
6
8
|
from rich.panel import Panel
|
|
9
|
+
from rich.progress import (
|
|
10
|
+
Progress,
|
|
11
|
+
SpinnerColumn,
|
|
12
|
+
TextColumn,
|
|
13
|
+
BarColumn,
|
|
14
|
+
TimeRemainingColumn,
|
|
15
|
+
MofNCompleteColumn,
|
|
16
|
+
)
|
|
7
17
|
|
|
8
|
-
from ..reporting_utils import (
|
|
9
|
-
display_configuration,
|
|
10
|
-
display_header,
|
|
11
|
-
display_result,
|
|
18
|
+
from ..reporting_utils import ( # noqa: F401
|
|
19
|
+
display_configuration,
|
|
20
|
+
display_header,
|
|
21
|
+
display_result,
|
|
12
22
|
get_console,
|
|
13
23
|
convert_tqdm_to_rich,
|
|
24
|
+
format_prompt_snippet,
|
|
14
25
|
suppress_opik_logs,
|
|
15
26
|
)
|
|
16
27
|
|
|
17
28
|
console = get_console()
|
|
18
29
|
|
|
19
30
|
|
|
31
|
+
def _format_pareto_note(note: str) -> str:
|
|
32
|
+
try:
|
|
33
|
+
data = json.loads(note)
|
|
34
|
+
except json.JSONDecodeError:
|
|
35
|
+
return note
|
|
36
|
+
|
|
37
|
+
if isinstance(data, dict):
|
|
38
|
+
parts: list[str] = []
|
|
39
|
+
new_scores = data.get("new_scores") or data.get("scores")
|
|
40
|
+
if isinstance(new_scores, list):
|
|
41
|
+
formatted_scores = ", ".join(
|
|
42
|
+
f"{float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
|
|
43
|
+
if isinstance(score, Number)
|
|
44
|
+
else str(score)
|
|
45
|
+
for score in new_scores
|
|
46
|
+
)
|
|
47
|
+
parts.append(f"scores=[{formatted_scores}]")
|
|
48
|
+
|
|
49
|
+
chosen = data.get("chosen")
|
|
50
|
+
if chosen is not None:
|
|
51
|
+
parts.append(f"chosen={chosen}")
|
|
52
|
+
|
|
53
|
+
train_val = data.get("pareto_front_train_val_score")
|
|
54
|
+
if isinstance(train_val, dict) and chosen is not None:
|
|
55
|
+
chosen_entry = train_val.get(str(chosen))
|
|
56
|
+
if isinstance(chosen_entry, dict):
|
|
57
|
+
score = chosen_entry.get("score")
|
|
58
|
+
if isinstance(score, Number):
|
|
59
|
+
parts.append(
|
|
60
|
+
f"train_val={float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
pareto_front = data.get("pareto_front")
|
|
64
|
+
if isinstance(pareto_front, dict):
|
|
65
|
+
parts.append(f"front_size={len(pareto_front)}")
|
|
66
|
+
|
|
67
|
+
if parts:
|
|
68
|
+
return ", ".join(parts)
|
|
69
|
+
|
|
70
|
+
return note
|
|
71
|
+
|
|
72
|
+
elif isinstance(data, list):
|
|
73
|
+
return ", ".join(
|
|
74
|
+
f"{float(item) if isinstance(item, (int, float)) else float(str(item)):.3f}"
|
|
75
|
+
if isinstance(item, Number)
|
|
76
|
+
else str(item)
|
|
77
|
+
for item in data
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
elif isinstance(data, Number):
|
|
81
|
+
return (
|
|
82
|
+
f"{float(data) if isinstance(data, (int, float)) else float(str(data)):.3f}"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
return str(data)
|
|
86
|
+
|
|
87
|
+
|
|
20
88
|
class RichGEPAOptimizerLogger:
|
|
21
|
-
"""Adapter for GEPA's logger that provides concise Rich output."""
|
|
89
|
+
"""Adapter for GEPA's logger that provides concise Rich output with progress tracking."""
|
|
22
90
|
|
|
23
91
|
SUPPRESS_PREFIXES = (
|
|
24
92
|
"Linear pareto front program index",
|
|
25
93
|
"New program candidate index",
|
|
26
94
|
)
|
|
27
95
|
|
|
28
|
-
|
|
96
|
+
# Additional messages to suppress (too technical for users)
|
|
97
|
+
SUPPRESS_KEYWORDS = (
|
|
98
|
+
"Individual valset scores for new program",
|
|
99
|
+
"New valset pareto front scores",
|
|
100
|
+
"Updated valset pareto front programs",
|
|
101
|
+
"Best program as per aggregate score on train_val",
|
|
102
|
+
"Best program as per aggregate score on valset",
|
|
103
|
+
"New program is on the linear pareto front",
|
|
104
|
+
"Full train_val score for new program",
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
def __init__(
    self,
    optimizer: Any,
    verbose: int = 1,
    progress: Progress | None = None,
    task_id: Any | None = None,
    max_trials: int = 10,
) -> None:
    """Set up the logger adapter and its per-run bookkeeping.

    Args:
        optimizer: Object that receives ``_gepa_current_iteration`` each time
            an "Iteration N" line is parsed by ``log``.
        verbose: Verbosity level; ``log`` prints nothing below 1 and shows
            extra Pareto-front details at 2 or above.
        progress: Optional Rich progress display advanced by ``log``.
        task_id: Task handle within ``progress``; the bar is only updated
            when both ``progress`` and ``task_id`` are provided.
        max_trials: Trial budget, stored on the instance.
    """
    # Caller-supplied collaborators and settings.
    self.optimizer, self.verbose = optimizer, verbose
    self.progress, self.task_id = progress, task_id
    self.max_trials = max_trials

    # Mutable state maintained by ``log``.
    self.current_iteration = 0  # last iteration number parsed from a message
    self._last_best_message: tuple[str, str] | None = None  # de-dupes "New best" lines
    self._last_raw_message: str | None = None  # de-dupes consecutive identical lines
|
|
31
123
|
|
|
32
124
|
def log(self, message: str) -> None:
|
|
33
125
|
if self.verbose < 1:
|
|
@@ -43,30 +135,159 @@ class RichGEPAOptimizerLogger:
|
|
|
43
135
|
|
|
44
136
|
first = lines[0]
|
|
45
137
|
|
|
138
|
+
if first == self._last_raw_message:
|
|
139
|
+
return
|
|
140
|
+
|
|
141
|
+
# Reset duplicate tracker when handling other messages
|
|
142
|
+
if not first.startswith("Best "):
|
|
143
|
+
self._last_best_message = None
|
|
144
|
+
|
|
145
|
+
# Track iteration changes and add separation
|
|
46
146
|
if first.startswith("Iteration "):
|
|
47
147
|
colon = first.find(":")
|
|
48
148
|
head = first[:colon] if colon != -1 else first
|
|
49
149
|
parts = head.split()
|
|
50
150
|
if len(parts) >= 2 and parts[1].isdigit():
|
|
51
151
|
try:
|
|
52
|
-
|
|
152
|
+
iteration = int(parts[1])
|
|
153
|
+
|
|
154
|
+
# Add separator when starting a new iteration (except iteration 0)
|
|
155
|
+
if iteration > 0 and iteration != self.current_iteration:
|
|
156
|
+
console.print("│")
|
|
157
|
+
|
|
158
|
+
self.optimizer._gepa_current_iteration = iteration # type: ignore[attr-defined]
|
|
159
|
+
self.current_iteration = iteration
|
|
160
|
+
self._last_raw_message = first
|
|
161
|
+
|
|
162
|
+
# Update progress bar
|
|
163
|
+
if self.progress and self.task_id is not None:
|
|
164
|
+
self.progress.update(self.task_id, completed=iteration)
|
|
165
|
+
|
|
166
|
+
# Add explanatory text for iteration start
|
|
167
|
+
if "Base program full valset score" in first:
|
|
168
|
+
# Extract score
|
|
169
|
+
score_match = first.split(":")[-1].strip()
|
|
170
|
+
console.print(
|
|
171
|
+
f"│ Baseline evaluation: {score_match}", style="bold"
|
|
172
|
+
)
|
|
173
|
+
return
|
|
174
|
+
elif "Selected program" in first:
|
|
175
|
+
# Extract program number and score
|
|
176
|
+
parts_info = first.split(":")
|
|
177
|
+
if "Selected program" in parts_info[1]:
|
|
178
|
+
program_info = parts_info[1].strip()
|
|
179
|
+
score_info = (
|
|
180
|
+
parts_info[2].strip() if len(parts_info) > 2 else ""
|
|
181
|
+
)
|
|
182
|
+
console.print(
|
|
183
|
+
f"│ Trial {iteration}: {program_info}, score: {score_info}",
|
|
184
|
+
style="bold cyan",
|
|
185
|
+
)
|
|
186
|
+
else:
|
|
187
|
+
console.print(f"│ Trial {iteration}", style="bold cyan")
|
|
188
|
+
console.print("│ ├─ Testing new prompt variant...")
|
|
189
|
+
return
|
|
53
190
|
except Exception:
|
|
54
191
|
pass
|
|
55
192
|
|
|
193
|
+
# Check if this message should be suppressed (unless verbose >= 2)
|
|
194
|
+
if self.verbose <= 1:
|
|
195
|
+
for keyword in self.SUPPRESS_KEYWORDS:
|
|
196
|
+
if keyword in first:
|
|
197
|
+
return
|
|
198
|
+
|
|
199
|
+
for prefix in self.SUPPRESS_PREFIXES:
|
|
200
|
+
if prefix in first:
|
|
201
|
+
return
|
|
202
|
+
|
|
203
|
+
# Format proposed prompts
|
|
56
204
|
if "Proposed new text" in first and "system_prompt:" in first:
|
|
57
205
|
_, _, rest = first.partition("system_prompt:")
|
|
58
|
-
snippet = rest
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
206
|
+
snippet = format_prompt_snippet(rest, max_length=100)
|
|
207
|
+
console.print(f"│ │ Proposed: {snippet}", style="dim")
|
|
208
|
+
self._last_raw_message = first
|
|
209
|
+
return
|
|
210
|
+
|
|
211
|
+
# Format subsample evaluation results
|
|
212
|
+
if "New subsample score" in first and "is not better than" in first:
|
|
213
|
+
console.print("│ └─ Rejected - no improvement", style="dim yellow")
|
|
214
|
+
console.print("│") # Add spacing after rejected trials
|
|
215
|
+
self._last_raw_message = first
|
|
216
|
+
return
|
|
217
|
+
|
|
218
|
+
elif "New subsample score" in first and "is better than" in first:
|
|
219
|
+
console.print("│ ├─ Promising! Running full validation...", style="green")
|
|
220
|
+
self._last_raw_message = first
|
|
221
|
+
return
|
|
222
|
+
|
|
223
|
+
# Format final validation score
|
|
224
|
+
if "Full valset score for new program" in first:
|
|
225
|
+
# Extract score
|
|
226
|
+
parts = first.split(":")
|
|
227
|
+
if len(parts) >= 2:
|
|
228
|
+
score = parts[-1].strip()
|
|
229
|
+
console.print(f"│ ├─ Validation complete: {score}", style="bold green")
|
|
230
|
+
else:
|
|
231
|
+
console.print("│ ├─ Validation complete", style="green")
|
|
232
|
+
self._last_raw_message = first
|
|
233
|
+
return
|
|
234
|
+
|
|
235
|
+
# Format best score updates
|
|
236
|
+
if "Best score on train_val" in first:
|
|
237
|
+
parts = first.split(":")
|
|
238
|
+
if len(parts) >= 2:
|
|
239
|
+
score = parts[-1].strip()
|
|
240
|
+
console.print(f"│ Best train_val score: {score}", style="cyan")
|
|
241
|
+
self._last_raw_message = first
|
|
242
|
+
return
|
|
64
243
|
|
|
65
|
-
|
|
66
|
-
|
|
244
|
+
if (
|
|
245
|
+
"Best valset aggregate score so far" in first
|
|
246
|
+
or "Best score on valset" in first
|
|
247
|
+
):
|
|
248
|
+
# Extract score
|
|
249
|
+
parts = first.split(":")
|
|
250
|
+
if len(parts) >= 2:
|
|
251
|
+
score = parts[-1].strip()
|
|
252
|
+
key = ("new_best", score)
|
|
253
|
+
if self._last_best_message != key:
|
|
254
|
+
console.print(f"│ └─ New best: {score} ✓", style="bold green")
|
|
255
|
+
console.print("│") # Add spacing after successful trials
|
|
256
|
+
self._last_best_message = key
|
|
257
|
+
self._last_raw_message = first
|
|
258
|
+
return
|
|
259
|
+
|
|
260
|
+
if self.verbose >= 2:
|
|
261
|
+
if "New valset pareto front scores" in first:
|
|
262
|
+
note = first.split(":", 1)[-1].strip()
|
|
263
|
+
console.print(
|
|
264
|
+
f"│ Pareto front scores updated: {_format_pareto_note(note)}",
|
|
265
|
+
style="cyan",
|
|
266
|
+
)
|
|
267
|
+
self._last_raw_message = first
|
|
268
|
+
return
|
|
269
|
+
if "Updated valset pareto front programs" in first:
|
|
270
|
+
console.print("│ Pareto front programs updated", style="cyan")
|
|
271
|
+
self._last_raw_message = first
|
|
67
272
|
return
|
|
273
|
+
if "New program is on the linear pareto front" in first:
|
|
274
|
+
console.print("│ Candidate added to Pareto front", style="cyan")
|
|
275
|
+
self._last_raw_message = first
|
|
276
|
+
return
|
|
277
|
+
|
|
278
|
+
# Suppress redundant "Iteration X:" prefix from detailed messages
|
|
279
|
+
if first.startswith(f"Iteration {self.current_iteration}:"):
|
|
280
|
+
# Remove the iteration prefix for cleaner output
|
|
281
|
+
first = first.split(":", 1)[1].strip() if ":" in first else first
|
|
68
282
|
|
|
69
|
-
|
|
283
|
+
# Truncate very long messages
|
|
284
|
+
if len(first) > 160:
|
|
285
|
+
first = first[:160] + "…"
|
|
286
|
+
|
|
287
|
+
# Default: print with standard prefix only if not already handled
|
|
288
|
+
if first:
|
|
289
|
+
console.print(f"│ {first}", style="dim")
|
|
290
|
+
self._last_raw_message = first
|
|
70
291
|
|
|
71
292
|
|
|
72
293
|
@contextmanager
|
|
@@ -85,20 +306,45 @@ def baseline_evaluation(verbose: int = 1) -> Any:
|
|
|
85
306
|
|
|
86
307
|
|
|
87
308
|
@contextmanager
def start_gepa_optimization(verbose: int = 1, max_trials: int = 10) -> Any:
    """Frame a GEPA optimization run in the console and yield a reporter.

    While the context is active, Opik logs are suppressed and (for
    ``verbose >= 1``) a transient Rich progress bar with ``max_trials`` steps
    is shown. On exit the bar is marked complete and stopped, and a blank
    line is printed.

    Args:
        verbose: Verbosity level; below 1 nothing is printed and no progress
            bar is created.
        max_trials: Total used for the progress bar.

    Yields:
        A ``Reporter`` whose ``info(message)`` prints a prefixed line and
        whose ``progress`` / ``task_id`` attributes expose the progress bar
        (both ``None`` when ``verbose < 1``).
    """
    if verbose >= 1:
        console.print("> Starting GEPA optimization")

    class Reporter:
        # Populated on the class once the progress bar has been created.
        progress: Progress | None = None
        task_id: Any | None = None

        def info(self, message: str) -> None:
            if verbose >= 1:
                console.print(f"│ {message}")

    with suppress_opik_logs():
        try:
            # Create Rich progress bar
            if verbose >= 1:
                Reporter.progress = Progress(
                    SpinnerColumn(),
                    TextColumn("[bold blue]{task.description}"),
                    BarColumn(),
                    MofNCompleteColumn(),
                    TextColumn("•"),
                    TimeRemainingColumn(),
                    console=console,
                    transient=True,  # Make progress bar disappear when done
                )
                Reporter.progress.start()
                Reporter.task_id = Reporter.progress.add_task(
                    "GEPA Optimization", total=max_trials
                )

            yield Reporter()
        finally:
            if verbose >= 1:
                progress = Reporter.progress
                if progress is not None:
                    try:
                        # Mark as complete before stopping
                        if Reporter.task_id is not None:
                            progress.update(Reporter.task_id, completed=max_trials)
                    finally:
                        # Always release the live display, even when the task
                        # was never registered or the final update failed;
                        # otherwise the started progress bar is left running.
                        progress.stop()
                console.print("")
|
|
102
348
|
|
|
103
349
|
|
|
104
350
|
def display_candidate_scores(
|
|
@@ -138,6 +384,7 @@ def display_selected_candidate(
|
|
|
138
384
|
*,
|
|
139
385
|
verbose: int = 1,
|
|
140
386
|
title: str = "Selected Candidate",
|
|
387
|
+
trial_info: dict[str, Any] | None = None,
|
|
141
388
|
) -> None:
|
|
142
389
|
"""Display the final selected candidate with its Opik score."""
|
|
143
390
|
if verbose < 1:
|
|
@@ -145,11 +392,33 @@ def display_selected_candidate(
|
|
|
145
392
|
|
|
146
393
|
snippet = system_prompt.strip() or "<empty>"
|
|
147
394
|
text = Text(snippet)
|
|
395
|
+
subtitle: Text | None = None
|
|
396
|
+
if trial_info:
|
|
397
|
+
trial_parts: list[str] = []
|
|
398
|
+
trial_name = trial_info.get("experiment_name")
|
|
399
|
+
trial_ids = trial_info.get("trial_ids") or []
|
|
400
|
+
if trial_name:
|
|
401
|
+
trial_parts.append(f"Trial {trial_name}")
|
|
402
|
+
elif trial_ids:
|
|
403
|
+
trial_parts.append(f"Trial {trial_ids[0]}")
|
|
404
|
+
|
|
405
|
+
compare_url = trial_info.get("compare_url")
|
|
406
|
+
experiment_url = trial_info.get("experiment_url")
|
|
407
|
+
if compare_url:
|
|
408
|
+
trial_parts.append(f"[link={compare_url}]Compare run[/link]")
|
|
409
|
+
elif experiment_url:
|
|
410
|
+
trial_parts.append(f"[link={experiment_url}]View experiment[/link]")
|
|
411
|
+
|
|
412
|
+
if trial_parts:
|
|
413
|
+
subtitle = Text.from_markup(" • ".join(trial_parts))
|
|
414
|
+
|
|
148
415
|
panel = Panel(
|
|
149
416
|
text,
|
|
150
417
|
title=f"{title} — Opik score {score:.4f}",
|
|
151
418
|
border_style="green",
|
|
152
419
|
expand=True,
|
|
420
|
+
subtitle=subtitle,
|
|
421
|
+
subtitle_align="left",
|
|
153
422
|
)
|
|
154
423
|
console.print(panel)
|
|
155
424
|
|