opik-optimizer 2.1.3__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (38)
  1. opik_optimizer/__init__.py +0 -2
  2. opik_optimizer/base_optimizer.py +313 -144
  3. opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
  4. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
  5. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
  6. opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
  9. opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
  10. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
  11. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
  12. opik_optimizer/gepa_optimizer/gepa_optimizer.py +345 -201
  13. opik_optimizer/gepa_optimizer/reporting.py +291 -22
  14. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
  15. opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
  16. opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
  17. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
  18. opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
  19. opik_optimizer/mipro_optimizer/__init__.py +2 -2
  20. opik_optimizer/mipro_optimizer/_lm.py +4 -4
  21. opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
  22. opik_optimizer/mipro_optimizer/utils.py +1 -0
  23. opik_optimizer/optimizable_agent.py +7 -4
  24. opik_optimizer/optimization_config/chat_prompt.py +7 -10
  25. opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
  26. opik_optimizer/parameter_optimizer/reporting.py +148 -0
  27. opik_optimizer/reporting_utils.py +60 -15
  28. opik_optimizer/utils/__init__.py +3 -0
  29. opik_optimizer/utils/candidate_utils.py +52 -0
  30. opik_optimizer/utils/core.py +35 -2
  31. opik_optimizer/utils/prompt_segments.py +1 -2
  32. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/METADATA +2 -3
  33. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/RECORD +36 -36
  34. opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
  36. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/WHEEL +0 -0
  37. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/licenses/LICENSE +0 -0
  38. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.1.dist-info}/top_level.txt +0 -0
@@ -1,33 +1,125 @@
1
+ import json
2
+ from numbers import Number
1
3
  from contextlib import contextmanager
2
4
  from typing import Any
3
5
 
4
6
  from rich.table import Table
5
7
  from rich.text import Text
6
8
  from rich.panel import Panel
9
+ from rich.progress import (
10
+ Progress,
11
+ SpinnerColumn,
12
+ TextColumn,
13
+ BarColumn,
14
+ TimeRemainingColumn,
15
+ MofNCompleteColumn,
16
+ )
7
17
 
8
- from ..reporting_utils import (
9
- display_configuration, # noqa: F401
10
- display_header, # noqa: F401
11
- display_result, # noqa: F401
18
+ from ..reporting_utils import ( # noqa: F401
19
+ display_configuration,
20
+ display_header,
21
+ display_result,
12
22
  get_console,
13
23
  convert_tqdm_to_rich,
24
+ format_prompt_snippet,
14
25
  suppress_opik_logs,
15
26
  )
16
27
 
17
28
  console = get_console()
18
29
 
19
30
 
31
+ def _format_pareto_note(note: str) -> str:
32
+ try:
33
+ data = json.loads(note)
34
+ except json.JSONDecodeError:
35
+ return note
36
+
37
+ if isinstance(data, dict):
38
+ parts: list[str] = []
39
+ new_scores = data.get("new_scores") or data.get("scores")
40
+ if isinstance(new_scores, list):
41
+ formatted_scores = ", ".join(
42
+ f"{float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
43
+ if isinstance(score, Number)
44
+ else str(score)
45
+ for score in new_scores
46
+ )
47
+ parts.append(f"scores=[{formatted_scores}]")
48
+
49
+ chosen = data.get("chosen")
50
+ if chosen is not None:
51
+ parts.append(f"chosen={chosen}")
52
+
53
+ train_val = data.get("pareto_front_train_val_score")
54
+ if isinstance(train_val, dict) and chosen is not None:
55
+ chosen_entry = train_val.get(str(chosen))
56
+ if isinstance(chosen_entry, dict):
57
+ score = chosen_entry.get("score")
58
+ if isinstance(score, Number):
59
+ parts.append(
60
+ f"train_val={float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
61
+ )
62
+
63
+ pareto_front = data.get("pareto_front")
64
+ if isinstance(pareto_front, dict):
65
+ parts.append(f"front_size={len(pareto_front)}")
66
+
67
+ if parts:
68
+ return ", ".join(parts)
69
+
70
+ return note
71
+
72
+ elif isinstance(data, list):
73
+ return ", ".join(
74
+ f"{float(item) if isinstance(item, (int, float)) else float(str(item)):.3f}"
75
+ if isinstance(item, Number)
76
+ else str(item)
77
+ for item in data
78
+ )
79
+
80
+ elif isinstance(data, Number):
81
+ return (
82
+ f"{float(data) if isinstance(data, (int, float)) else float(str(data)):.3f}"
83
+ )
84
+
85
+ return str(data)
86
+
87
+
20
88
  class RichGEPAOptimizerLogger:
21
- """Adapter for GEPA's logger that provides concise Rich output."""
89
+ """Adapter for GEPA's logger that provides concise Rich output with progress tracking."""
22
90
 
23
91
  SUPPRESS_PREFIXES = (
24
92
  "Linear pareto front program index",
25
93
  "New program candidate index",
26
94
  )
27
95
 
28
- def __init__(self, optimizer: Any, verbose: int = 1) -> None:
96
+ # Additional messages to suppress (too technical for users)
97
+ SUPPRESS_KEYWORDS = (
98
+ "Individual valset scores for new program",
99
+ "New valset pareto front scores",
100
+ "Updated valset pareto front programs",
101
+ "Best program as per aggregate score on train_val",
102
+ "Best program as per aggregate score on valset",
103
+ "New program is on the linear pareto front",
104
+ "Full train_val score for new program",
105
+ )
106
+
107
+ def __init__(
108
+ self,
109
+ optimizer: Any,
110
+ verbose: int = 1,
111
+ progress: Progress | None = None,
112
+ task_id: Any | None = None,
113
+ max_trials: int = 10,
114
+ ) -> None:
29
115
  self.optimizer = optimizer
30
116
  self.verbose = verbose
117
+ self.progress = progress
118
+ self.task_id = task_id
119
+ self.max_trials = max_trials
120
+ self.current_iteration = 0
121
+ self._last_best_message: tuple[str, str] | None = None
122
+ self._last_raw_message: str | None = None
31
123
 
32
124
  def log(self, message: str) -> None:
33
125
  if self.verbose < 1:
@@ -43,30 +135,159 @@ class RichGEPAOptimizerLogger:
43
135
 
44
136
  first = lines[0]
45
137
 
138
+ if first == self._last_raw_message:
139
+ return
140
+
141
+ # Reset duplicate tracker when handling other messages
142
+ if not first.startswith("Best "):
143
+ self._last_best_message = None
144
+
145
+ # Track iteration changes and add separation
46
146
  if first.startswith("Iteration "):
47
147
  colon = first.find(":")
48
148
  head = first[:colon] if colon != -1 else first
49
149
  parts = head.split()
50
150
  if len(parts) >= 2 and parts[1].isdigit():
51
151
  try:
52
- self.optimizer._gepa_current_iteration = int(parts[1]) # type: ignore[attr-defined]
152
+ iteration = int(parts[1])
153
+
154
+ # Add separator when starting a new iteration (except iteration 0)
155
+ if iteration > 0 and iteration != self.current_iteration:
156
+ console.print("│")
157
+
158
+ self.optimizer._gepa_current_iteration = iteration # type: ignore[attr-defined]
159
+ self.current_iteration = iteration
160
+ self._last_raw_message = first
161
+
162
+ # Update progress bar
163
+ if self.progress and self.task_id is not None:
164
+ self.progress.update(self.task_id, completed=iteration)
165
+
166
+ # Add explanatory text for iteration start
167
+ if "Base program full valset score" in first:
168
+ # Extract score
169
+ score_match = first.split(":")[-1].strip()
170
+ console.print(
171
+ f"│ Baseline evaluation: {score_match}", style="bold"
172
+ )
173
+ return
174
+ elif "Selected program" in first:
175
+ # Extract program number and score
176
+ parts_info = first.split(":")
177
+ if "Selected program" in parts_info[1]:
178
+ program_info = parts_info[1].strip()
179
+ score_info = (
180
+ parts_info[2].strip() if len(parts_info) > 2 else ""
181
+ )
182
+ console.print(
183
+ f"│ Trial {iteration}: {program_info}, score: {score_info}",
184
+ style="bold cyan",
185
+ )
186
+ else:
187
+ console.print(f"│ Trial {iteration}", style="bold cyan")
188
+ console.print("│ ├─ Testing new prompt variant...")
189
+ return
53
190
  except Exception:
54
191
  pass
55
192
 
193
+ # Check if this message should be suppressed (unless verbose >= 2)
194
+ if self.verbose <= 1:
195
+ for keyword in self.SUPPRESS_KEYWORDS:
196
+ if keyword in first:
197
+ return
198
+
199
+ for prefix in self.SUPPRESS_PREFIXES:
200
+ if prefix in first:
201
+ return
202
+
203
+ # Format proposed prompts
56
204
  if "Proposed new text" in first and "system_prompt:" in first:
57
205
  _, _, rest = first.partition("system_prompt:")
58
- snippet = rest.strip()
59
- if len(snippet) > 120:
60
- snippet = snippet[:120] + "…"
61
- first = "Proposed new text · system_prompt: " + snippet
62
- elif len(first) > 160:
63
- first = first[:160] + "…"
206
+ snippet = format_prompt_snippet(rest, max_length=100)
207
+ console.print(f"│ │ Proposed: {snippet}", style="dim")
208
+ self._last_raw_message = first
209
+ return
210
+
211
+ # Format subsample evaluation results
212
+ if "New subsample score" in first and "is not better than" in first:
213
+ console.print("│ └─ Rejected - no improvement", style="dim yellow")
214
+ console.print("│") # Add spacing after rejected trials
215
+ self._last_raw_message = first
216
+ return
217
+
218
+ elif "New subsample score" in first and "is better than" in first:
219
+ console.print("│ ├─ Promising! Running full validation...", style="green")
220
+ self._last_raw_message = first
221
+ return
222
+
223
+ # Format final validation score
224
+ if "Full valset score for new program" in first:
225
+ # Extract score
226
+ parts = first.split(":")
227
+ if len(parts) >= 2:
228
+ score = parts[-1].strip()
229
+ console.print(f"│ ├─ Validation complete: {score}", style="bold green")
230
+ else:
231
+ console.print("│ ├─ Validation complete", style="green")
232
+ self._last_raw_message = first
233
+ return
234
+
235
+ # Format best score updates
236
+ if "Best score on train_val" in first:
237
+ parts = first.split(":")
238
+ if len(parts) >= 2:
239
+ score = parts[-1].strip()
240
+ console.print(f"│ Best train_val score: {score}", style="cyan")
241
+ self._last_raw_message = first
242
+ return
64
243
 
65
- for prefix in self.SUPPRESS_PREFIXES:
66
- if prefix in first:
244
+ if (
245
+ "Best valset aggregate score so far" in first
246
+ or "Best score on valset" in first
247
+ ):
248
+ # Extract score
249
+ parts = first.split(":")
250
+ if len(parts) >= 2:
251
+ score = parts[-1].strip()
252
+ key = ("new_best", score)
253
+ if self._last_best_message != key:
254
+ console.print(f"│ └─ New best: {score} ✓", style="bold green")
255
+ console.print("│") # Add spacing after successful trials
256
+ self._last_best_message = key
257
+ self._last_raw_message = first
258
+ return
259
+
260
+ if self.verbose >= 2:
261
+ if "New valset pareto front scores" in first:
262
+ note = first.split(":", 1)[-1].strip()
263
+ console.print(
264
+ f"│ Pareto front scores updated: {_format_pareto_note(note)}",
265
+ style="cyan",
266
+ )
267
+ self._last_raw_message = first
268
+ return
269
+ if "Updated valset pareto front programs" in first:
270
+ console.print("│ Pareto front programs updated", style="cyan")
271
+ self._last_raw_message = first
67
272
  return
273
+ if "New program is on the linear pareto front" in first:
274
+ console.print("│ Candidate added to Pareto front", style="cyan")
275
+ self._last_raw_message = first
276
+ return
277
+
278
+ # Suppress redundant "Iteration X:" prefix from detailed messages
279
+ if first.startswith(f"Iteration {self.current_iteration}:"):
280
+ # Remove the iteration prefix for cleaner output
281
+ first = first.split(":", 1)[1].strip() if ":" in first else first
68
282
 
69
- console.print(f"│ {first}")
283
+ # Truncate very long messages
284
+ if len(first) > 160:
285
+ first = first[:160] + "…"
286
+
287
+ # Default: print with standard prefix only if not already handled
288
+ if first:
289
+ console.print(f"│ {first}", style="dim")
290
+ self._last_raw_message = first
70
291
 
71
292
 
72
293
  @contextmanager
@@ -85,20 +306,45 @@ def baseline_evaluation(verbose: int = 1) -> Any:
85
306
 
86
307
 
87
308
  @contextmanager
88
- def start_gepa_optimization(verbose: int = 1) -> Any:
309
+ def start_gepa_optimization(verbose: int = 1, max_trials: int = 10) -> Any:
89
310
  if verbose >= 1:
90
311
  console.print("> Starting GEPA optimization")
91
312
 
92
313
  class Reporter:
314
+ progress: Progress | None = None
315
+ task_id: Any | None = None
316
+
93
317
  def info(self, message: str) -> None:
94
318
  if verbose >= 1:
95
319
  console.print(f"│ {message}")
96
320
 
97
- try:
98
- yield Reporter()
99
- finally:
100
- if verbose >= 1:
101
- console.print("")
321
+ with suppress_opik_logs():
322
+ try:
323
+ # Create Rich progress bar
324
+ if verbose >= 1:
325
+ Reporter.progress = Progress(
326
+ SpinnerColumn(),
327
+ TextColumn("[bold blue]{task.description}"),
328
+ BarColumn(),
329
+ MofNCompleteColumn(),
330
+ TextColumn("•"),
331
+ TimeRemainingColumn(),
332
+ console=console,
333
+ transient=True, # Make progress bar disappear when done
334
+ )
335
+ Reporter.progress.start()
336
+ Reporter.task_id = Reporter.progress.add_task(
337
+ "GEPA Optimization", total=max_trials
338
+ )
339
+
340
+ yield Reporter()
341
+ finally:
342
+ if verbose >= 1:
343
+ if Reporter.progress and Reporter.task_id is not None:
344
+ # Mark as complete before stopping
345
+ Reporter.progress.update(Reporter.task_id, completed=max_trials)
346
+ Reporter.progress.stop()
347
+ console.print("")
102
348
 
103
349
 
104
350
  def display_candidate_scores(
@@ -138,6 +384,7 @@ def display_selected_candidate(
138
384
  *,
139
385
  verbose: int = 1,
140
386
  title: str = "Selected Candidate",
387
+ trial_info: dict[str, Any] | None = None,
141
388
  ) -> None:
142
389
  """Display the final selected candidate with its Opik score."""
143
390
  if verbose < 1:
@@ -145,11 +392,33 @@ def display_selected_candidate(
145
392
 
146
393
  snippet = system_prompt.strip() or "<empty>"
147
394
  text = Text(snippet)
395
+ subtitle: Text | None = None
396
+ if trial_info:
397
+ trial_parts: list[str] = []
398
+ trial_name = trial_info.get("experiment_name")
399
+ trial_ids = trial_info.get("trial_ids") or []
400
+ if trial_name:
401
+ trial_parts.append(f"Trial {trial_name}")
402
+ elif trial_ids:
403
+ trial_parts.append(f"Trial {trial_ids[0]}")
404
+
405
+ compare_url = trial_info.get("compare_url")
406
+ experiment_url = trial_info.get("experiment_url")
407
+ if compare_url:
408
+ trial_parts.append(f"[link={compare_url}]Compare run[/link]")
409
+ elif experiment_url:
410
+ trial_parts.append(f"[link={experiment_url}]View experiment[/link]")
411
+
412
+ if trial_parts:
413
+ subtitle = Text.from_markup(" • ".join(trial_parts))
414
+
148
415
  panel = Panel(
149
416
  text,
150
417
  title=f"{title} — Opik score {score:.4f}",
151
418
  border_style="green",
152
419
  expand=True,
420
+ subtitle=subtitle,
421
+ subtitle_align="left",
153
422
  )
154
423
  console.print(panel)
155
424