opik-optimizer 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -637,7 +637,7 @@ class BaseOptimizer(ABC):
637
637
  base_config = self._deep_merge_dicts(base_config, additional_metadata)
638
638
 
639
639
  if experiment_config:
640
- base_config = self._deep_merge_dicts(experiment_config, base_config)
640
+ base_config = self._deep_merge_dicts(base_config, experiment_config)
641
641
 
642
642
  return self._drop_none(base_config)
643
643
 
@@ -1,10 +1,10 @@
1
1
  import logging
2
- from contextlib import nullcontext
3
- from typing import Any, ContextManager
2
+ from typing import Any
4
3
  from collections.abc import Callable
5
4
 
6
5
  import opik
7
6
  from opik import Dataset, opik_context
7
+ from opik.evaluation import evaluator as opik_evaluator
8
8
  from opik.evaluation.metrics.score_result import ScoreResult
9
9
 
10
10
  from ..base_optimizer import BaseOptimizer
@@ -16,7 +16,9 @@ from ..utils import (
16
16
  create_litellm_agent_class,
17
17
  disable_experiment_reporting,
18
18
  enable_experiment_reporting,
19
+ unique_ordered_by_key,
19
20
  )
21
+ from ..task_evaluator import _create_metric_class
20
22
  from ..reporting_utils import suppress_opik_logs
21
23
  from .. import task_evaluator
22
24
  from . import reporting as gepa_reporting
@@ -213,6 +215,25 @@ class GepaOptimizer(BaseOptimizer):
213
215
  # Calculate max_metric_calls from max_trials and effective samples
214
216
  effective_n_samples = len(items)
215
217
  max_metric_calls = max_trials * effective_n_samples
218
+ budget_limited_trials = (
219
+ max_metric_calls // effective_n_samples if effective_n_samples else 0
220
+ )
221
+ if reflection_minibatch_size > max_trials:
222
+ logger.warning(
223
+ "reflection_minibatch_size (%s) exceeds max_trials (%s); GEPA reflection will not run. "
224
+ "Increase max_trials or lower the minibatch.",
225
+ reflection_minibatch_size,
226
+ max_trials,
227
+ )
228
+ elif (
229
+ budget_limited_trials and reflection_minibatch_size > budget_limited_trials
230
+ ):
231
+ logger.warning(
232
+ "reflection_minibatch_size (%s) exceeds the number of candidates allowed by the metric budget (%s). "
233
+ "Consider increasing max_trials or n_samples.",
234
+ reflection_minibatch_size,
235
+ budget_limited_trials,
236
+ )
216
237
 
217
238
  data_insts = self._build_data_insts(items, input_key, output_key)
218
239
 
@@ -375,6 +396,23 @@ class GepaOptimizer(BaseOptimizer):
375
396
  candidates: list[dict[str, str]] = getattr(gepa_result, "candidates", []) or []
376
397
  val_scores: list[float] = list(getattr(gepa_result, "val_aggregate_scores", []))
377
398
 
399
+ indexed_candidates: list[tuple[int, dict[str, str]]] = list(
400
+ enumerate(candidates)
401
+ )
402
+ filtered_indexed_candidates = unique_ordered_by_key(
403
+ indexed_candidates,
404
+ key=lambda item: self._extract_system_text_from_candidate(
405
+ item[1], seed_prompt_text
406
+ ).strip(),
407
+ )
408
+ filtered_candidates: list[dict[str, str]] = [
409
+ candidate for _, candidate in filtered_indexed_candidates
410
+ ]
411
+ filtered_val_scores: list[float | None] = [
412
+ val_scores[idx] if idx < len(val_scores) else None
413
+ for idx, _ in filtered_indexed_candidates
414
+ ]
415
+
378
416
  rescored: list[float] = []
379
417
  candidate_rows: list[dict[str, Any]] = []
380
418
  history: list[dict[str, Any]] = []
@@ -385,7 +423,9 @@ class GepaOptimizer(BaseOptimizer):
385
423
  # Wrap rescoring to prevent OPIK messages and experiment link displays
386
424
  with suppress_opik_logs():
387
425
  with convert_tqdm_to_rich(verbose=0):
388
- for idx, candidate in enumerate(candidates):
426
+ for idx, (original_idx, candidate) in enumerate(
427
+ filtered_indexed_candidates
428
+ ):
389
429
  candidate_prompt = self._extract_system_text_from_candidate(
390
430
  candidate, seed_prompt_text
391
431
  )
@@ -421,9 +461,7 @@ class GepaOptimizer(BaseOptimizer):
421
461
  {
422
462
  "iteration": idx + 1,
423
463
  "system_prompt": candidate_prompt,
424
- "gepa_score": val_scores[idx]
425
- if idx < len(val_scores)
426
- else None,
464
+ "gepa_score": filtered_val_scores[idx],
427
465
  "opik_score": score,
428
466
  "source": self.__class__.__name__,
429
467
  }
@@ -435,9 +473,7 @@ class GepaOptimizer(BaseOptimizer):
435
473
  "scores": [
436
474
  {
437
475
  "metric_name": f"GEPA-{metric.__name__}",
438
- "score": val_scores[idx]
439
- if idx < len(val_scores)
440
- else None,
476
+ "score": filtered_val_scores[idx],
441
477
  },
442
478
  {"metric_name": metric.__name__, "score": score},
443
479
  ],
@@ -446,14 +482,45 @@ class GepaOptimizer(BaseOptimizer):
446
482
  )
447
483
 
448
484
  if rescored:
449
- best_idx = max(range(len(rescored)), key=lambda i: rescored[i])
485
+
486
+ def _tie_break(idx: int) -> tuple[float, float, int]:
487
+ opik_score = rescored[idx]
488
+ gepa_score = filtered_val_scores[idx]
489
+ gepa_numeric = (
490
+ float(gepa_score)
491
+ if isinstance(gepa_score, (int, float))
492
+ else float("-inf")
493
+ )
494
+ return opik_score, gepa_numeric, idx
495
+
496
+ best_idx = max(range(len(rescored)), key=_tie_break)
450
497
  best_score = rescored[best_idx]
451
498
  else:
452
- best_idx = getattr(gepa_result, "best_idx", 0) or 0
453
- best_score = float(val_scores[best_idx]) if val_scores else 0.0
499
+ if filtered_indexed_candidates:
500
+ gepa_best_idx = getattr(gepa_result, "best_idx", 0) or 0
501
+ best_idx = next(
502
+ (
503
+ i
504
+ for i, (original_idx, _) in enumerate(
505
+ filtered_indexed_candidates
506
+ )
507
+ if original_idx == gepa_best_idx
508
+ ),
509
+ 0,
510
+ )
511
+ if filtered_val_scores and 0 <= best_idx < len(filtered_val_scores):
512
+ score_value = filtered_val_scores[best_idx]
513
+ best_score = float(score_value) if score_value is not None else 0.0
514
+ else:
515
+ best_score = float(initial_score)
516
+ else:
517
+ best_idx = 0
518
+ best_score = float(initial_score)
454
519
 
455
520
  best_candidate = (
456
- candidates[best_idx] if candidates else {"system_prompt": seed_prompt_text}
521
+ filtered_candidates[best_idx]
522
+ if filtered_candidates
523
+ else {"system_prompt": seed_prompt_text}
457
524
  )
458
525
  best_prompt_text = self._extract_system_text_from_candidate(
459
526
  best_candidate, seed_prompt_text
@@ -469,26 +536,62 @@ class GepaOptimizer(BaseOptimizer):
469
536
  }
470
537
  final_prompt.model_kwargs = filtered_model_kwargs
471
538
 
472
- final_eval_kwargs = dict(
473
- prompt=final_prompt,
474
- dataset=dataset,
475
- metric=metric,
476
- n_samples=n_samples,
477
- optimization_id=opt_id,
478
- extra_metadata={"phase": "final", "selected": True},
479
- verbose=0,
480
- )
481
- suppress_logs: ContextManager[Any] = nullcontext()
482
- try:
483
- from ..reporting_utils import suppress_opik_logs as _suppress_logs
484
-
485
- suppress_logs = _suppress_logs()
486
- except Exception:
487
- pass
539
+ final_eval_result: Any | None = None
488
540
 
489
- with suppress_logs:
541
+ with suppress_opik_logs():
490
542
  try:
491
- self._evaluate_prompt_logged(**final_eval_kwargs)
543
+ final_agent_cls = create_litellm_agent_class(
544
+ final_prompt, optimizer_ref=self
545
+ )
546
+ final_agent = final_agent_cls(final_prompt)
547
+
548
+ def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
549
+ messages = final_prompt.get_messages(dataset_item)
550
+ raw = final_agent.invoke(messages)
551
+ if self.current_optimization_id:
552
+ opik_context.update_current_trace(
553
+ tags=[self.current_optimization_id, "Evaluation"]
554
+ )
555
+ return {mappers.EVALUATED_LLM_TASK_OUTPUT: raw.strip()}
556
+
557
+ configuration_updates = self._drop_none(
558
+ {"gepa": {"phase": "final", "selected": True}}
559
+ )
560
+ final_experiment_config = self._prepare_experiment_config(
561
+ prompt=final_prompt,
562
+ dataset=dataset,
563
+ metric=metric,
564
+ experiment_config=experiment_config,
565
+ configuration_updates=configuration_updates,
566
+ )
567
+
568
+ metric_class = _create_metric_class(metric)
569
+
570
+ if opt_id:
571
+ final_eval_result = opik_evaluator.evaluate_optimization_trial(
572
+ optimization_id=opt_id,
573
+ dataset=dataset,
574
+ task=final_llm_task,
575
+ project_name=final_experiment_config.get("project_name"),
576
+ dataset_item_ids=None,
577
+ scoring_metrics=[metric_class],
578
+ task_threads=self.n_threads,
579
+ nb_samples=n_samples,
580
+ experiment_config=final_experiment_config,
581
+ verbose=0,
582
+ )
583
+ else:
584
+ final_eval_result = opik_evaluator.evaluate(
585
+ dataset=dataset,
586
+ task=final_llm_task,
587
+ project_name=final_experiment_config.get("project_name"),
588
+ dataset_item_ids=None,
589
+ scoring_metrics=[metric_class],
590
+ task_threads=self.n_threads,
591
+ nb_samples=n_samples,
592
+ experiment_config=final_experiment_config,
593
+ verbose=0,
594
+ )
492
595
  except Exception:
493
596
  logger.debug("Final evaluation failed", exc_info=True)
494
597
 
@@ -518,28 +621,55 @@ class GepaOptimizer(BaseOptimizer):
518
621
  except Exception:
519
622
  logger.debug("Per-item diagnostics failed", exc_info=True)
520
623
 
624
+ trial_info: dict[str, Any] | None = None
625
+ if final_eval_result is not None:
626
+ experiment_name = getattr(final_eval_result, "experiment_name", None)
627
+ experiment_url = getattr(final_eval_result, "experiment_url", None)
628
+ trial_ids = []
629
+ try:
630
+ trial_ids = sorted(
631
+ {
632
+ str(test_result.trial_id)
633
+ for test_result in getattr(
634
+ final_eval_result, "test_results", []
635
+ )
636
+ if getattr(test_result, "trial_id", None) is not None
637
+ }
638
+ )
639
+ except Exception:
640
+ logger.debug("Failed to extract trial IDs", exc_info=True)
641
+
642
+ trial_info = {
643
+ "experiment_name": experiment_name,
644
+ "experiment_url": experiment_url,
645
+ "trial_ids": trial_ids,
646
+ }
647
+
521
648
  details: dict[str, Any] = {
522
649
  "model": self.model,
523
650
  "temperature": self.model_parameters.get("temperature"),
524
651
  "optimizer": self.__class__.__name__,
525
- "num_candidates": getattr(gepa_result, "num_candidates", None),
652
+ "num_candidates": len(filtered_candidates),
526
653
  "total_metric_calls": getattr(gepa_result, "total_metric_calls", None),
527
654
  "parents": getattr(gepa_result, "parents", None),
528
- "val_scores": val_scores,
655
+ "val_scores": filtered_val_scores,
529
656
  "opik_rescored_scores": rescored,
530
657
  "candidate_summary": candidate_rows,
531
658
  "best_candidate_iteration": (
532
659
  candidate_rows[best_idx]["iteration"] if candidate_rows else 0
533
660
  ),
534
- "selected_candidate_index": best_idx,
661
+ "selected_candidate_index": best_idx if filtered_candidates else None,
535
662
  "selected_candidate_gepa_score": (
536
- val_scores[best_idx] if best_idx < len(val_scores) else None
663
+ filtered_val_scores[best_idx]
664
+ if filtered_val_scores and 0 <= best_idx < len(filtered_val_scores)
665
+ else None
537
666
  ),
538
667
  "selected_candidate_opik_score": best_score,
539
668
  "gepa_live_metric_used": True,
540
669
  "gepa_live_metric_call_count": self._gepa_live_metric_calls,
541
670
  "selected_candidate_item_scores": per_item_scores,
542
671
  "dataset_item_ids": [item.get("id") for item in items],
672
+ "selected_candidate_trial_info": trial_info,
543
673
  }
544
674
  if experiment_config:
545
675
  details["experiment"] = experiment_config
@@ -551,7 +681,10 @@ class GepaOptimizer(BaseOptimizer):
551
681
  candidate_rows, verbose=self.verbose
552
682
  )
553
683
  gepa_reporting.display_selected_candidate(
554
- best_prompt_text, best_score, verbose=self.verbose
684
+ best_prompt_text,
685
+ best_score,
686
+ verbose=self.verbose,
687
+ trial_info=trial_info,
555
688
  )
556
689
 
557
690
  if logger.isEnabledFor(logging.DEBUG):
@@ -1,3 +1,5 @@
1
+ import json
2
+ from numbers import Number
1
3
  from contextlib import contextmanager
2
4
  from typing import Any
3
5
 
@@ -19,12 +21,70 @@ from ..reporting_utils import ( # noqa: F401
19
21
  display_result,
20
22
  get_console,
21
23
  convert_tqdm_to_rich,
24
+ format_prompt_snippet,
22
25
  suppress_opik_logs,
23
26
  )
24
27
 
25
28
  console = get_console()
26
29
 
27
30
 
31
+ def _format_pareto_note(note: str) -> str:
32
+ try:
33
+ data = json.loads(note)
34
+ except json.JSONDecodeError:
35
+ return note
36
+
37
+ if isinstance(data, dict):
38
+ parts: list[str] = []
39
+ new_scores = data.get("new_scores") or data.get("scores")
40
+ if isinstance(new_scores, list):
41
+ formatted_scores = ", ".join(
42
+ f"{float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
43
+ if isinstance(score, Number)
44
+ else str(score)
45
+ for score in new_scores
46
+ )
47
+ parts.append(f"scores=[{formatted_scores}]")
48
+
49
+ chosen = data.get("chosen")
50
+ if chosen is not None:
51
+ parts.append(f"chosen={chosen}")
52
+
53
+ train_val = data.get("pareto_front_train_val_score")
54
+ if isinstance(train_val, dict) and chosen is not None:
55
+ chosen_entry = train_val.get(str(chosen))
56
+ if isinstance(chosen_entry, dict):
57
+ score = chosen_entry.get("score")
58
+ if isinstance(score, Number):
59
+ parts.append(
60
+ f"train_val={float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
61
+ )
62
+
63
+ pareto_front = data.get("pareto_front")
64
+ if isinstance(pareto_front, dict):
65
+ parts.append(f"front_size={len(pareto_front)}")
66
+
67
+ if parts:
68
+ return ", ".join(parts)
69
+
70
+ return note
71
+
72
+ elif isinstance(data, list):
73
+ return ", ".join(
74
+ f"{float(item) if isinstance(item, (int, float)) else float(str(item)):.3f}"
75
+ if isinstance(item, Number)
76
+ else str(item)
77
+ for item in data
78
+ )
79
+
80
+ elif isinstance(data, Number):
81
+ return (
82
+ f"{float(data) if isinstance(data, (int, float)) else float(str(data)):.3f}"
83
+ )
84
+
85
+ return str(data)
86
+
87
+
28
88
  class RichGEPAOptimizerLogger:
29
89
  """Adapter for GEPA's logger that provides concise Rich output with progress tracking."""
30
90
 
@@ -58,6 +118,8 @@ class RichGEPAOptimizerLogger:
58
118
  self.task_id = task_id
59
119
  self.max_trials = max_trials
60
120
  self.current_iteration = 0
121
+ self._last_best_message: tuple[str, str] | None = None
122
+ self._last_raw_message: str | None = None
61
123
 
62
124
  def log(self, message: str) -> None:
63
125
  if self.verbose < 1:
@@ -73,6 +135,13 @@ class RichGEPAOptimizerLogger:
73
135
 
74
136
  first = lines[0]
75
137
 
138
+ if first == self._last_raw_message:
139
+ return
140
+
141
+ # Reset duplicate tracker when handling other messages
142
+ if not first.startswith("Best "):
143
+ self._last_best_message = None
144
+
76
145
  # Track iteration changes and add separation
77
146
  if first.startswith("Iteration "):
78
147
  colon = first.find(":")
@@ -88,6 +157,7 @@ class RichGEPAOptimizerLogger:
88
157
 
89
158
  self.optimizer._gepa_current_iteration = iteration # type: ignore[attr-defined]
90
159
  self.current_iteration = iteration
160
+ self._last_raw_message = first
91
161
 
92
162
  # Update progress bar
93
163
  if self.progress and self.task_id is not None:
@@ -120,32 +190,34 @@ class RichGEPAOptimizerLogger:
120
190
  except Exception:
121
191
  pass
122
192
 
123
- # Check if this message should be suppressed
124
- for keyword in self.SUPPRESS_KEYWORDS:
125
- if keyword in first:
126
- return
193
+ # Check if this message should be suppressed (unless verbose >= 2)
194
+ if self.verbose <= 1:
195
+ for keyword in self.SUPPRESS_KEYWORDS:
196
+ if keyword in first:
197
+ return
127
198
 
128
- for prefix in self.SUPPRESS_PREFIXES:
129
- if prefix in first:
130
- return
199
+ for prefix in self.SUPPRESS_PREFIXES:
200
+ if prefix in first:
201
+ return
131
202
 
132
203
  # Format proposed prompts
133
204
  if "Proposed new text" in first and "system_prompt:" in first:
134
205
  _, _, rest = first.partition("system_prompt:")
135
- snippet = rest.strip()
136
- if len(snippet) > 100:
137
- snippet = snippet[:100] + "…"
206
+ snippet = format_prompt_snippet(rest, max_length=100)
138
207
  console.print(f"│ │ Proposed: {snippet}", style="dim")
208
+ self._last_raw_message = first
139
209
  return
140
210
 
141
211
  # Format subsample evaluation results
142
212
  if "New subsample score" in first and "is not better than" in first:
143
213
  console.print("│ └─ Rejected - no improvement", style="dim yellow")
144
214
  console.print("│") # Add spacing after rejected trials
215
+ self._last_raw_message = first
145
216
  return
146
217
 
147
- if "New subsample score" in first and "is better than" in first:
218
+ elif "New subsample score" in first and "is better than" in first:
148
219
  console.print("│ ├─ Promising! Running full validation...", style="green")
220
+ self._last_raw_message = first
149
221
  return
150
222
 
151
223
  # Format final validation score
@@ -157,9 +229,18 @@ class RichGEPAOptimizerLogger:
157
229
  console.print(f"│ ├─ Validation complete: {score}", style="bold green")
158
230
  else:
159
231
  console.print("│ ├─ Validation complete", style="green")
232
+ self._last_raw_message = first
160
233
  return
161
234
 
162
235
  # Format best score updates
236
+ if "Best score on train_val" in first:
237
+ parts = first.split(":")
238
+ if len(parts) >= 2:
239
+ score = parts[-1].strip()
240
+ console.print(f"│ Best train_val score: {score}", style="cyan")
241
+ self._last_raw_message = first
242
+ return
243
+
163
244
  if (
164
245
  "Best valset aggregate score so far" in first
165
246
  or "Best score on valset" in first
@@ -168,10 +249,32 @@ class RichGEPAOptimizerLogger:
168
249
  parts = first.split(":")
169
250
  if len(parts) >= 2:
170
251
  score = parts[-1].strip()
171
- console.print(f"│ └─ New best: {score} ✓", style="bold green")
172
- console.print("│") # Add spacing after successful trials
252
+ key = ("new_best", score)
253
+ if self._last_best_message != key:
254
+ console.print(f"│ └─ New best: {score} ✓", style="bold green")
255
+ console.print("│") # Add spacing after successful trials
256
+ self._last_best_message = key
257
+ self._last_raw_message = first
173
258
  return
174
259
 
260
+ if self.verbose >= 2:
261
+ if "New valset pareto front scores" in first:
262
+ note = first.split(":", 1)[-1].strip()
263
+ console.print(
264
+ f"│ Pareto front scores updated: {_format_pareto_note(note)}",
265
+ style="cyan",
266
+ )
267
+ self._last_raw_message = first
268
+ return
269
+ if "Updated valset pareto front programs" in first:
270
+ console.print("│ Pareto front programs updated", style="cyan")
271
+ self._last_raw_message = first
272
+ return
273
+ if "New program is on the linear pareto front" in first:
274
+ console.print("│ Candidate added to Pareto front", style="cyan")
275
+ self._last_raw_message = first
276
+ return
277
+
175
278
  # Suppress redundant "Iteration X:" prefix from detailed messages
176
279
  if first.startswith(f"Iteration {self.current_iteration}:"):
177
280
  # Remove the iteration prefix for cleaner output
@@ -184,6 +287,7 @@ class RichGEPAOptimizerLogger:
184
287
  # Default: print with standard prefix only if not already handled
185
288
  if first:
186
289
  console.print(f"│ {first}", style="dim")
290
+ self._last_raw_message = first
187
291
 
188
292
 
189
293
  @contextmanager
@@ -280,6 +384,7 @@ def display_selected_candidate(
280
384
  *,
281
385
  verbose: int = 1,
282
386
  title: str = "Selected Candidate",
387
+ trial_info: dict[str, Any] | None = None,
283
388
  ) -> None:
284
389
  """Display the final selected candidate with its Opik score."""
285
390
  if verbose < 1:
@@ -287,11 +392,33 @@ def display_selected_candidate(
287
392
 
288
393
  snippet = system_prompt.strip() or "<empty>"
289
394
  text = Text(snippet)
395
+ subtitle: Text | None = None
396
+ if trial_info:
397
+ trial_parts: list[str] = []
398
+ trial_name = trial_info.get("experiment_name")
399
+ trial_ids = trial_info.get("trial_ids") or []
400
+ if trial_name:
401
+ trial_parts.append(f"Trial {trial_name}")
402
+ elif trial_ids:
403
+ trial_parts.append(f"Trial {trial_ids[0]}")
404
+
405
+ compare_url = trial_info.get("compare_url")
406
+ experiment_url = trial_info.get("experiment_url")
407
+ if compare_url:
408
+ trial_parts.append(f"[link={compare_url}]Compare run[/link]")
409
+ elif experiment_url:
410
+ trial_parts.append(f"[link={experiment_url}]View experiment[/link]")
411
+
412
+ if trial_parts:
413
+ subtitle = Text.from_markup(" • ".join(trial_parts))
414
+
290
415
  panel = Panel(
291
416
  text,
292
417
  title=f"{title} — Opik score {score:.4f}",
293
418
  border_style="green",
294
419
  expand=True,
420
+ subtitle=subtitle,
421
+ subtitle_align="left",
295
422
  )
296
423
  console.print(panel)
297
424
 
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import re
3
4
  from contextlib import contextmanager
4
5
  from typing import Any
5
6
 
@@ -94,6 +95,23 @@ def suppress_opik_logs() -> Any:
94
95
  opik_logger.setLevel(original_opik_level)
95
96
 
96
97
 
98
def format_prompt_snippet(text: str, max_length: int = 100) -> str:
    """
    Normalize whitespace in a prompt snippet and truncate it for compact display.

    Args:
        text: Raw text to summarize.
        max_length: Maximum characters to keep before adding an ellipsis.
            Negative values are treated as 0.

    Returns:
        str: The text with leading/trailing whitespace removed and every
        whitespace run collapsed to a single space. When the result is longer
        than ``max_length`` it is cut to ``max_length`` characters and "…" is
        appended.
    """
    normalized = re.sub(r"\s+", " ", text.strip())
    # A negative limit would otherwise slice from the end of the string
    # (dropping only the last characters) instead of truncating.
    limit = max(max_length, 0)
    if len(normalized) > limit:
        return normalized[:limit] + "…"
    return normalized
113
+
114
+
97
115
  def display_messages(messages: list[dict[str, str]], prefix: str = "") -> None:
98
116
  for i, msg in enumerate(messages):
99
117
  panel = Panel(
@@ -3,13 +3,16 @@
3
3
  from .core import * # noqa: F401,F403
4
4
  from .dataset_utils import * # noqa: F401,F403
5
5
  from .prompt_segments import * # noqa: F401,F403
6
+ from .candidate_utils import * # noqa: F401,F403
6
7
 
7
8
  from . import core as _core
8
9
  from . import dataset_utils as _dataset_utils
9
10
  from . import prompt_segments as _prompt_segments
11
+ from . import candidate_utils as _candidate_utils
10
12
 
11
13
  __all__: list[str] = [
12
14
  *getattr(_core, "__all__", []),
13
15
  *getattr(_dataset_utils, "__all__", []),
14
16
  *getattr(_prompt_segments, "__all__", []),
17
+ *getattr(_candidate_utils, "__all__", []),
15
18
  ]
@@ -0,0 +1,52 @@
1
+ """
2
+ Utilities for working with optimizer candidate collections.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Iterable
8
+ from typing import TypeVar
9
+
10
+ __all__ = ["unique_ordered_by_key"]
11
+
12
+ T = TypeVar("T")
13
+
14
+
15
def unique_ordered_by_key(
    items: Iterable[T],
    key: Callable[[T], str],
    *,
    drop_keys: set[str] | None = None,
) -> list[T]:
    """
    De-duplicate ``items`` by a derived key while keeping first-seen order.

    Args:
        items: Items to filter.
        key: Callable producing the comparison key for an item. If it raises
            ``TypeError``, ``AttributeError`` or ``KeyError``, ``str(item)``
            is used as the key instead.
        drop_keys: Optional keys whose items are left out of the result entirely.

    Returns:
        List[T]: The first item encountered for each distinct key, in input
        order, excluding any item whose key is in ``drop_keys``.
    """
    excluded = drop_keys or frozenset()
    emitted_keys: set[str] = set()
    result: list[T] = []

    def _key_for(entry: T) -> str:
        # A failing extractor degrades to the item's string form.
        try:
            return key(entry)
        except (TypeError, AttributeError, KeyError):
            return str(entry)

    for entry in items:
        entry_key = _key_for(entry)
        # Skip both explicitly dropped keys and keys already emitted.
        if entry_key in excluded or entry_key in emitted_keys:
            continue
        emitted_keys.add(entry_key)
        result.append(entry)

    return result
@@ -310,6 +310,25 @@ def get_optimization_run_url_by_id(
310
310
  return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
311
311
 
312
312
 
313
def get_trial_compare_url(
    *, dataset_id: str | None, optimization_id: str | None, trial_ids: list[str]
) -> str:
    """
    Build the "compare" URL for a set of trials within an optimization.

    The URL is resolved against the ``url_override`` value of the Opik
    configuration and has the path
    ``optimizations/<optimization_id>/<dataset_id>/compare`` with the trial
    IDs passed as a percent-quoted JSON array in the ``trials`` query parameter.

    Args:
        dataset_id: Dataset identifier placed in the URL path.
        optimization_id: Optimization identifier placed in the URL path.
        trial_ids: Trial identifiers to include in the query string.

    Returns:
        str: The joined URL.

    Raises:
        ValueError: If ``dataset_id`` or ``optimization_id`` is None, or if
            ``trial_ids`` is empty.
    """
    identifiers_missing = dataset_id is None or optimization_id is None
    if identifiers_missing:
        raise ValueError("dataset_id and optimization_id are required")
    if not trial_ids:
        raise ValueError("trial_ids must be a non-empty list")

    # Resolve the base URL first, guaranteeing a trailing slash so urljoin
    # appends the relative path rather than replacing the last path segment.
    root_url = ensure_ending_slash(opik.config.get_from_user_inputs().url_override)

    encoded_trials = urllib.parse.quote(json.dumps(trial_ids))
    relative_path = (
        f"optimizations/{optimization_id}/{dataset_id}/compare?trials={encoded_trials}"
    )
    return urllib.parse.urljoin(root_url, relative_path)
330
+
331
+
313
332
  def create_litellm_agent_class(
314
333
  prompt: "ChatPrompt", optimizer_ref: Any = None
315
334
  ) -> type["OptimizableAgent"]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opik_optimizer
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Agent optimization with Opik
5
5
  Home-page: https://github.com/comet-ml/opik
6
6
  Author: Comet ML
@@ -1,13 +1,13 @@
1
1
  opik_optimizer/__init__.py,sha256=HsEIWyxeUJhzCvuML5SjBHFWtm-b5LSHyE9GRYytyeI,1592
2
2
  opik_optimizer/_throttle.py,sha256=1JXIhYlo0IaqCgwmNB0Hnh9CYhYPkwRFdVGIcE7pVNg,1362
3
- opik_optimizer/base_optimizer.py,sha256=VpH6JSalcoewGkIN0h77_crCAkx5ffQtjNhaD0Xtazg,28350
3
+ opik_optimizer/base_optimizer.py,sha256=o4U9yoU-KhR7q_3KnvV3DgCeVboOQdacgleq8D2d_20,28350
4
4
  opik_optimizer/cache_config.py,sha256=Xd3NdUsL7bLQWoNe3pESqH4nHucU1iNTSGp-RqbwDog,599
5
5
  opik_optimizer/logging_config.py,sha256=TmxX0C1P20amxoXuiNQvlENOjdSNfWwvL8jFy206VWM,3837
6
6
  opik_optimizer/multi_metric_objective.py,sha256=y4jqirnhkfhB7SWonI4ldYg5fWG4JGfAxqu7ylRD1J4,1178
7
7
  opik_optimizer/optimizable_agent.py,sha256=gB1ALuVPyEmXOTVYeK2i-inBAO-6JMZSjOrmj37okgQ,6514
8
8
  opik_optimizer/optimization_result.py,sha256=sG-Yr-hOaH9zx_I5S6_W3v6j8nPUhwYdS333jVM4Gus,17218
9
9
  opik_optimizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- opik_optimizer/reporting_utils.py,sha256=Gx69W16FfIpavH_o0WvnGJPIpqHAjJm4GNHKcJhtoeU,9443
10
+ opik_optimizer/reporting_utils.py,sha256=jN3_-tTy98KtsOv8Xp-DKFpePQQYZHHhT7kkG-jUrOg,9970
11
11
  opik_optimizer/task_evaluator.py,sha256=7N254DU0UkWJ5saQ5AmYEsHHSrychAJtedmmjNsCOnI,5081
12
12
  opik_optimizer/data/context7_eval.jsonl,sha256=vPR3XRfI0UbZ1hgUGaOdpraFT99RDLU1YWuPFLLQz40,1757
13
13
  opik_optimizer/data/hotpot-500.json,sha256=YXxCtuvYvxSu5u0y4559a6b1qwgAYsWzT_SUKv_21ew,76862
@@ -43,8 +43,8 @@ opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256
43
43
  opik_optimizer/few_shot_bayesian_optimizer/reporting.py,sha256=xk7gKaoTrlp1WDpW3mB5Irzty5Z5l9SJygO3PaamOvU,6283
44
44
  opik_optimizer/gepa_optimizer/__init__.py,sha256=XcPah5t4mop7UCFo69E9l45Mem49-itqkQT7_J1aWOA,71
45
45
  opik_optimizer/gepa_optimizer/adapter.py,sha256=KzPa4koq7aJhALMAOKPxAO4yWuEy_YbW7tGnqny3Hfo,5139
46
- opik_optimizer/gepa_optimizer/gepa_optimizer.py,sha256=rWD92KauJbnkXbsftzhTDkBjmgzlwGZcm6t7F-ceh_Q,27055
47
- opik_optimizer/gepa_optimizer/reporting.py,sha256=ZyD4bwiW6BxmPb-966u3iDAfBsiO56kO2VBxHtNmL-Q,11050
46
+ opik_optimizer/gepa_optimizer/gepa_optimizer.py,sha256=RlTm71yWjRR8C1nEAuNXfAx1gkt5nsOwV6bfvu5NwbM,32849
47
+ opik_optimizer/gepa_optimizer/reporting.py,sha256=FiIPtHE6c5p4yMfknnhZetEjehvrA8PRejeOPT9uBCo,15836
48
48
  opik_optimizer/hierarchical_reflective_optimizer/__init__.py,sha256=9qM3kvfAaFy-Y6Tg19MXHJxpnF5DJQQwzr6oNsxaRBM,133
49
49
  opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py,sha256=fhB68XrGNgaHfPwV1JDow-MiAT-jhKDT_Kf-mLLzk0o,27775
50
50
  opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py,sha256=0D5wgx04jZvTJ0Yjqm0jtQvkjrGBB73qgcsSwLBpnv0,13814
@@ -74,13 +74,14 @@ opik_optimizer/parameter_optimizer/parameter_spec.py,sha256=HzYT_dHBTfZtx403mY-E
74
74
  opik_optimizer/parameter_optimizer/reporting.py,sha256=-kEe9sQFdkUhxayEamXLR8ukyTLJrGsTs8pbJWmimQ4,4665
75
75
  opik_optimizer/parameter_optimizer/search_space_types.py,sha256=UajTA2QKikEWazokDNO7j141gc2WxxYYiDRnFFjXi6M,512
76
76
  opik_optimizer/parameter_optimizer/sensitivity_analysis.py,sha256=8KEMVMHsmcoiK21Cq1-We6_Pw_6LX9qBX9Az4-tmj_w,2146
77
- opik_optimizer/utils/__init__.py,sha256=Ee0SnTPOcwRwp93M6Lh-X913lfSIwnvCiYYh5cpdRQE,486
77
+ opik_optimizer/utils/__init__.py,sha256=_sielSJdLVeyBugtsw1iSVJr_I8YbhsU-U7p8zLe_JY,633
78
+ opik_optimizer/utils/candidate_utils.py,sha256=PKtjREM4MFHvgDri8jCmbs6zHvxAnrfjuwwymvQNnrk,1294
78
79
  opik_optimizer/utils/colbert.py,sha256=qSrzKUUGw7P92mLy4Ofug5pBGeTsHBLMJXlXSJSfKuo,8147
79
- opik_optimizer/utils/core.py,sha256=sL9I9kG1Gdjj0b3rBgPpXp7NUaUisD3_ITSkE7w5QhU,16014
80
+ opik_optimizer/utils/core.py,sha256=56lQax3mAQkVZWfie6vhaTKZfjTBcYXf-FFkXgyFYFE,16715
80
81
  opik_optimizer/utils/dataset_utils.py,sha256=dqRUGOekjeNWL0J15R8xFwLyKJDJynJXzVyQmt8rhHA,1464
81
82
  opik_optimizer/utils/prompt_segments.py,sha256=eiLYT1iiPxtB7ArriN13-LgI5tID-v7MrjniTAxK2Lo,5904
82
- opik_optimizer-2.2.0.dist-info/licenses/LICENSE,sha256=V-0VHJOBdcA_teT8VymvsBUQ1-CZU6yJRmMEjec_8tA,11372
83
- opik_optimizer-2.2.0.dist-info/METADATA,sha256=O8dgJi_lpEyd2kDMCj-AZMK4QthqAaZiR5Afo5WwCZ8,12807
84
- opik_optimizer-2.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
85
- opik_optimizer-2.2.0.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
86
- opik_optimizer-2.2.0.dist-info/RECORD,,
83
+ opik_optimizer-2.2.1.dist-info/licenses/LICENSE,sha256=V-0VHJOBdcA_teT8VymvsBUQ1-CZU6yJRmMEjec_8tA,11372
84
+ opik_optimizer-2.2.1.dist-info/METADATA,sha256=8HayPMPvWBxuCg1H3u6-d_8MwBxVF2DzbID2VrdqjKk,12807
85
+ opik_optimizer-2.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
86
+ opik_optimizer-2.2.1.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
87
+ opik_optimizer-2.2.1.dist-info/RECORD,,