opik-optimizer 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/base_optimizer.py +1 -1
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +169 -36
- opik_optimizer/gepa_optimizer/reporting.py +140 -13
- opik_optimizer/reporting_utils.py +18 -0
- opik_optimizer/utils/__init__.py +3 -0
- opik_optimizer/utils/candidate_utils.py +52 -0
- opik_optimizer/utils/core.py +19 -0
- {opik_optimizer-2.2.0.dist-info → opik_optimizer-2.2.1.dist-info}/METADATA +1 -1
- {opik_optimizer-2.2.0.dist-info → opik_optimizer-2.2.1.dist-info}/RECORD +12 -11
- {opik_optimizer-2.2.0.dist-info → opik_optimizer-2.2.1.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.2.0.dist-info → opik_optimizer-2.2.1.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.2.0.dist-info → opik_optimizer-2.2.1.dist-info}/top_level.txt +0 -0
opik_optimizer/base_optimizer.py
CHANGED
|
@@ -637,7 +637,7 @@ class BaseOptimizer(ABC):
|
|
|
637
637
|
base_config = self._deep_merge_dicts(base_config, additional_metadata)
|
|
638
638
|
|
|
639
639
|
if experiment_config:
|
|
640
|
-
base_config = self._deep_merge_dicts(
|
|
640
|
+
base_config = self._deep_merge_dicts(base_config, experiment_config)
|
|
641
641
|
|
|
642
642
|
return self._drop_none(base_config)
|
|
643
643
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from contextlib import nullcontext
|
|
3
|
-
from typing import Any, ContextManager
|
|
2
|
+
from typing import Any
|
|
4
3
|
from collections.abc import Callable
|
|
5
4
|
|
|
6
5
|
import opik
|
|
7
6
|
from opik import Dataset, opik_context
|
|
7
|
+
from opik.evaluation import evaluator as opik_evaluator
|
|
8
8
|
from opik.evaluation.metrics.score_result import ScoreResult
|
|
9
9
|
|
|
10
10
|
from ..base_optimizer import BaseOptimizer
|
|
@@ -16,7 +16,9 @@ from ..utils import (
|
|
|
16
16
|
create_litellm_agent_class,
|
|
17
17
|
disable_experiment_reporting,
|
|
18
18
|
enable_experiment_reporting,
|
|
19
|
+
unique_ordered_by_key,
|
|
19
20
|
)
|
|
21
|
+
from ..task_evaluator import _create_metric_class
|
|
20
22
|
from ..reporting_utils import suppress_opik_logs
|
|
21
23
|
from .. import task_evaluator
|
|
22
24
|
from . import reporting as gepa_reporting
|
|
@@ -213,6 +215,25 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
213
215
|
# Calculate max_metric_calls from max_trials and effective samples
|
|
214
216
|
effective_n_samples = len(items)
|
|
215
217
|
max_metric_calls = max_trials * effective_n_samples
|
|
218
|
+
budget_limited_trials = (
|
|
219
|
+
max_metric_calls // effective_n_samples if effective_n_samples else 0
|
|
220
|
+
)
|
|
221
|
+
if reflection_minibatch_size > max_trials:
|
|
222
|
+
logger.warning(
|
|
223
|
+
"reflection_minibatch_size (%s) exceeds max_trials (%s); GEPA reflection will not run. "
|
|
224
|
+
"Increase max_trials or lower the minibatch.",
|
|
225
|
+
reflection_minibatch_size,
|
|
226
|
+
max_trials,
|
|
227
|
+
)
|
|
228
|
+
elif (
|
|
229
|
+
budget_limited_trials and reflection_minibatch_size > budget_limited_trials
|
|
230
|
+
):
|
|
231
|
+
logger.warning(
|
|
232
|
+
"reflection_minibatch_size (%s) exceeds the number of candidates allowed by the metric budget (%s). "
|
|
233
|
+
"Consider increasing max_trials or n_samples.",
|
|
234
|
+
reflection_minibatch_size,
|
|
235
|
+
budget_limited_trials,
|
|
236
|
+
)
|
|
216
237
|
|
|
217
238
|
data_insts = self._build_data_insts(items, input_key, output_key)
|
|
218
239
|
|
|
@@ -375,6 +396,23 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
375
396
|
candidates: list[dict[str, str]] = getattr(gepa_result, "candidates", []) or []
|
|
376
397
|
val_scores: list[float] = list(getattr(gepa_result, "val_aggregate_scores", []))
|
|
377
398
|
|
|
399
|
+
indexed_candidates: list[tuple[int, dict[str, str]]] = list(
|
|
400
|
+
enumerate(candidates)
|
|
401
|
+
)
|
|
402
|
+
filtered_indexed_candidates = unique_ordered_by_key(
|
|
403
|
+
indexed_candidates,
|
|
404
|
+
key=lambda item: self._extract_system_text_from_candidate(
|
|
405
|
+
item[1], seed_prompt_text
|
|
406
|
+
).strip(),
|
|
407
|
+
)
|
|
408
|
+
filtered_candidates: list[dict[str, str]] = [
|
|
409
|
+
candidate for _, candidate in filtered_indexed_candidates
|
|
410
|
+
]
|
|
411
|
+
filtered_val_scores: list[float | None] = [
|
|
412
|
+
val_scores[idx] if idx < len(val_scores) else None
|
|
413
|
+
for idx, _ in filtered_indexed_candidates
|
|
414
|
+
]
|
|
415
|
+
|
|
378
416
|
rescored: list[float] = []
|
|
379
417
|
candidate_rows: list[dict[str, Any]] = []
|
|
380
418
|
history: list[dict[str, Any]] = []
|
|
@@ -385,7 +423,9 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
385
423
|
# Wrap rescoring to prevent OPIK messages and experiment link displays
|
|
386
424
|
with suppress_opik_logs():
|
|
387
425
|
with convert_tqdm_to_rich(verbose=0):
|
|
388
|
-
for idx, candidate in enumerate(
|
|
426
|
+
for idx, (original_idx, candidate) in enumerate(
|
|
427
|
+
filtered_indexed_candidates
|
|
428
|
+
):
|
|
389
429
|
candidate_prompt = self._extract_system_text_from_candidate(
|
|
390
430
|
candidate, seed_prompt_text
|
|
391
431
|
)
|
|
@@ -421,9 +461,7 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
421
461
|
{
|
|
422
462
|
"iteration": idx + 1,
|
|
423
463
|
"system_prompt": candidate_prompt,
|
|
424
|
-
"gepa_score": val_scores[idx]
|
|
425
|
-
if idx < len(val_scores)
|
|
426
|
-
else None,
|
|
464
|
+
"gepa_score": filtered_val_scores[idx],
|
|
427
465
|
"opik_score": score,
|
|
428
466
|
"source": self.__class__.__name__,
|
|
429
467
|
}
|
|
@@ -435,9 +473,7 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
435
473
|
"scores": [
|
|
436
474
|
{
|
|
437
475
|
"metric_name": f"GEPA-{metric.__name__}",
|
|
438
|
-
"score": val_scores[idx]
|
|
439
|
-
if idx < len(val_scores)
|
|
440
|
-
else None,
|
|
476
|
+
"score": filtered_val_scores[idx],
|
|
441
477
|
},
|
|
442
478
|
{"metric_name": metric.__name__, "score": score},
|
|
443
479
|
],
|
|
@@ -446,14 +482,45 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
446
482
|
)
|
|
447
483
|
|
|
448
484
|
if rescored:
|
|
449
|
-
|
|
485
|
+
|
|
486
|
+
def _tie_break(idx: int) -> tuple[float, float, int]:
|
|
487
|
+
opik_score = rescored[idx]
|
|
488
|
+
gepa_score = filtered_val_scores[idx]
|
|
489
|
+
gepa_numeric = (
|
|
490
|
+
float(gepa_score)
|
|
491
|
+
if isinstance(gepa_score, (int, float))
|
|
492
|
+
else float("-inf")
|
|
493
|
+
)
|
|
494
|
+
return opik_score, gepa_numeric, idx
|
|
495
|
+
|
|
496
|
+
best_idx = max(range(len(rescored)), key=_tie_break)
|
|
450
497
|
best_score = rescored[best_idx]
|
|
451
498
|
else:
|
|
452
|
-
|
|
453
|
-
|
|
499
|
+
if filtered_indexed_candidates:
|
|
500
|
+
gepa_best_idx = getattr(gepa_result, "best_idx", 0) or 0
|
|
501
|
+
best_idx = next(
|
|
502
|
+
(
|
|
503
|
+
i
|
|
504
|
+
for i, (original_idx, _) in enumerate(
|
|
505
|
+
filtered_indexed_candidates
|
|
506
|
+
)
|
|
507
|
+
if original_idx == gepa_best_idx
|
|
508
|
+
),
|
|
509
|
+
0,
|
|
510
|
+
)
|
|
511
|
+
if filtered_val_scores and 0 <= best_idx < len(filtered_val_scores):
|
|
512
|
+
score_value = filtered_val_scores[best_idx]
|
|
513
|
+
best_score = float(score_value) if score_value is not None else 0.0
|
|
514
|
+
else:
|
|
515
|
+
best_score = float(initial_score)
|
|
516
|
+
else:
|
|
517
|
+
best_idx = 0
|
|
518
|
+
best_score = float(initial_score)
|
|
454
519
|
|
|
455
520
|
best_candidate = (
|
|
456
|
-
|
|
521
|
+
filtered_candidates[best_idx]
|
|
522
|
+
if filtered_candidates
|
|
523
|
+
else {"system_prompt": seed_prompt_text}
|
|
457
524
|
)
|
|
458
525
|
best_prompt_text = self._extract_system_text_from_candidate(
|
|
459
526
|
best_candidate, seed_prompt_text
|
|
@@ -469,26 +536,62 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
469
536
|
}
|
|
470
537
|
final_prompt.model_kwargs = filtered_model_kwargs
|
|
471
538
|
|
|
472
|
-
|
|
473
|
-
prompt=final_prompt,
|
|
474
|
-
dataset=dataset,
|
|
475
|
-
metric=metric,
|
|
476
|
-
n_samples=n_samples,
|
|
477
|
-
optimization_id=opt_id,
|
|
478
|
-
extra_metadata={"phase": "final", "selected": True},
|
|
479
|
-
verbose=0,
|
|
480
|
-
)
|
|
481
|
-
suppress_logs: ContextManager[Any] = nullcontext()
|
|
482
|
-
try:
|
|
483
|
-
from ..reporting_utils import suppress_opik_logs as _suppress_logs
|
|
484
|
-
|
|
485
|
-
suppress_logs = _suppress_logs()
|
|
486
|
-
except Exception:
|
|
487
|
-
pass
|
|
539
|
+
final_eval_result: Any | None = None
|
|
488
540
|
|
|
489
|
-
with suppress_logs:
|
|
541
|
+
with suppress_opik_logs():
|
|
490
542
|
try:
|
|
491
|
-
|
|
543
|
+
final_agent_cls = create_litellm_agent_class(
|
|
544
|
+
final_prompt, optimizer_ref=self
|
|
545
|
+
)
|
|
546
|
+
final_agent = final_agent_cls(final_prompt)
|
|
547
|
+
|
|
548
|
+
def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
|
|
549
|
+
messages = final_prompt.get_messages(dataset_item)
|
|
550
|
+
raw = final_agent.invoke(messages)
|
|
551
|
+
if self.current_optimization_id:
|
|
552
|
+
opik_context.update_current_trace(
|
|
553
|
+
tags=[self.current_optimization_id, "Evaluation"]
|
|
554
|
+
)
|
|
555
|
+
return {mappers.EVALUATED_LLM_TASK_OUTPUT: raw.strip()}
|
|
556
|
+
|
|
557
|
+
configuration_updates = self._drop_none(
|
|
558
|
+
{"gepa": {"phase": "final", "selected": True}}
|
|
559
|
+
)
|
|
560
|
+
final_experiment_config = self._prepare_experiment_config(
|
|
561
|
+
prompt=final_prompt,
|
|
562
|
+
dataset=dataset,
|
|
563
|
+
metric=metric,
|
|
564
|
+
experiment_config=experiment_config,
|
|
565
|
+
configuration_updates=configuration_updates,
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
metric_class = _create_metric_class(metric)
|
|
569
|
+
|
|
570
|
+
if opt_id:
|
|
571
|
+
final_eval_result = opik_evaluator.evaluate_optimization_trial(
|
|
572
|
+
optimization_id=opt_id,
|
|
573
|
+
dataset=dataset,
|
|
574
|
+
task=final_llm_task,
|
|
575
|
+
project_name=final_experiment_config.get("project_name"),
|
|
576
|
+
dataset_item_ids=None,
|
|
577
|
+
scoring_metrics=[metric_class],
|
|
578
|
+
task_threads=self.n_threads,
|
|
579
|
+
nb_samples=n_samples,
|
|
580
|
+
experiment_config=final_experiment_config,
|
|
581
|
+
verbose=0,
|
|
582
|
+
)
|
|
583
|
+
else:
|
|
584
|
+
final_eval_result = opik_evaluator.evaluate(
|
|
585
|
+
dataset=dataset,
|
|
586
|
+
task=final_llm_task,
|
|
587
|
+
project_name=final_experiment_config.get("project_name"),
|
|
588
|
+
dataset_item_ids=None,
|
|
589
|
+
scoring_metrics=[metric_class],
|
|
590
|
+
task_threads=self.n_threads,
|
|
591
|
+
nb_samples=n_samples,
|
|
592
|
+
experiment_config=final_experiment_config,
|
|
593
|
+
verbose=0,
|
|
594
|
+
)
|
|
492
595
|
except Exception:
|
|
493
596
|
logger.debug("Final evaluation failed", exc_info=True)
|
|
494
597
|
|
|
@@ -518,28 +621,55 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
518
621
|
except Exception:
|
|
519
622
|
logger.debug("Per-item diagnostics failed", exc_info=True)
|
|
520
623
|
|
|
624
|
+
trial_info: dict[str, Any] | None = None
|
|
625
|
+
if final_eval_result is not None:
|
|
626
|
+
experiment_name = getattr(final_eval_result, "experiment_name", None)
|
|
627
|
+
experiment_url = getattr(final_eval_result, "experiment_url", None)
|
|
628
|
+
trial_ids = []
|
|
629
|
+
try:
|
|
630
|
+
trial_ids = sorted(
|
|
631
|
+
{
|
|
632
|
+
str(test_result.trial_id)
|
|
633
|
+
for test_result in getattr(
|
|
634
|
+
final_eval_result, "test_results", []
|
|
635
|
+
)
|
|
636
|
+
if getattr(test_result, "trial_id", None) is not None
|
|
637
|
+
}
|
|
638
|
+
)
|
|
639
|
+
except Exception:
|
|
640
|
+
logger.debug("Failed to extract trial IDs", exc_info=True)
|
|
641
|
+
|
|
642
|
+
trial_info = {
|
|
643
|
+
"experiment_name": experiment_name,
|
|
644
|
+
"experiment_url": experiment_url,
|
|
645
|
+
"trial_ids": trial_ids,
|
|
646
|
+
}
|
|
647
|
+
|
|
521
648
|
details: dict[str, Any] = {
|
|
522
649
|
"model": self.model,
|
|
523
650
|
"temperature": self.model_parameters.get("temperature"),
|
|
524
651
|
"optimizer": self.__class__.__name__,
|
|
525
|
-
"num_candidates":
|
|
652
|
+
"num_candidates": len(filtered_candidates),
|
|
526
653
|
"total_metric_calls": getattr(gepa_result, "total_metric_calls", None),
|
|
527
654
|
"parents": getattr(gepa_result, "parents", None),
|
|
528
|
-
"val_scores":
|
|
655
|
+
"val_scores": filtered_val_scores,
|
|
529
656
|
"opik_rescored_scores": rescored,
|
|
530
657
|
"candidate_summary": candidate_rows,
|
|
531
658
|
"best_candidate_iteration": (
|
|
532
659
|
candidate_rows[best_idx]["iteration"] if candidate_rows else 0
|
|
533
660
|
),
|
|
534
|
-
"selected_candidate_index": best_idx,
|
|
661
|
+
"selected_candidate_index": best_idx if filtered_candidates else None,
|
|
535
662
|
"selected_candidate_gepa_score": (
|
|
536
|
-
|
|
663
|
+
filtered_val_scores[best_idx]
|
|
664
|
+
if filtered_val_scores and 0 <= best_idx < len(filtered_val_scores)
|
|
665
|
+
else None
|
|
537
666
|
),
|
|
538
667
|
"selected_candidate_opik_score": best_score,
|
|
539
668
|
"gepa_live_metric_used": True,
|
|
540
669
|
"gepa_live_metric_call_count": self._gepa_live_metric_calls,
|
|
541
670
|
"selected_candidate_item_scores": per_item_scores,
|
|
542
671
|
"dataset_item_ids": [item.get("id") for item in items],
|
|
672
|
+
"selected_candidate_trial_info": trial_info,
|
|
543
673
|
}
|
|
544
674
|
if experiment_config:
|
|
545
675
|
details["experiment"] = experiment_config
|
|
@@ -551,7 +681,10 @@ class GepaOptimizer(BaseOptimizer):
|
|
|
551
681
|
candidate_rows, verbose=self.verbose
|
|
552
682
|
)
|
|
553
683
|
gepa_reporting.display_selected_candidate(
|
|
554
|
-
best_prompt_text,
|
|
684
|
+
best_prompt_text,
|
|
685
|
+
best_score,
|
|
686
|
+
verbose=self.verbose,
|
|
687
|
+
trial_info=trial_info,
|
|
555
688
|
)
|
|
556
689
|
|
|
557
690
|
if logger.isEnabledFor(logging.DEBUG):
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from numbers import Number
|
|
1
3
|
from contextlib import contextmanager
|
|
2
4
|
from typing import Any
|
|
3
5
|
|
|
@@ -19,12 +21,70 @@ from ..reporting_utils import ( # noqa: F401
|
|
|
19
21
|
display_result,
|
|
20
22
|
get_console,
|
|
21
23
|
convert_tqdm_to_rich,
|
|
24
|
+
format_prompt_snippet,
|
|
22
25
|
suppress_opik_logs,
|
|
23
26
|
)
|
|
24
27
|
|
|
25
28
|
console = get_console()
|
|
26
29
|
|
|
27
30
|
|
|
31
|
+
def _format_pareto_note(note: str) -> str:
|
|
32
|
+
try:
|
|
33
|
+
data = json.loads(note)
|
|
34
|
+
except json.JSONDecodeError:
|
|
35
|
+
return note
|
|
36
|
+
|
|
37
|
+
if isinstance(data, dict):
|
|
38
|
+
parts: list[str] = []
|
|
39
|
+
new_scores = data.get("new_scores") or data.get("scores")
|
|
40
|
+
if isinstance(new_scores, list):
|
|
41
|
+
formatted_scores = ", ".join(
|
|
42
|
+
f"{float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
|
|
43
|
+
if isinstance(score, Number)
|
|
44
|
+
else str(score)
|
|
45
|
+
for score in new_scores
|
|
46
|
+
)
|
|
47
|
+
parts.append(f"scores=[{formatted_scores}]")
|
|
48
|
+
|
|
49
|
+
chosen = data.get("chosen")
|
|
50
|
+
if chosen is not None:
|
|
51
|
+
parts.append(f"chosen={chosen}")
|
|
52
|
+
|
|
53
|
+
train_val = data.get("pareto_front_train_val_score")
|
|
54
|
+
if isinstance(train_val, dict) and chosen is not None:
|
|
55
|
+
chosen_entry = train_val.get(str(chosen))
|
|
56
|
+
if isinstance(chosen_entry, dict):
|
|
57
|
+
score = chosen_entry.get("score")
|
|
58
|
+
if isinstance(score, Number):
|
|
59
|
+
parts.append(
|
|
60
|
+
f"train_val={float(score) if isinstance(score, (int, float)) else float(str(score)):.3f}"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
pareto_front = data.get("pareto_front")
|
|
64
|
+
if isinstance(pareto_front, dict):
|
|
65
|
+
parts.append(f"front_size={len(pareto_front)}")
|
|
66
|
+
|
|
67
|
+
if parts:
|
|
68
|
+
return ", ".join(parts)
|
|
69
|
+
|
|
70
|
+
return note
|
|
71
|
+
|
|
72
|
+
elif isinstance(data, list):
|
|
73
|
+
return ", ".join(
|
|
74
|
+
f"{float(item) if isinstance(item, (int, float)) else float(str(item)):.3f}"
|
|
75
|
+
if isinstance(item, Number)
|
|
76
|
+
else str(item)
|
|
77
|
+
for item in data
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
elif isinstance(data, Number):
|
|
81
|
+
return (
|
|
82
|
+
f"{float(data) if isinstance(data, (int, float)) else float(str(data)):.3f}"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
return str(data)
|
|
86
|
+
|
|
87
|
+
|
|
28
88
|
class RichGEPAOptimizerLogger:
|
|
29
89
|
"""Adapter for GEPA's logger that provides concise Rich output with progress tracking."""
|
|
30
90
|
|
|
@@ -58,6 +118,8 @@ class RichGEPAOptimizerLogger:
|
|
|
58
118
|
self.task_id = task_id
|
|
59
119
|
self.max_trials = max_trials
|
|
60
120
|
self.current_iteration = 0
|
|
121
|
+
self._last_best_message: tuple[str, str] | None = None
|
|
122
|
+
self._last_raw_message: str | None = None
|
|
61
123
|
|
|
62
124
|
def log(self, message: str) -> None:
|
|
63
125
|
if self.verbose < 1:
|
|
@@ -73,6 +135,13 @@ class RichGEPAOptimizerLogger:
|
|
|
73
135
|
|
|
74
136
|
first = lines[0]
|
|
75
137
|
|
|
138
|
+
if first == self._last_raw_message:
|
|
139
|
+
return
|
|
140
|
+
|
|
141
|
+
# Reset duplicate tracker when handling other messages
|
|
142
|
+
if not first.startswith("Best "):
|
|
143
|
+
self._last_best_message = None
|
|
144
|
+
|
|
76
145
|
# Track iteration changes and add separation
|
|
77
146
|
if first.startswith("Iteration "):
|
|
78
147
|
colon = first.find(":")
|
|
@@ -88,6 +157,7 @@ class RichGEPAOptimizerLogger:
|
|
|
88
157
|
|
|
89
158
|
self.optimizer._gepa_current_iteration = iteration # type: ignore[attr-defined]
|
|
90
159
|
self.current_iteration = iteration
|
|
160
|
+
self._last_raw_message = first
|
|
91
161
|
|
|
92
162
|
# Update progress bar
|
|
93
163
|
if self.progress and self.task_id is not None:
|
|
@@ -120,32 +190,34 @@ class RichGEPAOptimizerLogger:
|
|
|
120
190
|
except Exception:
|
|
121
191
|
pass
|
|
122
192
|
|
|
123
|
-
# Check if this message should be suppressed
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
193
|
+
# Check if this message should be suppressed (unless verbose >= 2)
|
|
194
|
+
if self.verbose <= 1:
|
|
195
|
+
for keyword in self.SUPPRESS_KEYWORDS:
|
|
196
|
+
if keyword in first:
|
|
197
|
+
return
|
|
127
198
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
199
|
+
for prefix in self.SUPPRESS_PREFIXES:
|
|
200
|
+
if prefix in first:
|
|
201
|
+
return
|
|
131
202
|
|
|
132
203
|
# Format proposed prompts
|
|
133
204
|
if "Proposed new text" in first and "system_prompt:" in first:
|
|
134
205
|
_, _, rest = first.partition("system_prompt:")
|
|
135
|
-
snippet = rest
|
|
136
|
-
if len(snippet) > 100:
|
|
137
|
-
snippet = snippet[:100] + "…"
|
|
206
|
+
snippet = format_prompt_snippet(rest, max_length=100)
|
|
138
207
|
console.print(f"│ │ Proposed: {snippet}", style="dim")
|
|
208
|
+
self._last_raw_message = first
|
|
139
209
|
return
|
|
140
210
|
|
|
141
211
|
# Format subsample evaluation results
|
|
142
212
|
if "New subsample score" in first and "is not better than" in first:
|
|
143
213
|
console.print("│ └─ Rejected - no improvement", style="dim yellow")
|
|
144
214
|
console.print("│") # Add spacing after rejected trials
|
|
215
|
+
self._last_raw_message = first
|
|
145
216
|
return
|
|
146
217
|
|
|
147
|
-
|
|
218
|
+
elif "New subsample score" in first and "is better than" in first:
|
|
148
219
|
console.print("│ ├─ Promising! Running full validation...", style="green")
|
|
220
|
+
self._last_raw_message = first
|
|
149
221
|
return
|
|
150
222
|
|
|
151
223
|
# Format final validation score
|
|
@@ -157,9 +229,18 @@ class RichGEPAOptimizerLogger:
|
|
|
157
229
|
console.print(f"│ ├─ Validation complete: {score}", style="bold green")
|
|
158
230
|
else:
|
|
159
231
|
console.print("│ ├─ Validation complete", style="green")
|
|
232
|
+
self._last_raw_message = first
|
|
160
233
|
return
|
|
161
234
|
|
|
162
235
|
# Format best score updates
|
|
236
|
+
if "Best score on train_val" in first:
|
|
237
|
+
parts = first.split(":")
|
|
238
|
+
if len(parts) >= 2:
|
|
239
|
+
score = parts[-1].strip()
|
|
240
|
+
console.print(f"│ Best train_val score: {score}", style="cyan")
|
|
241
|
+
self._last_raw_message = first
|
|
242
|
+
return
|
|
243
|
+
|
|
163
244
|
if (
|
|
164
245
|
"Best valset aggregate score so far" in first
|
|
165
246
|
or "Best score on valset" in first
|
|
@@ -168,10 +249,32 @@ class RichGEPAOptimizerLogger:
|
|
|
168
249
|
parts = first.split(":")
|
|
169
250
|
if len(parts) >= 2:
|
|
170
251
|
score = parts[-1].strip()
|
|
171
|
-
|
|
172
|
-
|
|
252
|
+
key = ("new_best", score)
|
|
253
|
+
if self._last_best_message != key:
|
|
254
|
+
console.print(f"│ └─ New best: {score} ✓", style="bold green")
|
|
255
|
+
console.print("│") # Add spacing after successful trials
|
|
256
|
+
self._last_best_message = key
|
|
257
|
+
self._last_raw_message = first
|
|
173
258
|
return
|
|
174
259
|
|
|
260
|
+
if self.verbose >= 2:
|
|
261
|
+
if "New valset pareto front scores" in first:
|
|
262
|
+
note = first.split(":", 1)[-1].strip()
|
|
263
|
+
console.print(
|
|
264
|
+
f"│ Pareto front scores updated: {_format_pareto_note(note)}",
|
|
265
|
+
style="cyan",
|
|
266
|
+
)
|
|
267
|
+
self._last_raw_message = first
|
|
268
|
+
return
|
|
269
|
+
if "Updated valset pareto front programs" in first:
|
|
270
|
+
console.print("│ Pareto front programs updated", style="cyan")
|
|
271
|
+
self._last_raw_message = first
|
|
272
|
+
return
|
|
273
|
+
if "New program is on the linear pareto front" in first:
|
|
274
|
+
console.print("│ Candidate added to Pareto front", style="cyan")
|
|
275
|
+
self._last_raw_message = first
|
|
276
|
+
return
|
|
277
|
+
|
|
175
278
|
# Suppress redundant "Iteration X:" prefix from detailed messages
|
|
176
279
|
if first.startswith(f"Iteration {self.current_iteration}:"):
|
|
177
280
|
# Remove the iteration prefix for cleaner output
|
|
@@ -184,6 +287,7 @@ class RichGEPAOptimizerLogger:
|
|
|
184
287
|
# Default: print with standard prefix only if not already handled
|
|
185
288
|
if first:
|
|
186
289
|
console.print(f"│ {first}", style="dim")
|
|
290
|
+
self._last_raw_message = first
|
|
187
291
|
|
|
188
292
|
|
|
189
293
|
@contextmanager
|
|
@@ -280,6 +384,7 @@ def display_selected_candidate(
|
|
|
280
384
|
*,
|
|
281
385
|
verbose: int = 1,
|
|
282
386
|
title: str = "Selected Candidate",
|
|
387
|
+
trial_info: dict[str, Any] | None = None,
|
|
283
388
|
) -> None:
|
|
284
389
|
"""Display the final selected candidate with its Opik score."""
|
|
285
390
|
if verbose < 1:
|
|
@@ -287,11 +392,33 @@ def display_selected_candidate(
|
|
|
287
392
|
|
|
288
393
|
snippet = system_prompt.strip() or "<empty>"
|
|
289
394
|
text = Text(snippet)
|
|
395
|
+
subtitle: Text | None = None
|
|
396
|
+
if trial_info:
|
|
397
|
+
trial_parts: list[str] = []
|
|
398
|
+
trial_name = trial_info.get("experiment_name")
|
|
399
|
+
trial_ids = trial_info.get("trial_ids") or []
|
|
400
|
+
if trial_name:
|
|
401
|
+
trial_parts.append(f"Trial {trial_name}")
|
|
402
|
+
elif trial_ids:
|
|
403
|
+
trial_parts.append(f"Trial {trial_ids[0]}")
|
|
404
|
+
|
|
405
|
+
compare_url = trial_info.get("compare_url")
|
|
406
|
+
experiment_url = trial_info.get("experiment_url")
|
|
407
|
+
if compare_url:
|
|
408
|
+
trial_parts.append(f"[link={compare_url}]Compare run[/link]")
|
|
409
|
+
elif experiment_url:
|
|
410
|
+
trial_parts.append(f"[link={experiment_url}]View experiment[/link]")
|
|
411
|
+
|
|
412
|
+
if trial_parts:
|
|
413
|
+
subtitle = Text.from_markup(" • ".join(trial_parts))
|
|
414
|
+
|
|
290
415
|
panel = Panel(
|
|
291
416
|
text,
|
|
292
417
|
title=f"{title} — Opik score {score:.4f}",
|
|
293
418
|
border_style="green",
|
|
294
419
|
expand=True,
|
|
420
|
+
subtitle=subtitle,
|
|
421
|
+
subtitle_align="left",
|
|
295
422
|
)
|
|
296
423
|
console.print(panel)
|
|
297
424
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import re
|
|
3
4
|
from contextlib import contextmanager
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
@@ -94,6 +95,23 @@ def suppress_opik_logs() -> Any:
|
|
|
94
95
|
opik_logger.setLevel(original_opik_level)
|
|
95
96
|
|
|
96
97
|
|
|
98
|
+
def format_prompt_snippet(text: str, max_length: int = 100) -> str:
    """
    Normalize whitespace in a prompt snippet and truncate it for compact display.

    Args:
        text: Raw text to summarize.
        max_length: Maximum characters to keep before adding an ellipsis.
            Negative values are treated as 0.

    Returns:
        str: The text with every whitespace run collapsed to a single space.
            If it exceeds ``max_length`` it is cut to that many characters
            (trailing space removed) and followed by "…".
    """
    normalized = re.sub(r"\s+", " ", text.strip())
    # A negative limit would otherwise slice from the end of the string.
    limit = max(max_length, 0)
    if len(normalized) <= limit:
        return normalized
    # rstrip() avoids a dangling space right before the ellipsis.
    return normalized[:limit].rstrip() + "…"
|
|
113
|
+
|
|
114
|
+
|
|
97
115
|
def display_messages(messages: list[dict[str, str]], prefix: str = "") -> None:
|
|
98
116
|
for i, msg in enumerate(messages):
|
|
99
117
|
panel = Panel(
|
opik_optimizer/utils/__init__.py
CHANGED
|
@@ -3,13 +3,16 @@
|
|
|
3
3
|
from .core import * # noqa: F401,F403
|
|
4
4
|
from .dataset_utils import * # noqa: F401,F403
|
|
5
5
|
from .prompt_segments import * # noqa: F401,F403
|
|
6
|
+
from .candidate_utils import * # noqa: F401,F403
|
|
6
7
|
|
|
7
8
|
from . import core as _core
|
|
8
9
|
from . import dataset_utils as _dataset_utils
|
|
9
10
|
from . import prompt_segments as _prompt_segments
|
|
11
|
+
from . import candidate_utils as _candidate_utils
|
|
10
12
|
|
|
11
13
|
__all__: list[str] = [
|
|
12
14
|
*getattr(_core, "__all__", []),
|
|
13
15
|
*getattr(_dataset_utils, "__all__", []),
|
|
14
16
|
*getattr(_prompt_segments, "__all__", []),
|
|
17
|
+
*getattr(_candidate_utils, "__all__", []),
|
|
15
18
|
]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities for working with optimizer candidate collections.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from collections.abc import Callable, Iterable
|
|
8
|
+
from typing import TypeVar
|
|
9
|
+
|
|
10
|
+
__all__ = ["unique_ordered_by_key"]
|
|
11
|
+
|
|
12
|
+
T = TypeVar("T")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def unique_ordered_by_key(
    items: Iterable[T],
    key: Callable[[T], str],
    *,
    drop_keys: set[str] | None = None,
) -> list[T]:
    """
    Return a list of items that preserves the original order while removing duplicates.

    Args:
        items: Iterable of items to filter; it is consumed exactly once.
        key: Function that extracts the comparison key from an item. If it
            raises ``TypeError``, ``AttributeError`` or a ``LookupError``
            (``KeyError``/``IndexError``), ``str(item)`` is used as the key.
        drop_keys: Optional set of keys to omit entirely from the result.

    Returns:
        List[T]: Ordered list containing the first occurrence of each unique key.
    """
    excluded = drop_keys or ()
    seen: set[str] = set()
    filtered: list[T] = []

    for item in items:
        try:
            item_key = key(item)
        except (TypeError, AttributeError, LookupError):
            # If the key extractor fails, fall back to stringifying the item.
            item_key = str(item)

        if item_key in excluded or item_key in seen:
            continue

        seen.add(item_key)
        filtered.append(item)

    return filtered
|
opik_optimizer/utils/core.py
CHANGED
|
@@ -310,6 +310,25 @@ def get_optimization_run_url_by_id(
|
|
|
310
310
|
return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
|
|
311
311
|
|
|
312
312
|
|
|
313
|
+
def get_trial_compare_url(
    *, dataset_id: str | None, optimization_id: str | None, trial_ids: list[str]
) -> str:
    """Build the URL of the trial comparison page for an optimization.

    The base URL is read from the Opik configuration's ``url_override``.

    Args:
        dataset_id: Identifier of the dataset; must be non-empty.
        optimization_id: Identifier of the optimization; must be non-empty.
        trial_ids: Trial identifiers, sent JSON-encoded in the ``trials``
            query parameter; must be non-empty.

    Returns:
        The base URL joined with
        ``optimizations/<optimization_id>/<dataset_id>/compare?trials=<json>``.

    Raises:
        ValueError: If ``dataset_id`` or ``optimization_id`` is missing or
            empty, or if ``trial_ids`` is empty.
    """
    # Reject "" as well as None: an empty ID would yield a URL with an empty
    # path segment that cannot resolve to a real page.
    if not dataset_id or not optimization_id:
        raise ValueError("dataset_id and optimization_id are required")
    if not trial_ids:
        raise ValueError("trial_ids must be a non-empty list")

    opik_config = opik.config.get_from_user_inputs()
    # presumably guarantees a trailing "/" so urljoin appends rather than
    # replacing the last path segment — verify against ensure_ending_slash
    base = ensure_ending_slash(opik_config.url_override)

    # safe="" percent-encodes "/" too, so an ID or trial value can never be
    # mistaken for extra path segments.
    optimization_segment = urllib.parse.quote(str(optimization_id), safe="")
    dataset_segment = urllib.parse.quote(str(dataset_id), safe="")
    trials_query = urllib.parse.quote(json.dumps(trial_ids), safe="")
    compare_path = (
        f"optimizations/{optimization_segment}/{dataset_segment}"
        f"/compare?trials={trials_query}"
    )
    return urllib.parse.urljoin(base, compare_path)
|
|
330
|
+
|
|
331
|
+
|
|
313
332
|
def create_litellm_agent_class(
|
|
314
333
|
prompt: "ChatPrompt", optimizer_ref: Any = None
|
|
315
334
|
) -> type["OptimizableAgent"]:
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
opik_optimizer/__init__.py,sha256=HsEIWyxeUJhzCvuML5SjBHFWtm-b5LSHyE9GRYytyeI,1592
|
|
2
2
|
opik_optimizer/_throttle.py,sha256=1JXIhYlo0IaqCgwmNB0Hnh9CYhYPkwRFdVGIcE7pVNg,1362
|
|
3
|
-
opik_optimizer/base_optimizer.py,sha256=
|
|
3
|
+
opik_optimizer/base_optimizer.py,sha256=o4U9yoU-KhR7q_3KnvV3DgCeVboOQdacgleq8D2d_20,28350
|
|
4
4
|
opik_optimizer/cache_config.py,sha256=Xd3NdUsL7bLQWoNe3pESqH4nHucU1iNTSGp-RqbwDog,599
|
|
5
5
|
opik_optimizer/logging_config.py,sha256=TmxX0C1P20amxoXuiNQvlENOjdSNfWwvL8jFy206VWM,3837
|
|
6
6
|
opik_optimizer/multi_metric_objective.py,sha256=y4jqirnhkfhB7SWonI4ldYg5fWG4JGfAxqu7ylRD1J4,1178
|
|
7
7
|
opik_optimizer/optimizable_agent.py,sha256=gB1ALuVPyEmXOTVYeK2i-inBAO-6JMZSjOrmj37okgQ,6514
|
|
8
8
|
opik_optimizer/optimization_result.py,sha256=sG-Yr-hOaH9zx_I5S6_W3v6j8nPUhwYdS333jVM4Gus,17218
|
|
9
9
|
opik_optimizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
opik_optimizer/reporting_utils.py,sha256=
|
|
10
|
+
opik_optimizer/reporting_utils.py,sha256=jN3_-tTy98KtsOv8Xp-DKFpePQQYZHHhT7kkG-jUrOg,9970
|
|
11
11
|
opik_optimizer/task_evaluator.py,sha256=7N254DU0UkWJ5saQ5AmYEsHHSrychAJtedmmjNsCOnI,5081
|
|
12
12
|
opik_optimizer/data/context7_eval.jsonl,sha256=vPR3XRfI0UbZ1hgUGaOdpraFT99RDLU1YWuPFLLQz40,1757
|
|
13
13
|
opik_optimizer/data/hotpot-500.json,sha256=YXxCtuvYvxSu5u0y4559a6b1qwgAYsWzT_SUKv_21ew,76862
|
|
@@ -43,8 +43,8 @@ opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256
|
|
|
43
43
|
opik_optimizer/few_shot_bayesian_optimizer/reporting.py,sha256=xk7gKaoTrlp1WDpW3mB5Irzty5Z5l9SJygO3PaamOvU,6283
|
|
44
44
|
opik_optimizer/gepa_optimizer/__init__.py,sha256=XcPah5t4mop7UCFo69E9l45Mem49-itqkQT7_J1aWOA,71
|
|
45
45
|
opik_optimizer/gepa_optimizer/adapter.py,sha256=KzPa4koq7aJhALMAOKPxAO4yWuEy_YbW7tGnqny3Hfo,5139
|
|
46
|
-
opik_optimizer/gepa_optimizer/gepa_optimizer.py,sha256=
|
|
47
|
-
opik_optimizer/gepa_optimizer/reporting.py,sha256=
|
|
46
|
+
opik_optimizer/gepa_optimizer/gepa_optimizer.py,sha256=RlTm71yWjRR8C1nEAuNXfAx1gkt5nsOwV6bfvu5NwbM,32849
|
|
47
|
+
opik_optimizer/gepa_optimizer/reporting.py,sha256=FiIPtHE6c5p4yMfknnhZetEjehvrA8PRejeOPT9uBCo,15836
|
|
48
48
|
opik_optimizer/hierarchical_reflective_optimizer/__init__.py,sha256=9qM3kvfAaFy-Y6Tg19MXHJxpnF5DJQQwzr6oNsxaRBM,133
|
|
49
49
|
opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py,sha256=fhB68XrGNgaHfPwV1JDow-MiAT-jhKDT_Kf-mLLzk0o,27775
|
|
50
50
|
opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py,sha256=0D5wgx04jZvTJ0Yjqm0jtQvkjrGBB73qgcsSwLBpnv0,13814
|
|
@@ -74,13 +74,14 @@ opik_optimizer/parameter_optimizer/parameter_spec.py,sha256=HzYT_dHBTfZtx403mY-E
|
|
|
74
74
|
opik_optimizer/parameter_optimizer/reporting.py,sha256=-kEe9sQFdkUhxayEamXLR8ukyTLJrGsTs8pbJWmimQ4,4665
|
|
75
75
|
opik_optimizer/parameter_optimizer/search_space_types.py,sha256=UajTA2QKikEWazokDNO7j141gc2WxxYYiDRnFFjXi6M,512
|
|
76
76
|
opik_optimizer/parameter_optimizer/sensitivity_analysis.py,sha256=8KEMVMHsmcoiK21Cq1-We6_Pw_6LX9qBX9Az4-tmj_w,2146
|
|
77
|
-
opik_optimizer/utils/__init__.py,sha256=
|
|
77
|
+
opik_optimizer/utils/__init__.py,sha256=_sielSJdLVeyBugtsw1iSVJr_I8YbhsU-U7p8zLe_JY,633
|
|
78
|
+
opik_optimizer/utils/candidate_utils.py,sha256=PKtjREM4MFHvgDri8jCmbs6zHvxAnrfjuwwymvQNnrk,1294
|
|
78
79
|
opik_optimizer/utils/colbert.py,sha256=qSrzKUUGw7P92mLy4Ofug5pBGeTsHBLMJXlXSJSfKuo,8147
|
|
79
|
-
opik_optimizer/utils/core.py,sha256=
|
|
80
|
+
opik_optimizer/utils/core.py,sha256=56lQax3mAQkVZWfie6vhaTKZfjTBcYXf-FFkXgyFYFE,16715
|
|
80
81
|
opik_optimizer/utils/dataset_utils.py,sha256=dqRUGOekjeNWL0J15R8xFwLyKJDJynJXzVyQmt8rhHA,1464
|
|
81
82
|
opik_optimizer/utils/prompt_segments.py,sha256=eiLYT1iiPxtB7ArriN13-LgI5tID-v7MrjniTAxK2Lo,5904
|
|
82
|
-
opik_optimizer-2.2.
|
|
83
|
-
opik_optimizer-2.2.
|
|
84
|
-
opik_optimizer-2.2.
|
|
85
|
-
opik_optimizer-2.2.
|
|
86
|
-
opik_optimizer-2.2.
|
|
83
|
+
opik_optimizer-2.2.1.dist-info/licenses/LICENSE,sha256=V-0VHJOBdcA_teT8VymvsBUQ1-CZU6yJRmMEjec_8tA,11372
|
|
84
|
+
opik_optimizer-2.2.1.dist-info/METADATA,sha256=8HayPMPvWBxuCg1H3u6-d_8MwBxVF2DzbID2VrdqjKk,12807
|
|
85
|
+
opik_optimizer-2.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
86
|
+
opik_optimizer-2.2.1.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
|
|
87
|
+
opik_optimizer-2.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|