opik-optimizer 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. opik_optimizer/__init__.py +0 -2
  2. opik_optimizer/base_optimizer.py +314 -145
  3. opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
  4. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
  5. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
  6. opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
  9. opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
  10. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
  11. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
  12. opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
  13. opik_optimizer/gepa_optimizer/reporting.py +164 -22
  14. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
  15. opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
  16. opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
  17. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
  18. opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
  19. opik_optimizer/mipro_optimizer/__init__.py +2 -2
  20. opik_optimizer/mipro_optimizer/_lm.py +4 -4
  21. opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
  22. opik_optimizer/mipro_optimizer/utils.py +1 -0
  23. opik_optimizer/optimizable_agent.py +7 -4
  24. opik_optimizer/optimization_config/chat_prompt.py +7 -10
  25. opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
  26. opik_optimizer/parameter_optimizer/reporting.py +148 -0
  27. opik_optimizer/reporting_utils.py +42 -15
  28. opik_optimizer/utils/core.py +16 -2
  29. opik_optimizer/utils/prompt_segments.py +1 -2
  30. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
  31. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +34 -35
  32. opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
  33. opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
  34. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
  35. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
  36. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,8 @@ TEST RESULTS:
14
14
  {formatted_batch}
15
15
  ```
16
16
 
17
+ Important constraint: Base your analysis exclusively on the TEST RESULTS shown above. Do not infer, speculate, or hypothesize failure modes that are not directly evidenced in the provided results.
18
+
17
19
  Think through the failures systematically:
18
20
 
19
21
  1. IDENTIFY: List all distinct types of failures you observe in the test results
@@ -86,6 +88,10 @@ INSTRUCTIONS FOR IMPROVING THE PROMPT:
86
88
 
87
89
  4. **Maintain Structure**: Keep the same message structure (role and content format). Only modify the content where necessary.
88
90
 
89
- 5. **Be Specific**: Ensure your changes provide concrete, actionable guidance that directly addresses the identified failure mode.
91
+ 5. **Do NOT Add Messages**: Do not add new messages to the prompt. Only modify existing messages. The number of messages in the prompt must remain exactly the same.
92
+
93
+ 6. **Be Specific**: Ensure your changes provide concrete, actionable guidance that directly addresses the identified failure mode.
94
+
95
+ Do not remove any variables or placeholders from any prompt message. You can reposition them within the same message content if needed but never remove them.
90
96
 
91
97
  Provide your reasoning for the changes you made, explaining WHY each change addresses the failure mode, and then provide the improved prompt."""
@@ -1,17 +1,18 @@
1
1
  from contextlib import contextmanager
2
- from typing import Any
2
+ from typing import Any, Literal
3
3
  from collections.abc import Iterator
4
+ from dataclasses import dataclass
4
5
 
5
6
  from rich.panel import Panel
6
7
  from rich.text import Text
7
8
 
8
9
  from ..optimization_config import chat_prompt
9
- from ..reporting_utils import (
10
+ from ..reporting_utils import ( # noqa: F401
10
11
  convert_tqdm_to_rich,
11
- display_configuration, # noqa: F401
12
- display_header, # noqa: F401
12
+ display_configuration,
13
+ display_header,
13
14
  display_messages,
14
- display_result, # noqa: F401
15
+ display_result,
15
16
  get_console,
16
17
  suppress_opik_logs,
17
18
  )
@@ -20,6 +21,97 @@ PANEL_WIDTH = 90
20
21
  console = get_console()
21
22
 
22
23
 
24
+ @dataclass
25
+ class MessageDiffItem:
26
+ """Represents a single message's diff information."""
27
+
28
+ role: str
29
+ change_type: Literal["added", "removed", "unchanged", "changed"]
30
+ initial_content: str | None
31
+ optimized_content: str | None
32
+
33
+
34
+ def compute_message_diff_order(
35
+ initial_messages: list[dict[str, str]],
36
+ optimized_messages: list[dict[str, str]],
37
+ ) -> list[MessageDiffItem]:
38
+ """
39
+ Compute the diff between initial and optimized messages, returning them in optimized message order.
40
+
41
+ This function groups messages by role and compares them to determine what changed.
42
+ The returned list maintains the order of roles as they appear in the optimized messages.
43
+
44
+ Args:
45
+ initial_messages: List of initial message dictionaries with 'role' and 'content' keys
46
+ optimized_messages: List of optimized message dictionaries with 'role' and 'content' keys
47
+
48
+ Returns:
49
+ List of MessageDiffItem objects in the order roles appear in optimized_messages,
50
+ followed by any removed roles that only existed in initial_messages.
51
+ """
52
+
53
+ def group_by_role(
54
+ messages: list[dict[str, str]],
55
+ ) -> dict[str, list[tuple[int, str]]]:
56
+ """Group messages by role, storing (index, content) tuples."""
57
+ groups: dict[str, list[tuple[int, str]]] = {}
58
+ for idx, msg in enumerate(messages):
59
+ role = msg.get("role", "message")
60
+ content = msg.get("content", "")
61
+ if role not in groups:
62
+ groups[role] = []
63
+ groups[role].append((idx, content))
64
+ return groups
65
+
66
+ initial_by_role = group_by_role(initial_messages)
67
+ optimized_by_role = group_by_role(optimized_messages)
68
+
69
+ # Get all unique roles maintaining order from optimized messages
70
+ all_roles = []
71
+ seen_roles = set()
72
+ for msg in optimized_messages:
73
+ role = msg.get("role", "message")
74
+ if role not in seen_roles:
75
+ all_roles.append(role)
76
+ seen_roles.add(role)
77
+ # Add any roles that were in initial but not in optimized (removed roles)
78
+ for msg in initial_messages:
79
+ role = msg.get("role", "message")
80
+ if role not in seen_roles:
81
+ all_roles.append(role)
82
+ seen_roles.add(role)
83
+
84
+ # Build diff items for each role
85
+ diff_items: list[MessageDiffItem] = []
86
+ for role in all_roles:
87
+ initial_content = (
88
+ initial_by_role[role][0][1] if role in initial_by_role else None
89
+ )
90
+ optimized_content = (
91
+ optimized_by_role[role][0][1] if role in optimized_by_role else None
92
+ )
93
+
94
+ if initial_content is None and optimized_content is not None:
95
+ change_type: Literal["added", "removed", "unchanged", "changed"] = "added"
96
+ elif initial_content is not None and optimized_content is None:
97
+ change_type = "removed"
98
+ elif initial_content == optimized_content:
99
+ change_type = "unchanged"
100
+ else:
101
+ change_type = "changed"
102
+
103
+ diff_items.append(
104
+ MessageDiffItem(
105
+ role=role,
106
+ change_type=change_type,
107
+ initial_content=initial_content,
108
+ optimized_content=optimized_content,
109
+ )
110
+ )
111
+
112
+ return diff_items
113
+
114
+
23
115
  def display_retry_attempt(
24
116
  attempt: int,
25
117
  max_attempts: int,
@@ -643,15 +735,27 @@ def display_optimized_prompt_diff(
643
735
 
644
736
  # Show score improvement
645
737
  if best_score > initial_score:
646
- perc_change = (best_score - initial_score) / initial_score
647
- console.print(
648
- Text("│ ").append(
649
- Text(
650
- f"Prompt improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})",
651
- style="green",
738
+ from ..reporting_utils import safe_percentage_change
739
+
740
+ perc_change, has_percentage = safe_percentage_change(best_score, initial_score)
741
+ if has_percentage:
742
+ console.print(
743
+ Text("").append(
744
+ Text(
745
+ f"Prompt improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})",
746
+ style="green",
747
+ )
748
+ )
749
+ )
750
+ else:
751
+ console.print(
752
+ Text("│ ").append(
753
+ Text(
754
+ f"Prompt improved from {initial_score:.4f} to {best_score:.4f}",
755
+ style="green",
756
+ )
652
757
  )
653
758
  )
654
- )
655
759
  else:
656
760
  console.print(
657
761
  Text("│ ").append(
@@ -663,79 +767,68 @@ def display_optimized_prompt_diff(
663
767
  console.print(Text("│ ").append(Text("Prompt Changes:", style="cyan")))
664
768
  console.print(Text("│"))
665
769
 
666
- # Compare each message
667
- for idx in range(max(len(initial_messages), len(optimized_messages))):
668
- initial_msg = initial_messages[idx] if idx < len(initial_messages) else None
669
- optimized_msg = (
670
- optimized_messages[idx] if idx < len(optimized_messages) else None
671
- )
672
-
673
- # Get role from whichever message exists
674
- role = "message"
675
- if initial_msg:
676
- role = initial_msg.get("role", "message")
677
- elif optimized_msg:
678
- role = optimized_msg.get("role", "message")
679
-
680
- initial_content = initial_msg.get("content", "") if initial_msg else ""
681
- optimized_content = optimized_msg.get("content", "") if optimized_msg else ""
770
+ # Compute diff items using the extracted function
771
+ diff_items = compute_message_diff_order(initial_messages, optimized_messages)
682
772
 
683
- # Handle added messages
684
- if not initial_msg:
773
+ # Display each diff item
774
+ for item in diff_items:
775
+ if item.change_type == "added":
776
+ # Role was added
685
777
  console.print(
686
- Text("│ ").append(Text(f"{role}: (added)", style="green bold"))
778
+ Text("│ ").append(Text(f"{item.role}: (added)", style="green bold"))
687
779
  )
688
- for line in optimized_content.splitlines():
780
+ assert item.optimized_content is not None
781
+ for line in item.optimized_content.splitlines():
689
782
  console.print(Text("│ ").append(Text(f"+{line}", style="green")))
690
783
  console.print(Text("│"))
691
- continue
692
-
693
- # Handle removed messages
694
- if not optimized_msg:
784
+ elif item.change_type == "removed":
785
+ # Role was removed
695
786
  console.print(
696
- Text("│ ").append(Text(f"{role}: (removed)", style="red bold"))
787
+ Text("│ ").append(Text(f"{item.role}: (removed)", style="red bold"))
697
788
  )
698
- for line in initial_content.splitlines():
789
+ assert item.initial_content is not None
790
+ for line in item.initial_content.splitlines():
699
791
  console.print(Text("│ ").append(Text(f"-{line}", style="red")))
700
792
  console.print(Text("│"))
701
- continue
702
-
703
- # Check if there are changes
704
- if initial_content == optimized_content:
705
- # No changes in this message
793
+ elif item.change_type == "unchanged":
794
+ # No changes
706
795
  console.print(
707
- Text("│ ").append(Text(f"{role}: (unchanged)", style="dim"))
796
+ Text("│ ").append(Text(f"{item.role}: (unchanged)", style="dim"))
708
797
  )
709
- continue
710
-
711
- # Generate unified diff
712
- diff_lines = list(
713
- difflib.unified_diff(
714
- initial_content.splitlines(keepends=False),
715
- optimized_content.splitlines(keepends=False),
716
- lineterm="",
717
- n=3, # 3 lines of context
798
+ else: # changed
799
+ # Content changed - show diff
800
+ console.print(
801
+ Text("│ ").append(
802
+ Text(f"{item.role}: (changed)", style="cyan bold")
803
+ )
718
804
  )
719
- )
720
805
 
721
- if not diff_lines:
722
- continue
723
-
724
- # Display message header
725
- console.print(Text("│ ").append(Text(f"{role}:", style="bold cyan")))
726
-
727
- # Create diff content
728
- diff_content = Text()
729
- for line in diff_lines[3:]: # Skip first 3 lines (---, +++, @@)
730
- if line.startswith("+"):
731
- diff_content.append("│ " + line + "\n", style="green")
732
- elif line.startswith("-"):
733
- diff_content.append("│ " + line + "\n", style="red")
734
- elif line.startswith("@@"):
735
- diff_content.append("│ " + line + "\n", style="cyan dim")
736
- else:
737
- # Context line
738
- diff_content.append("│ " + line + "\n", style="dim")
739
-
740
- console.print(diff_content)
741
- console.print(Text("│"))
806
+ assert item.initial_content is not None
807
+ assert item.optimized_content is not None
808
+
809
+ # Generate unified diff
810
+ diff_lines = list(
811
+ difflib.unified_diff(
812
+ item.initial_content.splitlines(keepends=False),
813
+ item.optimized_content.splitlines(keepends=False),
814
+ lineterm="",
815
+ n=3, # 3 lines of context
816
+ )
817
+ )
818
+
819
+ if diff_lines:
820
+ # Create diff content
821
+ diff_content = Text()
822
+ for line in diff_lines[3:]: # Skip first 3 lines (---, +++, @@)
823
+ if line.startswith("+"):
824
+ diff_content.append("│ " + line + "\n", style="green")
825
+ elif line.startswith("-"):
826
+ diff_content.append("│ " + line + "\n", style="red")
827
+ elif line.startswith("@@"):
828
+ diff_content.append("│ " + line + "\n", style="cyan dim")
829
+ else:
830
+ # Context line
831
+ diff_content.append("│ " + line + "\n", style="dim")
832
+
833
+ console.print(diff_content)
834
+ console.print(Text("│"))