opik-optimizer 0.9.2__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +7 -5
- opik_optimizer/_throttle.py +8 -8
- opik_optimizer/base_optimizer.py +98 -45
- opik_optimizer/cache_config.py +5 -3
- opik_optimizer/datasets/ai2_arc.py +15 -13
- opik_optimizer/datasets/cnn_dailymail.py +19 -15
- opik_optimizer/datasets/election_questions.py +10 -11
- opik_optimizer/datasets/gsm8k.py +16 -11
- opik_optimizer/datasets/halu_eval.py +6 -5
- opik_optimizer/datasets/hotpot_qa.py +17 -16
- opik_optimizer/datasets/medhallu.py +10 -7
- opik_optimizer/datasets/rag_hallucinations.py +11 -8
- opik_optimizer/datasets/ragbench.py +17 -9
- opik_optimizer/datasets/tiny_test.py +33 -37
- opik_optimizer/datasets/truthful_qa.py +18 -12
- opik_optimizer/demo/cache.py +6 -6
- opik_optimizer/demo/datasets.py +3 -7
- opik_optimizer/evolutionary_optimizer/__init__.py +3 -1
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +722 -429
- opik_optimizer/evolutionary_optimizer/reporting.py +155 -74
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +271 -188
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +79 -28
- opik_optimizer/logging_config.py +19 -15
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +209 -129
- opik_optimizer/meta_prompt_optimizer/reporting.py +121 -46
- opik_optimizer/mipro_optimizer/__init__.py +2 -0
- opik_optimizer/mipro_optimizer/_lm.py +38 -9
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +37 -26
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +132 -63
- opik_optimizer/mipro_optimizer/utils.py +5 -2
- opik_optimizer/optimizable_agent.py +179 -0
- opik_optimizer/optimization_config/chat_prompt.py +143 -73
- opik_optimizer/optimization_config/configs.py +4 -3
- opik_optimizer/optimization_config/mappers.py +18 -6
- opik_optimizer/optimization_result.py +22 -13
- opik_optimizer/py.typed +0 -0
- opik_optimizer/reporting_utils.py +89 -58
- opik_optimizer/task_evaluator.py +12 -14
- opik_optimizer/utils.py +117 -14
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/METADATA +8 -8
- opik_optimizer-1.0.1.dist-info/RECORD +50 -0
- opik_optimizer-0.9.2.dist-info/RECORD +0 -48
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.2.dist-info → opik_optimizer-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,13 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
2
|
from io import StringIO
|
3
|
-
from typing import Dict, List
|
3
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
4
4
|
|
5
|
-
import rich
|
6
5
|
from rich.panel import Panel
|
7
6
|
from rich.text import Text
|
8
7
|
|
8
|
+
if TYPE_CHECKING:
|
9
|
+
from .few_shot_bayesian_optimizer import FewShotPromptTemplate
|
10
|
+
|
9
11
|
from ..reporting_utils import (
|
10
12
|
convert_tqdm_to_rich,
|
11
13
|
display_configuration, # noqa: F401
|
@@ -21,20 +23,22 @@ console = get_console()
|
|
21
23
|
|
22
24
|
|
23
25
|
@contextmanager
|
24
|
-
def display_evaluation(
|
26
|
+
def display_evaluation(
|
27
|
+
message: str = "First we will establish the baseline performance:", verbose: int = 1
|
28
|
+
) -> Any:
|
25
29
|
"""Context manager to display messages during an evaluation phase."""
|
26
30
|
score = None
|
27
|
-
|
31
|
+
|
28
32
|
# Entry point
|
29
33
|
if verbose >= 1:
|
30
34
|
console.print(Text(f"> {message}"))
|
31
|
-
|
35
|
+
|
32
36
|
# Create a simple object with a method to set the score
|
33
37
|
class Reporter:
|
34
|
-
def set_score(self, s):
|
38
|
+
def set_score(self, s: float) -> None:
|
35
39
|
nonlocal score
|
36
40
|
score = s
|
37
|
-
|
41
|
+
|
38
42
|
# Use our log suppression context manager and yield the reporter
|
39
43
|
with suppress_opik_logs():
|
40
44
|
with convert_tqdm_to_rich(verbose=verbose):
|
@@ -42,31 +46,50 @@ def display_evaluation(message: str = "First we will establish the baseline perf
|
|
42
46
|
yield Reporter()
|
43
47
|
finally:
|
44
48
|
if verbose >= 1:
|
45
|
-
console.print(
|
49
|
+
console.print(
|
50
|
+
Text(f"\r Baseline score was: {score:.4f}.\n", style="green")
|
51
|
+
)
|
52
|
+
|
46
53
|
|
47
54
|
@contextmanager
|
48
|
-
def creation_few_shot_prompt_template(verbose: int = 1):
|
55
|
+
def creation_few_shot_prompt_template(verbose: int = 1) -> Any:
|
49
56
|
"""Context manager to display messages during the creation of a few-shot prompt template."""
|
50
|
-
console.print(
|
57
|
+
console.print(
|
58
|
+
Text("> Let's add a placeholder for few-shot examples in the messages:")
|
59
|
+
)
|
60
|
+
|
61
|
+
fewshot_template: Optional["FewShotPromptTemplate"] = None
|
51
62
|
|
52
|
-
fewshot_template = None
|
53
|
-
|
54
63
|
# Create a simple object with a method to set the prompt template
|
55
64
|
class Reporter:
|
56
|
-
def set_fewshot_template(self, s):
|
65
|
+
def set_fewshot_template(self, s: "FewShotPromptTemplate") -> None:
|
57
66
|
nonlocal fewshot_template
|
58
67
|
fewshot_template = s
|
59
|
-
|
68
|
+
|
60
69
|
# Use our log suppression context manager and yield the reporter
|
61
70
|
try:
|
62
71
|
yield Reporter()
|
63
72
|
finally:
|
64
73
|
if verbose >= 1:
|
65
|
-
console.print(
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
74
|
+
console.print(
|
75
|
+
Text("│ Created the prompt template:\n│", style="dim yellow")
|
76
|
+
)
|
77
|
+
if fewshot_template is not None:
|
78
|
+
display_messages(
|
79
|
+
fewshot_template.message_list_with_placeholder, prefix="│ "
|
80
|
+
)
|
81
|
+
console.print(
|
82
|
+
Text(
|
83
|
+
"│\n│ With the FEW_SHOT_EXAMPLE_PLACEHOLDER following the format:"
|
84
|
+
)
|
85
|
+
)
|
86
|
+
|
87
|
+
if fewshot_template is not None:
|
88
|
+
panel = Panel(
|
89
|
+
Text(fewshot_template.example_template),
|
90
|
+
width=PANEL_WIDTH,
|
91
|
+
border_style="dim",
|
92
|
+
)
|
70
93
|
# Use a temporary buffer to render the panel
|
71
94
|
buffer = StringIO()
|
72
95
|
temp_console = get_console(file=buffer, width=console.width)
|
@@ -80,7 +103,8 @@ def creation_few_shot_prompt_template(verbose: int = 1):
|
|
80
103
|
console.print(prefixed)
|
81
104
|
console.print()
|
82
105
|
|
83
|
-
|
106
|
+
|
107
|
+
def start_optimization_run(verbose: int = 1) -> None:
|
84
108
|
"""Start the optimization run"""
|
85
109
|
if verbose >= 1:
|
86
110
|
console.print(Text("\n> Starting the optimization run"))
|
@@ -88,27 +112,54 @@ def start_optimization_run(verbose: int = 1):
|
|
88
112
|
|
89
113
|
|
90
114
|
@contextmanager
|
91
|
-
def start_optimization_trial(
|
115
|
+
def start_optimization_trial(
|
116
|
+
trial_number: int, total_trials: int, verbose: int = 1
|
117
|
+
) -> Any:
|
92
118
|
"""Context manager to display messages during an evaluation phase."""
|
119
|
+
|
93
120
|
# Create a simple object with a method to set the score
|
94
121
|
class Reporter:
|
95
|
-
def start_trial(self, messages: List[Dict[str, str]]):
|
122
|
+
def start_trial(self, messages: List[Dict[str, str]]) -> None:
|
96
123
|
if verbose >= 1:
|
97
|
-
console.print(
|
124
|
+
console.print(
|
125
|
+
Text(
|
126
|
+
f"│ - Starting optimization round {trial_number + 1} of {total_trials}"
|
127
|
+
)
|
128
|
+
)
|
98
129
|
console.print(Text("│"))
|
99
130
|
display_messages(messages, prefix="│ ")
|
100
131
|
console.print("│")
|
101
132
|
|
102
|
-
def set_score(self, baseline_score, score):
|
133
|
+
def set_score(self, baseline_score: float, score: float) -> None:
|
103
134
|
if verbose >= 1:
|
104
135
|
if baseline_score == 0:
|
105
|
-
console.print(
|
136
|
+
console.print(
|
137
|
+
Text(
|
138
|
+
f"│ Trial {trial_number + 1} - score was: {score:.4f}\n│",
|
139
|
+
style="green",
|
140
|
+
)
|
141
|
+
)
|
106
142
|
elif score is not None and score > baseline_score:
|
107
|
-
console.print(
|
143
|
+
console.print(
|
144
|
+
Text(
|
145
|
+
f"│ Trial {trial_number + 1} - score was: {score:.4f} ({(score - baseline_score) / baseline_score * 100:.2f}%).\n│",
|
146
|
+
style="green",
|
147
|
+
)
|
148
|
+
)
|
108
149
|
elif score is not None and score <= baseline_score:
|
109
|
-
console.print(
|
150
|
+
console.print(
|
151
|
+
Text(
|
152
|
+
f"│ Trial {trial_number + 1} - score was: {score:.4f} ({(score - baseline_score) / baseline_score * 100:.2f}%).\n│",
|
153
|
+
style="red",
|
154
|
+
)
|
155
|
+
)
|
110
156
|
else:
|
111
|
-
console.print(
|
157
|
+
console.print(
|
158
|
+
Text(
|
159
|
+
f"│ Trial {trial_number + 1} - score was not set.\n│",
|
160
|
+
style="dim yellow",
|
161
|
+
)
|
162
|
+
)
|
112
163
|
|
113
164
|
# Use our log suppression context manager and yield the reporter
|
114
165
|
with suppress_opik_logs():
|
opik_optimizer/logging_config.py
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
import logging
|
2
2
|
from rich.logging import RichHandler
|
3
3
|
|
4
|
-
DEFAULT_LOG_FORMAT =
|
5
|
-
DEFAULT_DATE_FORMAT =
|
4
|
+
DEFAULT_LOG_FORMAT = "%(message)s"
|
5
|
+
DEFAULT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
6
6
|
|
7
7
|
# Store configured state to prevent reconfiguration
|
8
8
|
_logging_configured = False
|
9
9
|
|
10
|
+
|
10
11
|
def setup_logging(
|
11
|
-
level=logging.WARNING,
|
12
|
-
format_string=DEFAULT_LOG_FORMAT,
|
13
|
-
date_format=DEFAULT_DATE_FORMAT,
|
14
|
-
force=False,
|
15
|
-
):
|
12
|
+
level: int = logging.WARNING,
|
13
|
+
format_string: str = DEFAULT_LOG_FORMAT,
|
14
|
+
date_format: str = DEFAULT_DATE_FORMAT,
|
15
|
+
force: bool = False,
|
16
|
+
) -> None:
|
16
17
|
"""
|
17
18
|
Configures logging for the opik_optimizer package using rich.
|
18
19
|
|
@@ -28,7 +29,7 @@ def setup_logging(
|
|
28
29
|
return
|
29
30
|
|
30
31
|
# Configure opik_optimizer package logger
|
31
|
-
package_logger = logging.getLogger(
|
32
|
+
package_logger = logging.getLogger("opik_optimizer")
|
32
33
|
|
33
34
|
# Avoid adding handlers repeatedly if force=True replaces them
|
34
35
|
if not package_logger.handlers or force:
|
@@ -36,19 +37,19 @@ def setup_logging(
|
|
36
37
|
if force and package_logger.handlers:
|
37
38
|
for handler in package_logger.handlers[:]:
|
38
39
|
package_logger.removeHandler(handler)
|
39
|
-
|
40
|
+
|
40
41
|
console_handler = RichHandler(
|
41
42
|
rich_tracebacks=True,
|
42
|
-
markup=True,
|
43
|
-
log_time_format=f"[{date_format}]"
|
43
|
+
markup=True, # Enable rich markup in log messages
|
44
|
+
log_time_format=f"[{date_format}]", # Apply date format
|
44
45
|
)
|
45
46
|
# RichHandler manages formatting, so we don't need a separate formatter
|
46
47
|
# formatter = logging.Formatter(format_string, datefmt=date_format)
|
47
48
|
# console_handler.setFormatter(formatter)
|
48
49
|
package_logger.addHandler(console_handler)
|
49
|
-
|
50
|
+
|
50
51
|
package_logger.setLevel(level)
|
51
|
-
package_logger.propagate = False
|
52
|
+
package_logger.propagate = False # Don't duplicate messages in root logger
|
52
53
|
|
53
54
|
# Set levels for noisy libraries like LiteLLM and httpx
|
54
55
|
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
|
@@ -61,9 +62,12 @@ def setup_logging(
|
|
61
62
|
logging.getLogger("filelock").setLevel(logging.WARNING)
|
62
63
|
|
63
64
|
_logging_configured = True
|
64
|
-
|
65
|
+
|
65
66
|
# Use level name provided by rich handler by default
|
66
|
-
package_logger.info(
|
67
|
+
package_logger.info(
|
68
|
+
f"Opik Agent Optimizer logging configured to level: [bold cyan]{logging.getLevelName(level)}[/bold cyan]"
|
69
|
+
)
|
70
|
+
|
67
71
|
|
68
72
|
# Ensure logger obtained after setup can be used immediately if needed
|
69
73
|
logger = logging.getLogger(__name__)
|