opik-optimizer 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +0 -2
- opik_optimizer/base_optimizer.py +314 -145
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
- opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
- opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
- opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
- opik_optimizer/gepa_optimizer/reporting.py +164 -22
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
- opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
- opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
- opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
- opik_optimizer/mipro_optimizer/__init__.py +2 -2
- opik_optimizer/mipro_optimizer/_lm.py +4 -4
- opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
- opik_optimizer/mipro_optimizer/utils.py +1 -0
- opik_optimizer/optimizable_agent.py +7 -4
- opik_optimizer/optimization_config/chat_prompt.py +7 -10
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
- opik_optimizer/parameter_optimizer/reporting.py +148 -0
- opik_optimizer/reporting_utils.py +42 -15
- opik_optimizer/utils/core.py +16 -2
- opik_optimizer/utils/prompt_segments.py +1 -2
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +34 -35
- opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -6,12 +6,12 @@ from rich.panel import Panel
|
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
8
|
from ..optimization_config import chat_prompt
|
|
9
|
-
from ..reporting_utils import (
|
|
9
|
+
from ..reporting_utils import ( # noqa: F401
|
|
10
10
|
convert_tqdm_to_rich,
|
|
11
|
-
display_configuration,
|
|
12
|
-
display_header,
|
|
13
|
-
display_messages,
|
|
14
|
-
display_result,
|
|
11
|
+
display_configuration,
|
|
12
|
+
display_header,
|
|
13
|
+
display_messages,
|
|
14
|
+
display_result,
|
|
15
15
|
get_console,
|
|
16
16
|
suppress_opik_logs,
|
|
17
17
|
)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from collections.abc import Callable
|
|
3
|
-
import warnings
|
|
4
3
|
|
|
5
4
|
import copy
|
|
6
5
|
import json
|
|
@@ -8,13 +7,11 @@ import logging
|
|
|
8
7
|
import random
|
|
9
8
|
from datetime import datetime
|
|
10
9
|
|
|
11
|
-
import litellm
|
|
12
10
|
import optuna
|
|
13
11
|
import optuna.samplers
|
|
14
12
|
|
|
15
13
|
import opik
|
|
16
|
-
from opik import Dataset
|
|
17
|
-
from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
|
|
14
|
+
from opik import Dataset, opik_context
|
|
18
15
|
from pydantic import BaseModel
|
|
19
16
|
|
|
20
17
|
from opik_optimizer import base_optimizer
|
|
@@ -64,47 +61,39 @@ class FewShotPromptTemplate(BaseModel):
|
|
|
64
61
|
|
|
65
62
|
class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
66
63
|
"""
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
2.
|
|
73
|
-
|
|
74
|
-
This algorithm is best used when you have a well
|
|
75
|
-
by providing
|
|
64
|
+
Few-Shot Bayesian Optimizer that adds few-shot examples to prompts using Bayesian optimization.
|
|
65
|
+
|
|
66
|
+
This algorithm employs a two-stage pipeline:
|
|
67
|
+
|
|
68
|
+
1. Generate a few-shot prompt template that can be inserted into the prompt
|
|
69
|
+
2. Use Bayesian Optimization to determine the best examples to include in the prompt
|
|
70
|
+
|
|
71
|
+
This algorithm is best used when you have a well-defined task and would like to guide the LLM
|
|
72
|
+
by providing examples. It automatically finds the optimal number and selection of examples.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
model: LiteLLM model name for optimizer's internal reasoning (generating few-shot templates)
|
|
76
|
+
model_parameters: Optional dict of LiteLLM parameters for optimizer's internal LLM calls.
|
|
77
|
+
Common params: temperature, max_tokens, max_completion_tokens, top_p.
|
|
78
|
+
See: https://docs.litellm.ai/docs/completion/input
|
|
79
|
+
min_examples: Minimum number of examples to include in the prompt
|
|
80
|
+
max_examples: Maximum number of examples to include in the prompt
|
|
81
|
+
n_threads: Number of threads for parallel evaluation
|
|
82
|
+
verbose: Controls internal logging/progress bars (0=off, 1=on)
|
|
83
|
+
seed: Random seed for reproducibility
|
|
76
84
|
"""
|
|
77
85
|
|
|
78
86
|
def __init__(
|
|
79
87
|
self,
|
|
80
|
-
model: str,
|
|
88
|
+
model: str = "gpt-4o",
|
|
89
|
+
model_parameters: dict[str, Any] | None = None,
|
|
81
90
|
min_examples: int = 2,
|
|
82
91
|
max_examples: int = 8,
|
|
83
|
-
seed: int = 42,
|
|
84
92
|
n_threads: int = 8,
|
|
85
93
|
verbose: int = 1,
|
|
86
|
-
|
|
94
|
+
seed: int = 42,
|
|
87
95
|
) -> None:
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
model: The model to used to evaluate the prompt
|
|
91
|
-
min_examples: Minimum number of examples to include
|
|
92
|
-
max_examples: Maximum number of examples to include
|
|
93
|
-
seed: Random seed for reproducibility
|
|
94
|
-
n_threads: Number of threads for parallel evaluation
|
|
95
|
-
verbose: Controls internal logging/progress bars (0=off, 1=on).
|
|
96
|
-
**model_kwargs: Additional model parameters
|
|
97
|
-
"""
|
|
98
|
-
if "project_name" in model_kwargs:
|
|
99
|
-
warnings.warn(
|
|
100
|
-
"The 'project_name' parameter in optimizer constructor is deprecated. "
|
|
101
|
-
"Set project_name in the ChatPrompt instead.",
|
|
102
|
-
DeprecationWarning,
|
|
103
|
-
stacklevel=2,
|
|
104
|
-
)
|
|
105
|
-
del model_kwargs["project_name"]
|
|
106
|
-
|
|
107
|
-
super().__init__(model, verbose, **model_kwargs)
|
|
96
|
+
super().__init__(model, verbose, seed=seed, model_parameters=model_parameters)
|
|
108
97
|
self.min_examples = min_examples
|
|
109
98
|
self.max_examples = max_examples
|
|
110
99
|
self.seed = seed
|
|
@@ -124,49 +113,6 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
124
113
|
"max_examples": self.max_examples,
|
|
125
114
|
}
|
|
126
115
|
|
|
127
|
-
@_throttle.rate_limited(_limiter)
|
|
128
|
-
def _call_model(
|
|
129
|
-
self,
|
|
130
|
-
model: str,
|
|
131
|
-
messages: list[dict[str, str]],
|
|
132
|
-
seed: int,
|
|
133
|
-
model_kwargs: dict[str, Any],
|
|
134
|
-
) -> dict[str, Any]:
|
|
135
|
-
"""
|
|
136
|
-
Args:
|
|
137
|
-
model: The model to use for the call
|
|
138
|
-
messages: List of message dictionaries with 'role' and 'content' keys
|
|
139
|
-
seed: Random seed for reproducibility
|
|
140
|
-
model_kwargs: Additional model parameters
|
|
141
|
-
|
|
142
|
-
Returns:
|
|
143
|
-
Dict containing the model's response
|
|
144
|
-
"""
|
|
145
|
-
self.increment_llm_counter()
|
|
146
|
-
|
|
147
|
-
current_model_kwargs = self.model_kwargs.copy()
|
|
148
|
-
current_model_kwargs.update(model_kwargs)
|
|
149
|
-
|
|
150
|
-
filtered_call_kwargs = current_model_kwargs.copy()
|
|
151
|
-
filtered_call_kwargs.pop("n_trials", None)
|
|
152
|
-
filtered_call_kwargs.pop("n_samples", None)
|
|
153
|
-
filtered_call_kwargs.pop("n_iterations", None)
|
|
154
|
-
filtered_call_kwargs.pop("min_examples", None)
|
|
155
|
-
filtered_call_kwargs.pop("max_examples", None)
|
|
156
|
-
|
|
157
|
-
final_params_for_litellm = (
|
|
158
|
-
opik_litellm_monitor.try_add_opik_monitoring_to_params(filtered_call_kwargs)
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
response = litellm.completion(
|
|
162
|
-
model=self.model,
|
|
163
|
-
messages=messages,
|
|
164
|
-
seed=seed,
|
|
165
|
-
num_retries=6,
|
|
166
|
-
**final_params_for_litellm,
|
|
167
|
-
)
|
|
168
|
-
return response
|
|
169
|
-
|
|
170
116
|
def _split_dataset(
|
|
171
117
|
self, dataset: list[dict[str, Any]], train_ratio: float
|
|
172
118
|
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
|
@@ -230,18 +176,18 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
230
176
|
]
|
|
231
177
|
|
|
232
178
|
logger.debug(f"fewshot_prompt_template - Calling LLM with: {messages}")
|
|
233
|
-
|
|
234
|
-
logger.debug(f"fewshot_prompt_template - LLM response: {
|
|
179
|
+
response_content = self._call_model(messages, model=model, seed=self.seed)
|
|
180
|
+
logger.debug(f"fewshot_prompt_template - LLM response: {response_content}")
|
|
235
181
|
|
|
236
182
|
try:
|
|
237
|
-
res = utils.json_to_dict(
|
|
183
|
+
res = utils.json_to_dict(response_content)
|
|
238
184
|
return FewShotPromptTemplate(
|
|
239
185
|
message_list_with_placeholder=res["message_list_with_placeholder"],
|
|
240
186
|
example_template=res["example_template"],
|
|
241
187
|
)
|
|
242
188
|
except Exception as e:
|
|
243
189
|
logger.error(
|
|
244
|
-
f"Failed to compute few-shot prompt template: {e} - response: {
|
|
190
|
+
f"Failed to compute few-shot prompt template: {e} - response: {response_content}"
|
|
245
191
|
)
|
|
246
192
|
raise
|
|
247
193
|
|
|
@@ -361,7 +307,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
361
307
|
metric=metric,
|
|
362
308
|
evaluated_task=llm_task,
|
|
363
309
|
num_threads=self.n_threads,
|
|
364
|
-
project_name=self.
|
|
310
|
+
project_name=self.project_name,
|
|
365
311
|
experiment_config=trial_config,
|
|
366
312
|
optimization_id=optimization_id,
|
|
367
313
|
verbose=self.verbose,
|
|
@@ -486,7 +432,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
486
432
|
"stopped_early": False,
|
|
487
433
|
"metric_name": metric.__name__,
|
|
488
434
|
"model": self.model,
|
|
489
|
-
"temperature": self.
|
|
435
|
+
"temperature": self.model_parameters.get("temperature"),
|
|
490
436
|
},
|
|
491
437
|
history=optuna_history_processed,
|
|
492
438
|
llm_calls=self.llm_call_counter,
|
|
@@ -504,6 +450,9 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
504
450
|
n_samples: int | None = None,
|
|
505
451
|
auto_continue: bool = False,
|
|
506
452
|
agent_class: type[OptimizableAgent] | None = None,
|
|
453
|
+
project_name: str = "Optimization",
|
|
454
|
+
max_trials: int = 10,
|
|
455
|
+
*args: Any,
|
|
507
456
|
**kwargs: Any,
|
|
508
457
|
) -> optimization_result.OptimizationResult:
|
|
509
458
|
"""
|
|
@@ -512,23 +461,21 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
512
461
|
dataset: Opik Dataset to optimize on
|
|
513
462
|
metric: Metric function to evaluate on
|
|
514
463
|
experiment_config: Optional configuration for the experiment, useful to log additional metadata
|
|
464
|
+
max_trials: Number of trials for Bayesian Optimization (default: 10)
|
|
515
465
|
n_samples: Optional number of items to test in the dataset
|
|
516
466
|
auto_continue: Whether to auto-continue optimization
|
|
517
467
|
agent_class: Optional agent class to use
|
|
518
|
-
|
|
519
|
-
n_trials (int): Number of trials for Bayesian Optimization (default: 10)
|
|
520
|
-
mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
|
|
468
|
+
project_name: Opik project name for logging traces (default: "Optimization")
|
|
521
469
|
|
|
522
470
|
Returns:
|
|
523
471
|
OptimizationResult: Result of the optimization
|
|
524
472
|
"""
|
|
525
473
|
# Use base class validation and setup methods
|
|
526
|
-
self.
|
|
527
|
-
self.
|
|
528
|
-
self.agent_class = self.setup_agent_class(prompt, agent_class)
|
|
474
|
+
self._validate_optimization_inputs(prompt, dataset, metric)
|
|
475
|
+
self.agent_class = self._setup_agent_class(prompt, agent_class)
|
|
529
476
|
|
|
530
|
-
#
|
|
531
|
-
|
|
477
|
+
# Set project name from parameter
|
|
478
|
+
self.project_name = project_name
|
|
532
479
|
|
|
533
480
|
optimization = None
|
|
534
481
|
try:
|
|
@@ -537,18 +484,18 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
537
484
|
objective_name=metric.__name__,
|
|
538
485
|
metadata={"optimizer": self.__class__.__name__},
|
|
539
486
|
)
|
|
540
|
-
|
|
487
|
+
self.current_optimization_id = optimization.id
|
|
541
488
|
except Exception:
|
|
542
489
|
logger.warning(
|
|
543
490
|
"Opik server does not support optimizations. Please upgrade opik."
|
|
544
491
|
)
|
|
545
492
|
optimization = None
|
|
546
|
-
|
|
493
|
+
self.current_optimization_id = None
|
|
547
494
|
|
|
548
495
|
# Start experiment reporting
|
|
549
496
|
reporting.display_header(
|
|
550
497
|
algorithm=self.__class__.__name__,
|
|
551
|
-
optimization_id=
|
|
498
|
+
optimization_id=self.current_optimization_id,
|
|
552
499
|
dataset_id=dataset.id,
|
|
553
500
|
verbose=self.verbose,
|
|
554
501
|
)
|
|
@@ -557,7 +504,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
557
504
|
optimizer_config={
|
|
558
505
|
"optimizer": self.__class__.__name__,
|
|
559
506
|
"metric": metric.__name__,
|
|
560
|
-
"
|
|
507
|
+
"max_trials": max_trials,
|
|
561
508
|
"n_samples": n_samples,
|
|
562
509
|
},
|
|
563
510
|
verbose=self.verbose,
|
|
@@ -605,11 +552,11 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
605
552
|
baseline_score=baseline_score,
|
|
606
553
|
optimization_id=optimization.id if optimization is not None else None,
|
|
607
554
|
experiment_config=experiment_config,
|
|
608
|
-
n_trials=
|
|
555
|
+
n_trials=max_trials,
|
|
609
556
|
n_samples=n_samples,
|
|
610
557
|
)
|
|
611
558
|
if optimization:
|
|
612
|
-
self.
|
|
559
|
+
self._update_optimization(optimization, status="completed")
|
|
613
560
|
|
|
614
561
|
utils.enable_experiment_reporting()
|
|
615
562
|
return result
|
|
@@ -643,6 +590,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
643
590
|
raise Exception("Can't use n_samples and dataset_item_ids")
|
|
644
591
|
|
|
645
592
|
all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
|
|
593
|
+
n_samples = min(n_samples, len(all_ids))
|
|
646
594
|
dataset_item_ids = random.sample(all_ids, n_samples)
|
|
647
595
|
|
|
648
596
|
configuration_updates = self._drop_none(
|
|
@@ -709,6 +657,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
|
709
657
|
|
|
710
658
|
result = agent.invoke(messages, seed=self.seed)
|
|
711
659
|
|
|
660
|
+
# Add tags to trace for optimization tracking
|
|
661
|
+
if self.current_optimization_id:
|
|
662
|
+
opik_context.update_current_trace(
|
|
663
|
+
tags=[self.current_optimization_id, "Evaluation"]
|
|
664
|
+
)
|
|
665
|
+
|
|
712
666
|
return {mappers.EVALUATED_LLM_TASK_OUTPUT: result}
|
|
713
667
|
|
|
714
668
|
return llm_task
|
|
@@ -8,12 +8,12 @@ from rich.text import Text
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
9
|
from .few_shot_bayesian_optimizer import FewShotPromptTemplate
|
|
10
10
|
|
|
11
|
-
from ..reporting_utils import (
|
|
11
|
+
from ..reporting_utils import ( # noqa: F401
|
|
12
12
|
convert_tqdm_to_rich,
|
|
13
|
-
display_configuration,
|
|
14
|
-
display_header,
|
|
13
|
+
display_configuration,
|
|
14
|
+
display_header,
|
|
15
15
|
display_messages,
|
|
16
|
-
display_result,
|
|
16
|
+
display_result,
|
|
17
17
|
get_console,
|
|
18
18
|
suppress_opik_logs,
|
|
19
19
|
)
|