opik-optimizer 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
Files changed (36)
  1. opik_optimizer/__init__.py +0 -2
  2. opik_optimizer/base_optimizer.py +314 -145
  3. opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
  4. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
  5. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
  6. opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
  9. opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
  10. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
  11. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
  12. opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
  13. opik_optimizer/gepa_optimizer/reporting.py +164 -22
  14. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
  15. opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
  16. opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
  17. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
  18. opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
  19. opik_optimizer/mipro_optimizer/__init__.py +2 -2
  20. opik_optimizer/mipro_optimizer/_lm.py +4 -4
  21. opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
  22. opik_optimizer/mipro_optimizer/utils.py +1 -0
  23. opik_optimizer/optimizable_agent.py +7 -4
  24. opik_optimizer/optimization_config/chat_prompt.py +7 -10
  25. opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
  26. opik_optimizer/parameter_optimizer/reporting.py +148 -0
  27. opik_optimizer/reporting_utils.py +42 -15
  28. opik_optimizer/utils/core.py +16 -2
  29. opik_optimizer/utils/prompt_segments.py +1 -2
  30. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
  31. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +34 -35
  32. opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
  33. opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
  34. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
  35. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
  36. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
@@ -6,12 +6,12 @@ from rich.panel import Panel
 from rich.text import Text
 
 from ..optimization_config import chat_prompt
-from ..reporting_utils import (
+from ..reporting_utils import (  # noqa: F401
     convert_tqdm_to_rich,
-    display_configuration,  # noqa: F401
-    display_header,  # noqa: F401
-    display_messages,  # noqa: F401
-    display_result,  # noqa: F401
+    display_configuration,
+    display_header,
+    display_messages,
+    display_result,
     get_console,
     suppress_opik_logs,
 )
@@ -1,6 +1,5 @@
 from typing import Any
 from collections.abc import Callable
-import warnings
 
 import copy
 import json
@@ -8,13 +7,11 @@ import logging
 import random
 from datetime import datetime
 
-import litellm
 import optuna
 import optuna.samplers
 
 import opik
-from opik import Dataset
-from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
+from opik import Dataset, opik_context
 from pydantic import BaseModel
 
 from opik_optimizer import base_optimizer
@@ -64,47 +61,39 @@ class FewShotPromptTemplate(BaseModel):
 
 class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
     """
-    The Few-Shot Bayesian Optimizer can be used to add few-shot examples to prompts. This algorithm
-    employes a two stage pipeline:
-
-    1. We generate a few-shot prompt template that is inserted can be inserted into the prompt
-    provided
-    2. We use Bayesian Optimization to determine the best examples to include in the prompt.
-
-    This algorithm is best used when you have a well defined task and would like to guide the LLM
-    by providing some examples.
+    Few-Shot Bayesian Optimizer that adds few-shot examples to prompts using Bayesian optimization.
+
+    This algorithm employs a two-stage pipeline:
+
+    1. Generate a few-shot prompt template that can be inserted into the prompt
+    2. Use Bayesian Optimization to determine the best examples to include in the prompt
+
+    This algorithm is best used when you have a well-defined task and would like to guide the LLM
+    by providing examples. It automatically finds the optimal number and selection of examples.
+
+    Args:
+        model: LiteLLM model name for optimizer's internal reasoning (generating few-shot templates)
+        model_parameters: Optional dict of LiteLLM parameters for optimizer's internal LLM calls.
+            Common params: temperature, max_tokens, max_completion_tokens, top_p.
+            See: https://docs.litellm.ai/docs/completion/input
+        min_examples: Minimum number of examples to include in the prompt
+        max_examples: Maximum number of examples to include in the prompt
+        n_threads: Number of threads for parallel evaluation
+        verbose: Controls internal logging/progress bars (0=off, 1=on)
+        seed: Random seed for reproducibility
     """
 
     def __init__(
         self,
-        model: str,
+        model: str = "gpt-4o",
+        model_parameters: dict[str, Any] | None = None,
         min_examples: int = 2,
         max_examples: int = 8,
-        seed: int = 42,
         n_threads: int = 8,
         verbose: int = 1,
-        **model_kwargs: Any,
+        seed: int = 42,
     ) -> None:
-        """
-        Args:
-            model: The model to used to evaluate the prompt
-            min_examples: Minimum number of examples to include
-            max_examples: Maximum number of examples to include
-            seed: Random seed for reproducibility
-            n_threads: Number of threads for parallel evaluation
-            verbose: Controls internal logging/progress bars (0=off, 1=on).
-            **model_kwargs: Additional model parameters
-        """
-        if "project_name" in model_kwargs:
-            warnings.warn(
-                "The 'project_name' parameter in optimizer constructor is deprecated. "
-                "Set project_name in the ChatPrompt instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            del model_kwargs["project_name"]
-
-        super().__init__(model, verbose, **model_kwargs)
+        super().__init__(model, verbose, seed=seed, model_parameters=model_parameters)
         self.min_examples = min_examples
         self.max_examples = max_examples
         self.seed = seed
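
For orientation, here is a minimal sketch of how the 2.2.0 constructor introduced in the hunk above might be called, assuming the top-level FewShotBayesianOptimizer export is unchanged; the model_parameters content is illustrative, while the remaining values are the defaults shown in the diff.

    # Sketch of the new 2.2.0 constructor; model_parameters values are illustrative.
    from opik_optimizer import FewShotBayesianOptimizer

    optimizer = FewShotBayesianOptimizer(
        model="gpt-4o",                          # LiteLLM model for the optimizer's own reasoning
        model_parameters={"temperature": 0.2},   # replaces the old **model_kwargs catch-all
        min_examples=2,
        max_examples=8,
        n_threads=8,
        seed=42,
    )

Note that the deprecated project_name constructor argument and its DeprecationWarning are removed outright; the project name now travels through optimize_prompt (see the signature hunk further down).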
@@ -124,49 +113,6 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             "max_examples": self.max_examples,
         }
 
-    @_throttle.rate_limited(_limiter)
-    def _call_model(
-        self,
-        model: str,
-        messages: list[dict[str, str]],
-        seed: int,
-        model_kwargs: dict[str, Any],
-    ) -> dict[str, Any]:
-        """
-        Args:
-            model: The model to use for the call
-            messages: List of message dictionaries with 'role' and 'content' keys
-            seed: Random seed for reproducibility
-            model_kwargs: Additional model parameters
-
-        Returns:
-            Dict containing the model's response
-        """
-        self.increment_llm_counter()
-
-        current_model_kwargs = self.model_kwargs.copy()
-        current_model_kwargs.update(model_kwargs)
-
-        filtered_call_kwargs = current_model_kwargs.copy()
-        filtered_call_kwargs.pop("n_trials", None)
-        filtered_call_kwargs.pop("n_samples", None)
-        filtered_call_kwargs.pop("n_iterations", None)
-        filtered_call_kwargs.pop("min_examples", None)
-        filtered_call_kwargs.pop("max_examples", None)
-
-        final_params_for_litellm = (
-            opik_litellm_monitor.try_add_opik_monitoring_to_params(filtered_call_kwargs)
-        )
-
-        response = litellm.completion(
-            model=self.model,
-            messages=messages,
-            seed=seed,
-            num_retries=6,
-            **final_params_for_litellm,
-        )
-        return response
-
     def _split_dataset(
         self, dataset: list[dict[str, Any]], train_ratio: float
     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
@@ -230,18 +176,18 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         ]
 
         logger.debug(f"fewshot_prompt_template - Calling LLM with: {messages}")
-        response = self._call_model(model, messages, self.seed, self.model_kwargs)
-        logger.debug(f"fewshot_prompt_template - LLM response: {response}")
+        response_content = self._call_model(messages, model=model, seed=self.seed)
+        logger.debug(f"fewshot_prompt_template - LLM response: {response_content}")
 
         try:
-            res = utils.json_to_dict(response["choices"][0]["message"]["content"])
+            res = utils.json_to_dict(response_content)
             return FewShotPromptTemplate(
                 message_list_with_placeholder=res["message_list_with_placeholder"],
                 example_template=res["example_template"],
             )
         except Exception as e:
             logger.error(
-                f"Failed to compute few-shot prompt template: {e} - response: {response}"
+                f"Failed to compute few-shot prompt template: {e} - response: {response_content}"
             )
             raise
 
@@ -361,7 +307,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             metric=metric,
             evaluated_task=llm_task,
             num_threads=self.n_threads,
-            project_name=self.agent_class.project_name,
+            project_name=self.project_name,
             experiment_config=trial_config,
             optimization_id=optimization_id,
             verbose=self.verbose,
@@ -486,7 +432,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
                 "stopped_early": False,
                 "metric_name": metric.__name__,
                 "model": self.model,
-                "temperature": self.model_kwargs.get("temperature"),
+                "temperature": self.model_parameters.get("temperature"),
             },
             history=optuna_history_processed,
             llm_calls=self.llm_call_counter,
@@ -504,6 +450,9 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         n_samples: int | None = None,
         auto_continue: bool = False,
         agent_class: type[OptimizableAgent] | None = None,
+        project_name: str = "Optimization",
+        max_trials: int = 10,
+        *args: Any,
         **kwargs: Any,
     ) -> optimization_result.OptimizationResult:
         """
@@ -512,23 +461,21 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             dataset: Opik Dataset to optimize on
             metric: Metric function to evaluate on
             experiment_config: Optional configuration for the experiment, useful to log additional metadata
+            max_trials: Number of trials for Bayesian Optimization (default: 10)
             n_samples: Optional number of items to test in the dataset
             auto_continue: Whether to auto-continue optimization
             agent_class: Optional agent class to use
-            **kwargs: Additional parameters including:
-                n_trials (int): Number of trials for Bayesian Optimization (default: 10)
-                mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
+            project_name: Opik project name for logging traces (default: "Optimization")
 
         Returns:
             OptimizationResult: Result of the optimization
         """
         # Use base class validation and setup methods
-        self.validate_optimization_inputs(prompt, dataset, metric)
-        self.configure_prompt_model(prompt)
-        self.agent_class = self.setup_agent_class(prompt, agent_class)
+        self._validate_optimization_inputs(prompt, dataset, metric)
+        self.agent_class = self._setup_agent_class(prompt, agent_class)
 
-        # Extract n_trials from kwargs for backward compatibility
-        n_trials = kwargs.get("n_trials", 10)
+        # Set project name from parameter
+        self.project_name = project_name
 
         optimization = None
         try:
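
A hedged sketch of the corresponding optimize_prompt call under the new signature: max_trials and project_name are explicit keyword parameters, replacing the old n_trials kwarg documented above; prompt, dataset, and metric are placeholders assumed to be defined elsewhere.

    # Sketch only: prompt (ChatPrompt), dataset (opik Dataset), and metric are assumed to exist.
    result = optimizer.optimize_prompt(
        prompt=prompt,
        dataset=dataset,
        metric=metric,
        project_name="Optimization",   # new keyword; traces previously used the agent class's project_name
        max_trials=10,                 # new keyword; replaces the n_trials kwarg
        n_samples=50,
    )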
@@ -537,18 +484,18 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
                 objective_name=metric.__name__,
                 metadata={"optimizer": self.__class__.__name__},
             )
-            optimization_run_id = optimization.id
+            self.current_optimization_id = optimization.id
         except Exception:
             logger.warning(
                 "Opik server does not support optimizations. Please upgrade opik."
             )
             optimization = None
-            optimization_run_id = None
+            self.current_optimization_id = None
 
         # Start experiment reporting
         reporting.display_header(
             algorithm=self.__class__.__name__,
-            optimization_id=optimization_run_id,
+            optimization_id=self.current_optimization_id,
             dataset_id=dataset.id,
             verbose=self.verbose,
         )
@@ -557,7 +504,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             optimizer_config={
                 "optimizer": self.__class__.__name__,
                 "metric": metric.__name__,
-                "n_trials": n_trials,
+                "max_trials": max_trials,
                 "n_samples": n_samples,
             },
             verbose=self.verbose,
@@ -605,11 +552,11 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             baseline_score=baseline_score,
             optimization_id=optimization.id if optimization is not None else None,
             experiment_config=experiment_config,
-            n_trials=n_trials,
+            n_trials=max_trials,
             n_samples=n_samples,
         )
         if optimization:
-            self.update_optimization(optimization, status="completed")
+            self._update_optimization(optimization, status="completed")
 
         utils.enable_experiment_reporting()
         return result
@@ -643,6 +590,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             raise Exception("Can't use n_samples and dataset_item_ids")
 
         all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
+        n_samples = min(n_samples, len(all_ids))
         dataset_item_ids = random.sample(all_ids, n_samples)
 
         configuration_updates = self._drop_none(
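
The added min() clamp protects the random.sample call directly below it: Python's random.sample raises ValueError when the requested sample size exceeds the population, so an n_samples larger than the dataset would previously have failed. A standalone illustration of the guard:

    import random

    all_ids = ["id-1", "id-2", "id-3"]
    n_samples = 5                             # caller asked for more items than the dataset holds
    n_samples = min(n_samples, len(all_ids))  # the clamp added in this hunk
    print(random.sample(all_ids, n_samples))  # all three ids in random order, no ValueError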
@@ -709,6 +657,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
 
             result = agent.invoke(messages, seed=self.seed)
 
+            # Add tags to trace for optimization tracking
+            if self.current_optimization_id:
+                opik_context.update_current_trace(
+                    tags=[self.current_optimization_id, "Evaluation"]
+                )
+
             return {mappers.EVALUATED_LLM_TASK_OUTPUT: result}
 
         return llm_task
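
The new block tags each evaluation trace with the current optimization id via opik_context.update_current_trace, which only takes effect inside an active Opik trace (here, the tracked agent invocation). A rough standalone sketch of the same pattern, with an @opik.track wrapper standing in for the optimizer's own tracing and illustrative tag values:

    import opik
    from opik import opik_context

    @opik.track  # creates the trace that update_current_trace attaches the tags to
    def evaluated_task(question: str) -> str:
        opik_context.update_current_trace(tags=["<optimization-id>", "Evaluation"])
        return f"answer to {question}"

    evaluated_task("What is the capital of France?")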
@@ -8,12 +8,12 @@ from rich.text import Text
 if TYPE_CHECKING:
     from .few_shot_bayesian_optimizer import FewShotPromptTemplate
 
-from ..reporting_utils import (
+from ..reporting_utils import (  # noqa: F401
     convert_tqdm_to_rich,
-    display_configuration,  # noqa: F401
-    display_header,  # noqa: F401
+    display_configuration,
+    display_header,
     display_messages,
-    display_result,  # noqa: F401
+    display_result,
     get_console,
     suppress_opik_logs,
 )