opik-optimizer 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/base_optimizer.py +376 -19
  3. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +80 -17
  4. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +179 -39
  5. opik_optimizer/evolutionary_optimizer/llm_support.py +3 -1
  6. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +17 -3
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +5 -0
  9. opik_optimizer/evolutionary_optimizer/prompts.py +47 -0
  10. opik_optimizer/evolutionary_optimizer/reporting.py +12 -0
  11. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +65 -59
  12. opik_optimizer/gepa_optimizer/adapter.py +5 -3
  13. opik_optimizer/gepa_optimizer/gepa_optimizer.py +163 -66
  14. opik_optimizer/mcp_utils/mcp_workflow.py +57 -3
  15. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +75 -69
  16. opik_optimizer/mipro_optimizer/_lm.py +10 -3
  17. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1 -1
  18. opik_optimizer/mipro_optimizer/mipro_optimizer.py +96 -21
  19. opik_optimizer/optimizable_agent.py +5 -0
  20. opik_optimizer/optimization_result.py +1 -0
  21. opik_optimizer/utils/core.py +56 -14
  22. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/METADATA +96 -9
  23. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/RECORD +27 -26
  24. /opik_optimizer/{colbert.py → utils/colbert.py} +0 -0
  25. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  26. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/licenses/LICENSE +0 -0
  27. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0

opik_optimizer/evolutionary_optimizer/population_ops.py

@@ -8,6 +8,7 @@ from deap import creator as _creator
 
 from . import prompts as evo_prompts
 from . import reporting
+from .mcp import EvolutionaryMCPContext, initialize_population_mcp
 from ..optimization_config import chat_prompt
 from .. import utils
 

@@ -22,6 +23,7 @@ class PopulationOps:
     output_style_guidance: str
     _call_model: Any
     toolbox: Any
+    _mcp_context: EvolutionaryMCPContext | None
     # Hints for mixin attributes provided by the primary optimizer class
     _gens_since_pop_improvement: int
     _best_primary_score_history: list[float]

@@ -39,6 +41,9 @@ class PopulationOps:
         including some 'fresh start' prompts based purely on task description.
         All generated prompts should aim to elicit answers matching self.output_style_guidance.
         """
+        mcp_context = getattr(self, "_mcp_context", None)
+        if mcp_context is not None:
+            return initialize_population_mcp(self, prompt, mcp_context)
         with reporting.initializing_population(verbose=self.verbose) as init_pop_report:
             init_pop_report.start(self.population_size)
 
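
Note on the guard above: PopulationOps is a mixin, so _mcp_context is only assigned when the hosting optimizer was built with an MCP workflow; getattr(..., None) keeps the attribute lookup safe on non-MCP runs. A minimal illustration of the pattern (class names here are hypothetical, not part of the package):

    class PlainHost:
        """Host that never assigns the optional mixin attribute."""

    class McpHost:
        _mcp_context = object()  # stands in for an EvolutionaryMCPContext

    for host in (PlainHost(), McpHost()):
        ctx = getattr(host, "_mcp_context", None)
        print("MCP path" if ctx is not None else "default path")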

opik_optimizer/evolutionary_optimizer/prompts.py

@@ -206,6 +206,53 @@ def mutation_strategy_prompts(output_style_guidance: str | None) -> dict[str, str]:
     }
 
 
+# ---------------------------------------------------------------------------
+# MCP prompts
+# ---------------------------------------------------------------------------
+
+
+def mcp_tool_rewrite_system_prompt() -> str:
+    return (
+        "You are an expert prompt engineer tasked with refining MCP tool descriptions. "
+        "Always respond with strictly valid JSON matching the requested schema."
+    )
+
+
+def mcp_tool_rewrite_user_prompt(
+    *,
+    tool_name: str,
+    current_description: str,
+    tool_metadata_json: str,
+    num_variations: int,
+) -> str:
+    current_description = current_description.strip() or "(no description provided)"
+    return f"""You are improving the description of the MCP tool `{tool_name}`.
+
+Current description:
+---
+{current_description}
+---
+
+Tool metadata (JSON):
+{tool_metadata_json}
+
+Generate {num_variations} improved descriptions for this tool. Each description should:
+- Clarify expected arguments and their semantics.
+- Explain how the tool output should be used in the final response.
+- Avoid changing the tool name or introducing unsupported behaviour.
+
+Respond strictly as JSON of the form:
+{{
+  "prompts": [
+    {{
+      "tool_description": "...",
+      "improvement_focus": "..."
+    }}
+  ]
+}}
+"""
+
+
 def semantic_mutation_user_prompt(
     prompt_messages: list[dict[str, str]],
     task_description: str,
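
Both helpers above are plain string builders, so they can be exercised without an MCP runtime. A sketch of how a caller might assemble the rewrite messages (the tool metadata below is an invented example):

    import json

    from opik_optimizer.evolutionary_optimizer import prompts as evo_prompts

    metadata = {"name": "search_docs", "inputSchema": {"query": "string"}}
    messages = [
        {"role": "system", "content": evo_prompts.mcp_tool_rewrite_system_prompt()},
        {
            "role": "user",
            "content": evo_prompts.mcp_tool_rewrite_user_prompt(
                tool_name="search_docs",
                current_description="Searches documentation.",
                tool_metadata_json=json.dumps(metadata, indent=2),
                num_variations=3,
            ),
        },
    ]
    # The model reply is expected to parse as:
    # {"prompts": [{"tool_description": "...", "improvement_focus": "..."}]}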

opik_optimizer/evolutionary_optimizer/reporting.py

@@ -20,6 +20,18 @@ PANEL_WIDTH = 70
 console = get_console()
 
 
+def display_tool_description(description: str, title: str, style: str) -> None:
+    panel = Panel(
+        Text(description),
+        title=title,
+        title_align="left",
+        border_style=style,
+        width=PANEL_WIDTH,
+        padding=(1, 2),
+    )
+    console.print(panel)
+
+
 @contextmanager
 def infer_output_style(verbose: int = 1) -> Any:
     class Reporter:
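
The new helper renders a bordered panel for a single tool description (Panel and Text come from rich, which this module already uses). A usage sketch with illustrative text:

    from opik_optimizer.evolutionary_optimizer.reporting import display_tool_description

    display_tool_description(
        "Searches the documentation index and returns the top matches.",
        title="Candidate tool description",
        style="green",
    )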

opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py

@@ -1,6 +1,8 @@
 from typing import Any
 from collections.abc import Callable
+import warnings
 
+import copy
 import json
 import logging
 import random

@@ -16,7 +18,6 @@ from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
 from pydantic import BaseModel
 
 from opik_optimizer import base_optimizer
-from ..utils import create_litellm_agent_class
 from ..optimization_config import chat_prompt, mappers
 from ..optimizable_agent import OptimizableAgent
 from .. import _throttle, optimization_result, task_evaluator, utils

@@ -95,8 +96,11 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             **model_kwargs: Additional model parameters
         """
         if "project_name" in model_kwargs:
-            print(
-                "Removing `project_name` from constructor; it now belongs in the ChatPrompt()"
+            warnings.warn(
+                "The 'project_name' parameter in optimizer constructor is deprecated. "
+                "Set project_name in the ChatPrompt instead.",
+                DeprecationWarning,
+                stacklevel=2,
             )
             del model_kwargs["project_name"]
 
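
Replacing print with warnings.warn routes the message through Python's warnings machinery, and stacklevel=2 attributes the warning to the caller's constructor call rather than to optimizer internals. A sketch of what a caller would observe, assuming the optimizer is importable from the package root:

    import warnings

    from opik_optimizer import FewShotBayesianOptimizer

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        FewShotBayesianOptimizer(model="openai/gpt-4o-mini", project_name="legacy")

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)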

@@ -112,10 +116,14 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         elif self.verbose == 2:
             logger.setLevel(logging.DEBUG)
 
-        self._opik_client = opik.Opik()
-        self.llm_call_counter = 0
         logger.debug(f"Initialized FewShotBayesianOptimizer with model: {model}")
 
+    def get_optimizer_metadata(self) -> dict[str, Any]:
+        return {
+            "min_examples": self.min_examples,
+            "max_examples": self.max_examples,
+        }
+
     @_throttle.rate_limited(_limiter)
     def _call_model(
         self,

@@ -134,7 +142,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         Returns:
             Dict containing the model's response
         """
-        self.llm_call_counter += 1
+        self.increment_llm_counter()
 
         current_model_kwargs = self.model_kwargs.copy()
         current_model_kwargs.update(model_kwargs)

@@ -260,19 +268,20 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         if n_samples is not None and n_samples < len(dataset_items):
             eval_dataset_item_ids = random.sample(all_dataset_item_ids, n_samples)
 
-        # Define the experiment configuration
-        experiment_config = experiment_config or {}
-        base_experiment_config = {  # Base config for reuse
-            **experiment_config,
-            **{
-                "optimizer": self.__class__.__name__,
-                "agent_class": self.agent_class.__name__,
-                "agent_config": prompt.to_dict(),
-                "metric": metric.__name__,
-                "dataset": dataset.name,
-                "configuration": {},
-            },
-        }
+        configuration_updates = self._drop_none(
+            {
+                "n_trials": n_trials,
+                "n_samples": n_samples,
+                "baseline_score": baseline_score,
+            }
+        )
+        base_experiment_config = self._prepare_experiment_config(
+            prompt=prompt,
+            dataset=dataset,
+            metric=metric,
+            experiment_config=experiment_config,
+            configuration_updates=configuration_updates,
+        )
 
         # Start Optuna Study
         def optimization_objective(trial: optuna.Trial) -> float:
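
_drop_none and _prepare_experiment_config are helpers on the expanded BaseOptimizer (base_optimizer.py grows by 376 lines in this release); their bodies are not shown in this diff. A minimal sketch of the behaviour the call sites imply for _drop_none, with a hypothetical signature:

    from typing import Any

    def _drop_none(self, values: dict[str, Any]) -> dict[str, Any]:
        # Keep only keys that were actually provided, so unset optional
        # parameters such as n_samples do not end up in the experiment config.
        return {key: value for key, value in values.items() if value is not None}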

@@ -327,7 +336,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             ]
 
             # Log trial config
-            trial_config = base_experiment_config.copy()
+            trial_config = copy.deepcopy(base_experiment_config)
             trial_config["configuration"]["prompt"] = (
                 messages_for_reporting  # Base instruction
             )
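
The switch to copy.deepcopy matters because dict.copy() is shallow: every trial would share the one nested "configuration" dict, so each trial's prompt assignment would silently overwrite the previous trial's logged config. A standalone illustration:

    import copy

    base = {"configuration": {}}

    shallow = base.copy()
    shallow["configuration"]["prompt"] = "trial-1"  # mutates base as well
    print(base["configuration"])  # {'prompt': 'trial-1'}

    deep = copy.deepcopy(base)
    deep["configuration"]["prompt"] = "trial-2"  # isolated nested dict
    print(base["configuration"])  # still {'prompt': 'trial-1'}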

@@ -481,6 +490,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             },
             history=optuna_history_processed,
             llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
             dataset_id=dataset.id,
             optimization_id=optimization_id,
         )

@@ -490,47 +500,39 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         prompt: chat_prompt.ChatPrompt,
         dataset: Dataset,
         metric: Callable,
-        n_trials: int = 10,
-        agent_class: type[OptimizableAgent] | None = None,
         experiment_config: dict | None = None,
         n_samples: int | None = None,
+        auto_continue: bool = False,
+        agent_class: type[OptimizableAgent] | None = None,
+        **kwargs: Any,
     ) -> optimization_result.OptimizationResult:
         """
         Args:
-            prompt:
+            prompt: The prompt to optimize
             dataset: Opik Dataset to optimize on
             metric: Metric function to evaluate on
-            n_trials: Number of trials for Bayesian Optimization
             experiment_config: Optional configuration for the experiment, useful to log additional metadata
             n_samples: Optional number of items to test in the dataset
+            auto_continue: Whether to auto-continue optimization
+            agent_class: Optional agent class to use
+            **kwargs: Additional parameters including:
+                n_trials (int): Number of trials for Bayesian Optimization (default: 10)
+                mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
 
         Returns:
             OptimizationResult: Result of the optimization
         """
-        if not isinstance(prompt, chat_prompt.ChatPrompt):
-            raise ValueError("Prompt must be a ChatPrompt object")
+        # Use base class validation and setup methods
+        self.validate_optimization_inputs(prompt, dataset, metric)
+        self.configure_prompt_model(prompt)
+        self.agent_class = self.setup_agent_class(prompt, agent_class)
 
-        if not isinstance(dataset, Dataset):
-            raise ValueError("Dataset must be a Dataset object")
-
-        if not callable(metric):
-            raise ValueError(
-                "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
-            )
-
-        if prompt.model is None:
-            prompt.model = self.model
-        if prompt.model_kwargs is None:
-            prompt.model_kwargs = self.model_kwargs
-
-        if agent_class is None:
-            self.agent_class = create_litellm_agent_class(prompt)
-        else:
-            self.agent_class = agent_class
+        # Extract n_trials from kwargs for backward compatibility
+        n_trials = kwargs.get("n_trials", 10)
 
         optimization = None
         try:
-            optimization = self._opik_client.create_optimization(
+            optimization = self.opik_client.create_optimization(
                 dataset_name=dataset.name,
                 objective_name=metric.__name__,
                 metadata={"optimizer": self.__class__.__name__},
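
With this change, callers pass n_trials through **kwargs instead of a dedicated parameter. A minimal sketch of the new call shape for optimize_prompt (the dataset, metric, and prompt contents are placeholders, not part of this diff):

    from opik_optimizer import FewShotBayesianOptimizer
    from opik_optimizer.optimization_config import chat_prompt

    optimizer = FewShotBayesianOptimizer(model="openai/gpt-4o-mini")
    prompt = chat_prompt.ChatPrompt(
        messages=[{"role": "system", "content": "You are a concise assistant."}]
    )

    result = optimizer.optimize_prompt(
        prompt=prompt,
        dataset=my_dataset,  # an opik.Dataset created elsewhere
        metric=my_metric,    # takes `dataset_item` and `llm_output`
        n_samples=50,
        n_trials=20,  # forwarded via **kwargs; defaults to 10
    )
    print(result.llm_calls, result.tool_calls)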

@@ -636,20 +638,6 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         """
         llm_task = self._build_task_from_messages(prompt, prompt.get_messages())
 
-        experiment_config = experiment_config or {}
-        experiment_config["project_name"] = self.agent_class.__name__
-        experiment_config = {
-            **experiment_config,
-            **{
-                "optimizer": self.__class__.__name__,
-                "agent_class": self.agent_class.__name__,
-                "agent_config": prompt.to_dict(),
-                "metric": metric.__name__,
-                "dataset": dataset.name,
-                "configuration": {"prompt": prompt.get_messages()},
-            },
-        }
-
         if n_samples is not None:
             if dataset_item_ids is not None:
                 raise Exception("Can't use n_samples and dataset_item_ids")

@@ -657,6 +645,24 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             all_ids = [dataset_item["id"] for dataset_item in dataset.get_items()]
             dataset_item_ids = random.sample(all_ids, n_samples)
 
+        configuration_updates = self._drop_none(
+            {
+                "n_samples": n_samples,
+                "dataset_item_ids": dataset_item_ids,
+            }
+        )
+        additional_metadata = (
+            {"optimization_id": optimization_id} if optimization_id else None
+        )
+        experiment_config = self._prepare_experiment_config(
+            prompt=prompt,
+            dataset=dataset,
+            metric=metric,
+            experiment_config=experiment_config,
+            configuration_updates=configuration_updates,
+            additional_metadata=additional_metadata,
+        )
+
         logger.debug("Starting FewShotBayesian evaluation...")
         score = task_evaluator.evaluate(
             dataset=dataset,

@@ -664,7 +670,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             metric=metric,
             evaluated_task=llm_task,
             num_threads=self.n_threads,
-            project_name=self.agent_class.project_name,
+            project_name=experiment_config.get("project_name"),
             experiment_config=experiment_config,
             optimization_id=optimization_id,
             verbose=self.verbose,

opik_optimizer/gepa_optimizer/adapter.py

@@ -12,7 +12,7 @@ from ..optimization_config import chat_prompt
 from ..utils import create_litellm_agent_class
 
 
-LOGGER = logging.getLogger("opik_optimizer.gepa.adapter")
+logger = logging.getLogger(__name__)
 
 
 @dataclass

@@ -77,7 +77,9 @@ class OpikGEPAAdapter(GEPAAdapter[OpikDataInst, dict[str, Any], dict[str, Any]])
         system_text = _extract_system_text(candidate, self._system_fallback)
         prompt_variant = _apply_system_text(self._base_prompt, system_text)
 
-        agent_class = create_litellm_agent_class(prompt_variant)
+        agent_class = create_litellm_agent_class(
+            prompt_variant, optimizer_ref=self._optimizer
+        )
         agent = agent_class(prompt_variant)
 
         outputs: list[dict[str, Any]] = []

@@ -144,7 +146,7 @@ class OpikGEPAAdapter(GEPAAdapter[OpikDataInst, dict[str, Any], dict[str, Any]])
 
         reflective_records = list(_records())
         if not reflective_records:
-            LOGGER.debug(
+            logger.debug(
                 "No trajectories captured for candidate; returning empty reflective dataset"
             )
             reflective_records = []