opik-optimizer 2.1.3-py3-none-any.whl → 2.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. opik_optimizer/__init__.py +0 -2
  2. opik_optimizer/base_optimizer.py +314 -145
  3. opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
  4. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
  5. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
  6. opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
  9. opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
  10. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
  11. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
  12. opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
  13. opik_optimizer/gepa_optimizer/reporting.py +164 -22
  14. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +90 -167
  15. opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
  16. opik_optimizer/hierarchical_reflective_optimizer/reporting.py +168 -75
  17. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
  18. opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
  19. opik_optimizer/mipro_optimizer/__init__.py +2 -2
  20. opik_optimizer/mipro_optimizer/_lm.py +4 -4
  21. opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
  22. opik_optimizer/mipro_optimizer/utils.py +1 -0
  23. opik_optimizer/optimizable_agent.py +7 -4
  24. opik_optimizer/optimization_config/chat_prompt.py +7 -10
  25. opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
  26. opik_optimizer/parameter_optimizer/reporting.py +148 -0
  27. opik_optimizer/reporting_utils.py +42 -15
  28. opik_optimizer/utils/core.py +16 -2
  29. opik_optimizer/utils/prompt_segments.py +1 -2
  30. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
  31. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +34 -35
  32. opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
  33. opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
  34. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
  35. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
  36. {opik_optimizer-2.1.3.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,6 @@ import json
  import logging
  import os
  import textwrap
- import warnings
  from typing import Any, cast
  from collections.abc import Callable

@@ -11,9 +10,8 @@ import litellm
  import opik
  from litellm.caching import Cache
  from litellm.types.caching import LiteLLMCacheType
- from opik import Dataset
+ from opik import Dataset, opik_context
  from opik.environment import get_tqdm_for_current_environment
- from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor

  from opik_optimizer import task_evaluator

@@ -89,10 +87,32 @@ def _sync_tool_description_in_system(prompt: chat_prompt.ChatPrompt) -> None:

  class MetaPromptOptimizer(BaseOptimizer):
  """
- The Meta-Prompt Optimizer uses meta-prompting to improve prompts based on examples and performance.
-
- This algorithm is best used when you have a prompt and would like to make sure it follows best
- practices.
+ Meta-Prompt Optimizer that uses LLM-based meta-reasoning to iteratively improve prompts.
+
+ This optimizer uses an LLM to analyze prompt performance and generate improved variations
+ by reasoning about what changes would be most effective. It's particularly useful for:
+ - Ensuring prompts follow best practices
+ - Refining prompts for clarity and effectiveness
+ - Optimizing prompts for specific evaluation metrics
+ - Improving prompts based on performance feedback
+
+ The optimizer works by:
+ 1. Evaluating the current prompt on a dataset
+ 2. Using an LLM to reason about improvements based on performance
+ 3. Generating candidate prompt variations
+ 4. Evaluating candidates and selecting the best
+ 5. Repeating until max_trials is reached or performance plateaus
+
+ Args:
+ model: LiteLLM model name for optimizer's internal reasoning/generation calls
+ model_parameters: Optional dict of LiteLLM parameters for optimizer's internal LLM calls.
+ Common params: temperature, max_tokens, max_completion_tokens, top_p.
+ See: https://docs.litellm.ai/docs/completion/input
+ prompts_per_round: Number of candidate prompts to generate per optimization round
+ enable_context: Whether to include task-specific context when reasoning about improvements
+ n_threads: Number of parallel threads for prompt evaluation
+ verbose: Controls internal logging/progress bars (0=off, 1=on)
+ seed: Random seed for reproducibility
  """

  # --- Constants for Default Configuration ---
@@ -134,160 +154,30 @@ class MetaPromptOptimizer(BaseOptimizer):

  def __init__(
  self,
- model: str,
- reasoning_model: str | None = None,
- rounds: int = DEFAULT_ROUNDS,
- num_prompts_per_round: int = DEFAULT_PROMPTS_PER_ROUND,
- num_threads: int | None = None,
- verbose: int = 1,
+ model: str = "gpt-4o",
+ model_parameters: dict[str, Any] | None = None,
+ prompts_per_round: int = DEFAULT_PROMPTS_PER_ROUND,
  enable_context: bool = True,
  n_threads: int = 12,
+ verbose: int = 1,
  seed: int = 42,
- **model_kwargs: Any,
  ) -> None:
- """
- Args:
- model: The model to use for evaluation
- reasoning_model: The model to use for reasoning and prompt generation
- rounds: Number of optimization rounds
- num_prompts_per_round: Number of prompts to generate per round
- n_threads: Number of threads for parallel evaluation
- verbose: Controls internal logging/progress bars (0=off, 1=on).
- enable_context: Whether to include task-specific context (metrics, examples) in the reasoning prompt.
- **model_kwargs: Additional model parameters
- """
- if "project_name" in model_kwargs:
- warnings.warn(
- "The 'project_name' parameter in optimizer constructor is deprecated. "
- "Set project_name in the ChatPrompt instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- del model_kwargs["project_name"]
-
- super().__init__(model=model, verbose=verbose, seed=seed, **model_kwargs)
- self.reasoning_model = reasoning_model if reasoning_model is not None else model
- self.rounds = rounds
- self.num_prompts_per_round = num_prompts_per_round
- if num_threads is not None:
- warnings.warn(
- "The 'num_threads' parameter is deprecated and will be removed in a future version. "
- "Use 'n_threads' instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- n_threads = num_threads
- self.num_threads = n_threads
+ super().__init__(
+ model=model, verbose=verbose, seed=seed, model_parameters=model_parameters
+ )
+ self.prompts_per_round = prompts_per_round
+ self.n_threads = n_threads
  self.dataset: Dataset | None = None
  self.enable_context = enable_context
- logger.debug(
- f"Initialized MetaPromptOptimizer with model={model}, reasoning_model={self.reasoning_model}"
- )
- logger.debug(
- f"Optimization rounds: {rounds}, Prompts/round: {num_prompts_per_round}"
- )
+ logger.debug(f"Initialized MetaPromptOptimizer with model={model}")
+ logger.debug(f"Prompts/round: {prompts_per_round}")

  def get_optimizer_metadata(self) -> dict[str, Any]:
  return {
- "rounds": self.rounds,
- "num_prompts_per_round": self.num_prompts_per_round,
- "reasoning_model": self.reasoning_model,
+ "prompts_per_round": self.prompts_per_round,
  "enable_context": self.enable_context,
  }

- @_throttle.rate_limited(_rate_limiter)
- def _call_model(
- self,
- project_name: str,
- messages: list[dict[str, str]],
- is_reasoning: bool = False,
- optimization_id: str | None = None,
- ) -> str:
- """Call the model with the given prompt and return the response."""
- self.increment_llm_counter()
- # Note: Basic retry logic could be added here using tenacity
- try:
- # Basic LLM parameters (e.g., temperature, max_tokens)
- base_temperature = getattr(self, "temperature", 0.3)
- base_max_tokens = getattr(self, "max_tokens", 1000)
-
- # Use potentially different settings for reasoning calls
- reasoning_temperature = (
- base_temperature # Keep same temp unless specified otherwise
- )
- # Increase max_tokens for reasoning to ensure JSON fits, unless already high
- reasoning_max_tokens = (
- max(base_max_tokens, 3000) if is_reasoning else base_max_tokens
- )
-
- llm_config_params = {
- "temperature": (
- reasoning_temperature if is_reasoning else base_temperature
- ),
- "max_tokens": reasoning_max_tokens,
- "top_p": getattr(self, "top_p", 1.0),
- "frequency_penalty": getattr(self, "frequency_penalty", 0.0),
- "presence_penalty": getattr(self, "presence_penalty", 0.0),
- }
-
- # Prepare metadata that we want to be part of the LLM call context.
- metadata_for_opik: dict[str, Any] = {}
- if project_name:
- metadata_for_opik["project_name"] = (
- project_name # Top-level for general use
- )
- metadata_for_opik["opik"] = {"project_name": project_name}
-
- if optimization_id:
- # Also add to opik-specific structure if project_name was added
- if "opik" in metadata_for_opik:
- metadata_for_opik["opik"]["optimization_id"] = optimization_id
-
- metadata_for_opik["optimizer_name"] = self.__class__.__name__
- metadata_for_opik["opik_call_type"] = (
- "reasoning" if is_reasoning else "evaluation_llm_task_direct"
- )
-
- if metadata_for_opik:
- llm_config_params["metadata"] = metadata_for_opik
-
- model_to_use = self.reasoning_model if is_reasoning else self.model
-
- # Pass llm_config_params (which now includes our metadata) to the Opik monitor.
- # The monitor is expected to return a dictionary suitable for spreading into litellm.completion,
- # having handled our metadata and added any Opik-specific configurations.
- final_call_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(
- llm_config_params.copy()
- )
-
- logger.debug(
- f"Calling model '{model_to_use}' with messages: {messages}, "
- f"final params for litellm (from monitor): {final_call_params}"
- )
-
- response = litellm.completion(
- model=model_to_use,
- messages=messages,
- num_retries=6,
- **final_call_params,
- )
- return response.choices[0].message.content
- except litellm.exceptions.RateLimitError as e:
- logger.error(f"LiteLLM Rate Limit Error: {e}")
- raise
- except litellm.exceptions.APIConnectionError as e:
- logger.error(f"LiteLLM API Connection Error: {e}")
- raise
- except litellm.exceptions.ContextWindowExceededError as e:
- logger.error(f"LiteLLM Context Window Exceeded Error: {e}")
- # Log prompt length if possible? Needs access to prompt_for_llm here.
- raise
- except Exception:
- # logger.error(
- # f"Error calling model '{model_to_use}': {type(e).__name__} - {e}"
- # )
- raise
-
  def _evaluate_prompt(
  self,
  prompt: chat_prompt.ChatPrompt,
@@ -433,6 +323,12 @@ class MetaPromptOptimizer(BaseOptimizer):

  cleaned_model_output = raw_model_output.strip()

+ # Add tags to trace for optimization tracking
+ if self.current_optimization_id:
+ opik_context.update_current_trace(
+ tags=[self.current_optimization_id, "Evaluation"]
+ )
+
  result = {
  mappers.EVALUATED_LLM_TASK_OUTPUT: cleaned_model_output,
  }
@@ -447,8 +343,8 @@ class MetaPromptOptimizer(BaseOptimizer):
  metric=metric,
  evaluated_task=llm_task,
  dataset_item_ids=dataset_item_ids,
- num_threads=self.num_threads,
- project_name=self.agent_class.project_name,
+ num_threads=self.n_threads,
+ project_name=self.project_name,
  n_samples=subset_size, # Use subset_size for trials, None for full dataset
  experiment_config=experiment_config,
  optimization_id=optimization_id,
@@ -466,35 +362,83 @@ class MetaPromptOptimizer(BaseOptimizer):
  n_samples: int | None = None,
  auto_continue: bool = False,
  agent_class: type[OptimizableAgent] | None = None,
+ project_name: str = "Optimization",
+ max_trials: int = 10,
+ mcp_config: MCPExecutionConfig | None = None,
+ candidate_generator: Callable[..., list[chat_prompt.ChatPrompt]] | None = None,
+ candidate_generator_kwargs: dict[str, Any] | None = None,
+ *args: Any,
  **kwargs: Any,
  ) -> OptimizationResult:
- mcp_config = kwargs.pop("mcp_config", None)
- candidate_generator = kwargs.pop("candidate_generator", None)
- candidate_generator_kwargs = kwargs.pop("candidate_generator_kwargs", None)
-
  """
- Optimize a prompt using meta-reasoning.
+ Optimize a prompt using LLM-based meta-reasoning to iteratively improve performance.
+
+ The optimizer evaluates the initial prompt, uses an LLM to reason about improvements,
+ generates candidate variations, and iteratively selects the best performers until
+ max_trials is reached.

  Args:
- prompt: The prompt to optimize
- dataset: The dataset to evaluate against
- metric: The metric to use for evaluation
- experiment_config: A dictionary to log with the experiments
- n_samples: The number of dataset items to use for evaluation
- auto_continue: If True, the algorithm may continue if goal not met
- agent_class: Optional agent class to use
- **kwargs: Additional arguments for evaluation, including:
- mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
- candidate_generator: Optional candidate generator
- candidate_generator_kwargs: Optional kwargs for candidate generator
+ prompt: The ChatPrompt to optimize. Can include system/user/assistant messages,
+ tools, and model configuration.
+ dataset: Opik Dataset containing evaluation examples. Each item is passed to the
+ prompt during evaluation.
+ metric: Evaluation function that takes (dataset_item, llm_output) and returns a
+ score (float). Higher scores indicate better performance.
+ experiment_config: Optional metadata dictionary to log with Opik experiments.
+ Useful for tracking experiment parameters and context.
+ n_samples: Number of dataset items to use per evaluation. If None, uses full dataset.
+ Lower values speed up optimization but may be less reliable.
+ auto_continue: If True, optimizer may continue beyond max_trials if improvements
+ are still being found.
+ agent_class: Custom agent class for prompt execution. If None, uses default
+ LiteLLM-based agent. Must inherit from OptimizableAgent.
+ project_name: Opik project name for logging traces and experiments. Default: "Optimization"
+ max_trials: Maximum total number of prompts to evaluate across all rounds.
+ Optimizer stops when this limit is reached.
+ mcp_config: Optional MCP (Model Context Protocol) execution configuration for
+ prompts that use external tools. Enables tool-calling workflows. Default: None
+ candidate_generator: Optional custom function to generate candidate prompts.
+ Overrides default meta-reasoning generator. Should return list[ChatPrompt].
+ candidate_generator_kwargs: Optional kwargs to pass to candidate_generator.

  Returns:
- OptimizationResult: Structured result containing optimization details
+ OptimizationResult: Contains the best prompt found, final score, optimization
+ history, and metadata about the optimization run.
+
+ Example:
+ ```python
+ from opik_optimizer import MetaPromptOptimizer, ChatPrompt
+ from opik import Opik
+
+ client = Opik()
+ dataset = client.get_dataset("my_dataset")
+
+ prompt = ChatPrompt(
+ system="You are a helpful assistant.",
+ user_template="Answer this question: {question}"
+ )
+
+ def accuracy_metric(dataset_item, llm_output):
+ return 1.0 if llm_output == dataset_item["expected"] else 0.0
+
+ optimizer = MetaPromptOptimizer(model="gpt-4o")
+ result = optimizer.optimize_prompt(
+ prompt=prompt,
+ dataset=dataset,
+ metric=accuracy_metric,
+ max_trials=10
+ )
+
+ print(f"Best score: {result.best_score}")
+ print(f"Best prompt: {result.best_prompt}")
+ ```
  """
  # Use base class validation and setup methods
- self.validate_optimization_inputs(prompt, dataset, metric)
- self.configure_prompt_model(prompt)
- self.agent_class = self.setup_agent_class(prompt, agent_class)
+ self._validate_optimization_inputs(prompt, dataset, metric)
+ self.agent_class = self._setup_agent_class(prompt, agent_class)
+
+ # Set project name from parameter
+ self.project_name = project_name

  total_items = len(dataset.get_items())
  if n_samples is not None and n_samples > total_items:
@@ -510,12 +454,14 @@ class MetaPromptOptimizer(BaseOptimizer):
  objective_name=getattr(metric, "__name__", str(metric)),
  metadata={"optimizer": self.__class__.__name__},
  )
+ self.current_optimization_id = optimization.id
  logger.debug(f"Created optimization with ID: {optimization.id}")
  except Exception as e:
  logger.warning(
  f"Opik server does not support optimizations: {e}. Please upgrade opik."
  )
  optimization = None
+ self.current_optimization_id = None

  reporting.display_header(
  algorithm=self.__class__.__name__,
@@ -527,6 +473,8 @@ class MetaPromptOptimizer(BaseOptimizer):
  messages=prompt.get_messages(),
  optimizer_config={
  "optimizer": self.__class__.__name__,
+ "max_trials": max_trials,
+ "prompts_per_round": self.prompts_per_round,
  "n_samples": n_samples,
  "auto_continue": auto_continue,
  },
@@ -542,21 +490,21 @@ class MetaPromptOptimizer(BaseOptimizer):
  dataset=dataset,
  metric=metric,
  experiment_config=experiment_config,
+ max_trials=max_trials,
  n_samples=n_samples,
  auto_continue=auto_continue,
  mcp_config=mcp_config,
  candidate_generator=candidate_generator,
  candidate_generator_kwargs=candidate_generator_kwargs,
- **kwargs,
  )
  if optimization:
- self.update_optimization(optimization, status="completed")
+ self._update_optimization(optimization, status="completed")
  logger.debug("Optimization completed successfully")
  return result
  except Exception as e:
  logger.error(f"Optimization failed: {e}")
  if optimization:
- self.update_optimization(optimization, status="cancelled")
+ self._update_optimization(optimization, status="cancelled")
  logger.debug("Optimization marked as cancelled")
  raise e

@@ -601,14 +549,15 @@ class MetaPromptOptimizer(BaseOptimizer):
  if tool_segment_id not in {segment.segment_id for segment in segments}:
  raise ValueError(f"Tool '{tool_name}' not present in prompt tools")

- return self.optimize_prompt(
+ return self._optimize_prompt(
+ optimization_id=None,
  prompt=prompt,
  dataset=dataset,
  metric=metric,
  experiment_config=experiment_config,
+ max_trials=10,
  n_samples=n_samples,
  auto_continue=auto_continue,
- agent_class=agent_class,
  mcp_config=mcp_config,
  candidate_generator=self._generate_mcp_candidate_prompts,
  candidate_generator_kwargs={
@@ -617,7 +566,6 @@ class MetaPromptOptimizer(BaseOptimizer):
  "panel_style": panel_style,
  },
  tool_panel_style=panel_style,
- **kwargs,
  )

  def _optimize_prompt(
@@ -627,26 +575,25 @@ class MetaPromptOptimizer(BaseOptimizer):
  dataset: Dataset,
  metric: Callable,
  experiment_config: dict | None,
+ max_trials: int,
  n_samples: int | None,
  auto_continue: bool,
  mcp_config: MCPExecutionConfig | None = None,
- candidate_generator: None
- | (Callable[..., list[chat_prompt.ChatPrompt]]) = None,
+ candidate_generator: Callable[..., list[chat_prompt.ChatPrompt]] | None = None,
  candidate_generator_kwargs: dict[str, Any] | None = None,
  tool_panel_style: str = "bright_magenta",
- **kwargs: Any,
  ) -> OptimizationResult:
  self.auto_continue = auto_continue
  self.dataset = dataset
  self.prompt = prompt
- self.reset_counters() # Reset counters for run
+ self._reset_counters() # Reset counters for run
  initial_prompt = prompt

  current_prompt = prompt
  configuration_updates = self._drop_none(
  {
- "rounds": self.rounds,
- "num_prompts_per_round": self.num_prompts_per_round,
+ "max_trials": max_trials,
+ "prompts_per_round": self.prompts_per_round,
  }
  )
  meta_metadata = {"stage": "initial"}
@@ -678,20 +625,33 @@ class MetaPromptOptimizer(BaseOptimizer):
  baseline_reporter.set_score(initial_score)

  reporting.display_optimization_start_message(verbose=self.verbose)
+
+ # Calculate the maximum number of rounds, we will stop early if we hit the
+ # max_trials limit
+ estimated_rounds = max(1, max_trials // self.prompts_per_round + 1)
+
  with reporting.display_round_progress(
- self.rounds, verbose=self.verbose
+ estimated_rounds, verbose=self.verbose
  ) as round_reporter:
- for round_num in range(self.rounds):
+ round_num = 0
+ trials_used = 0
+
+ while trials_used < max_trials:
  round_reporter.round_start(round_num)
  previous_best_score = best_score

+ # Calculate how many prompts to generate this round
+ prompts_this_round = min(
+ self.prompts_per_round, max_trials - trials_used
+ )
+
  # Step 1. Create a set of candidate prompts
  generator = candidate_generator or self._generate_candidate_prompts
  generator_kwargs = dict(candidate_generator_kwargs or {})

  try:
  candidate_prompts = generator(
- project_name=self.agent_class.project_name,
+ project_name=self.project_name,
  current_prompt=best_prompt,
  best_score=best_score,
  round_num=round_num,
@@ -700,8 +660,11 @@ class MetaPromptOptimizer(BaseOptimizer):
  optimization_id=optimization_id,
  **generator_kwargs,
  )
+ # Limit to prompts_this_round
+ candidate_prompts = candidate_prompts[:prompts_this_round]
  except Exception as e:
- round_reporter.failed_to_generate(self.num_prompts_per_round, e)
+ round_reporter.failed_to_generate(prompts_this_round, e)
+ round_num += 1
  continue

  # Step 2. Score each candidate prompt
@@ -728,6 +691,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  )

  eval_report.set_final_score(best_score, prompt_score)
+ trials_used += 1
  except Exception:
  logger.warning("Failed evaluating agent; continuing...")
  prompt_score = 0
@@ -764,6 +728,9 @@ class MetaPromptOptimizer(BaseOptimizer):
  best_score = best_cand_score_avg
  best_prompt = best_candidate_this_round

+ # Increment counters
+ round_num += 1
+
  if tool_panel_style and getattr(best_prompt, "tools", None):
  description = (
  best_prompt.tools[0].get("function", {}).get("description", "")
@@ -868,20 +835,13 @@ class MetaPromptOptimizer(BaseOptimizer):
  "total_rounds": len(rounds),
  "metric_name": getattr(metric, "__name__", str(metric)),
  "model": self.model,
- "temperature": self.model_kwargs.get("temperature"),
+ "temperature": self.model_parameters.get("temperature"),
  }

  if best_tools:
  details["final_tools"] = best_tools

- tool_prompts = None
- if best_tools:
- tool_prompts = {
- (tool.get("function", {}).get("name") or f"tool_{idx}"): tool.get(
- "function", {}
- ).get("description")
- for idx, tool in enumerate(best_tools)
- }
+ tool_prompts = self._extract_tool_prompts(best_tools)

  return OptimizationResult(
  optimizer=self.__class__.__name__,
@@ -936,7 +896,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  ) -> list[chat_prompt.ChatPrompt]:
  """Generate candidate prompts using meta-prompting."""
  with reporting.display_candidate_generation_report(
- self.num_prompts_per_round, verbose=self.verbose
+ self.prompts_per_round, verbose=self.verbose
  ) as candidate_generation_report:
  logger.debug(f"\nGenerating candidate prompts for round {round_num + 1}")
  logger.debug(f"Generating from prompt: {current_prompt.get_messages()}")
@@ -972,7 +932,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  {task_context_str}

  {analysis_instruction}
- Generate {self.num_prompts_per_round} improved versions of this prompt.
+ Generate {self.prompts_per_round} improved versions of this prompt.
  {metric_focus_instruction}
  Each version should aim to:
  {improvement_point_1}
@@ -984,15 +944,24 @@ class MetaPromptOptimizer(BaseOptimizer):
  Return a valid JSON array as specified."""

  try:
- # Use _call_model which handles selecting reasoning_model
+ # Prepare metadata for optimization algorithm call
+ metadata_for_call: dict[str, Any] = {}
+ if project_name:
+ metadata_for_call["project_name"] = project_name
+ metadata_for_call["opik"] = {"project_name": project_name}
+ if optimization_id and "opik" in metadata_for_call:
+ metadata_for_call["opik"]["optimization_id"] = optimization_id
+ metadata_for_call["optimizer_name"] = self.__class__.__name__
+ metadata_for_call["opik_call_type"] = "optimization_algorithm"
+
+ # Use _call_model for optimization algorithm
  content = self._call_model(
- project_name,
  messages=[
  {"role": "system", "content": self._REASONING_SYSTEM_PROMPT},
  {"role": "user", "content": user_prompt},
  ],
- is_reasoning=True,
  optimization_id=optimization_id,
+ metadata=metadata_for_call,
  )
  logger.debug(f"Raw response from reasoning model: {content}")

@@ -1056,6 +1025,8 @@ class MetaPromptOptimizer(BaseOptimizer):
  chat_prompt.ChatPrompt(
  system=item["prompt"][0]["content"],
  user=user_text,
+ tools=current_prompt.tools,
+ function_map=current_prompt.function_map,
  )
  )

@@ -1125,7 +1096,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  Current best score: {best_score:.4f}
  {history_context}

- Generate {self.num_prompts_per_round} improved descriptions for this tool.
+ Generate {self.prompts_per_round} improved descriptions for this tool.
  Each description should clarify expected input arguments and set explicit expectations
  for how the tool output must be used in the final response.
  Avoid changing unrelated parts of the prompt. Focus only on the description text for `{tool_name}`.
@@ -1144,17 +1115,26 @@ class MetaPromptOptimizer(BaseOptimizer):
  ).strip()

  with reporting.display_candidate_generation_report(
- self.num_prompts_per_round, verbose=self.verbose
+ self.prompts_per_round, verbose=self.verbose
  ) as candidate_generation_report:
  try:
+ # Prepare metadata for optimization algorithm call
+ metadata_for_call_tools: dict[str, Any] = {}
+ if project_name:
+ metadata_for_call_tools["project_name"] = project_name
+ metadata_for_call_tools["opik"] = {"project_name": project_name}
+ if optimization_id and "opik" in metadata_for_call_tools:
+ metadata_for_call_tools["opik"]["optimization_id"] = optimization_id
+ metadata_for_call_tools["optimizer_name"] = self.__class__.__name__
+ metadata_for_call_tools["opik_call_type"] = "optimization_algorithm"
+
  content = self._call_model(
- project_name,
  messages=[
  {"role": "system", "content": self._REASONING_SYSTEM_PROMPT},
  {"role": "user", "content": instruction},
  ],
- is_reasoning=True,
  optimization_id=optimization_id,
+ metadata=metadata_for_call_tools,
  )

  try:
@@ -6,12 +6,12 @@ from rich.panel import Panel
  from rich.text import Text

  from ..optimization_config import chat_prompt
- from ..reporting_utils import (
+ from ..reporting_utils import ( # noqa: F401
  convert_tqdm_to_rich,
- display_configuration, # noqa: F401
- display_header, # noqa: F401
+ display_configuration,
+ display_header,
  display_messages,
- display_result, # noqa: F401
+ display_result,
  get_console,
  suppress_opik_logs,
  )
@@ -1,3 +1,3 @@
- from .mipro_optimizer import MiproOptimizer, MIPROv2
+ from .mipro_optimizer_v2 import MIPROv2

- __all__ = ["MiproOptimizer", "MIPROv2"]
+ __all__ = ["MIPROv2"]
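
For readers upgrading from 2.1.3, below is a minimal before/after sketch of the MetaPromptOptimizer API change implied by the diff above. It only relies on the signatures visible in this diff (constructor: model, model_parameters, prompts_per_round, n_threads, verbose, seed; optimize_prompt: project_name and max_trials); the dataset name, metric, and prompt contents are illustrative placeholders, and exact behavior should be confirmed against the released 2.2.0 package.

```python
from opik import Opik
from opik_optimizer import ChatPrompt, MetaPromptOptimizer

client = Opik()
dataset = client.get_dataset("my_dataset")  # placeholder dataset name

prompt = ChatPrompt(
    system="You are a helpful assistant.",
    user_template="Answer this question: {question}",
)

def accuracy_metric(dataset_item, llm_output):
    # Toy exact-match metric; higher is better.
    return 1.0 if llm_output == dataset_item["expected"] else 0.0

# 2.1.3 style (removed in 2.2.0): MetaPromptOptimizer(model=..., reasoning_model=...,
#     rounds=..., num_prompts_per_round=..., **model_kwargs)
# 2.2.0 style per this diff: LLM call parameters move into model_parameters,
# and the trial budget moves to optimize_prompt(max_trials=...).
optimizer = MetaPromptOptimizer(
    model="gpt-4o",
    model_parameters={"temperature": 0.3, "max_tokens": 1000},
    prompts_per_round=4,
    n_threads=12,
)

result = optimizer.optimize_prompt(
    prompt=prompt,
    dataset=dataset,
    metric=accuracy_metric,
    project_name="Optimization",
    max_trials=10,
)
print(result.best_score, result.best_prompt)
```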