opik-optimizer 1.0.6__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. opik_optimizer/__init__.py +4 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +402 -28
  4. opik_optimizer/data/context7_eval.jsonl +3 -0
  5. opik_optimizer/datasets/context7_eval.py +90 -0
  6. opik_optimizer/datasets/tiny_test.py +33 -34
  7. opik_optimizer/datasets/truthful_qa.py +2 -2
  8. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  9. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
  10. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
  11. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  12. opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
  13. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +154 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +30 -23
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +21 -16
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +22 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/utils/colbert.py +236 -0
  46. opik_optimizer/{utils.py → utils/core.py} +160 -33
  47. opik_optimizer/utils/dataset_utils.py +49 -0
  48. opik_optimizer/utils/prompt_segments.py +186 -0
  49. opik_optimizer-2.0.0.dist-info/METADATA +345 -0
  50. opik_optimizer-2.0.0.dist-info/RECORD +74 -0
  51. opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
  52. opik_optimizer-1.0.6.dist-info/METADATA +0 -181
  53. opik_optimizer-1.0.6.dist-info/RECORD +0 -50
  54. opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
  55. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  56. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
1
1
  import os
2
2
  import random
3
3
  from datetime import datetime
4
- from typing import Callable, Dict, List, Literal, Optional, Union
4
+ from typing import Any, Literal
5
+ from collections.abc import Callable
5
6
  import logging
6
7
 
7
8
  import dspy
@@ -14,9 +15,9 @@ from opik.integrations.dspy.callback import OpikCallback
14
15
  from opik.opik_context import get_current_span_data
15
16
 
16
17
  from ..optimization_result import OptimizationResult
17
- from ..utils import optimization_context
18
18
  from ..base_optimizer import BaseOptimizer
19
19
  from ..optimization_config.configs import TaskConfig
20
+ from ..optimization_config import chat_prompt
20
21
  from ._lm import LM
21
22
  from ._mipro_optimizer_v2 import MIPROv2
22
23
  from .utils import (
@@ -37,30 +38,42 @@ class MiproOptimizer(BaseOptimizer):
37
38
  def __init__(
38
39
  self,
39
40
  model,
40
- project_name: Optional[str] = None,
41
+ project_name: str | None = None,
41
42
  verbose: int = 1,
42
43
  **model_kwargs,
43
44
  ):
44
45
  super().__init__(model=model, verbose=verbose, **model_kwargs)
45
46
  self.tools = []
46
47
  self.project_name = project_name
48
+ if "n_threads" in self.model_kwargs:
49
+ # To allow compatibility with other optimizers:
50
+ self.model_kwargs["num_threads"] = self.model_kwargs["n_threads"]
47
51
  self.num_threads = self.model_kwargs.pop("num_threads", 6)
48
52
  self.model_kwargs["model"] = self.model
49
53
  # FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
50
54
  self.lm = LM(**self.model_kwargs)
55
+ setattr(self.lm, "parent_optimizer", self)
51
56
  opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
52
57
  dspy.configure(lm=self.lm, callbacks=[opik_callback])
53
58
  logger.debug(f"Initialized MiproOptimizer with model: {model}")
54
59
 
60
+ def get_optimizer_metadata(self) -> dict[str, Any]:
61
+ return self._drop_none(
62
+ {
63
+ "project_name": self.project_name,
64
+ "num_threads": self.num_threads,
65
+ }
66
+ )
67
+
55
68
  def evaluate_prompt(
56
69
  self,
57
- dataset: Union[str, Dataset],
70
+ dataset: str | Dataset,
58
71
  metric: Callable,
59
72
  task_config: TaskConfig,
60
- prompt: Optional[Union[str, dspy.Module, OptimizationResult]] = None,
73
+ prompt: str | dspy.Module | OptimizationResult | None = None,
61
74
  n_samples: int = 10,
62
- dataset_item_ids: Optional[List[str]] = None,
63
- experiment_config: Optional[Dict] = None,
75
+ dataset_item_ids: list[str] | None = None,
76
+ experiment_config: dict | None = None,
64
77
  verbose: int = 1,
65
78
  **kwargs,
66
79
  ) -> float:
@@ -83,7 +96,7 @@ class MiproOptimizer(BaseOptimizer):
83
96
  """
84
97
  # FIXME: call super when it is ready
85
98
  # FIXME: Intermediate values:
86
- self.llm_call_counter += 1
99
+ self.increment_llm_counter()
87
100
  input_key = task_config.input_dataset_fields[0] # FIXME: allow all inputs
88
101
  output_key = task_config.output_dataset_field
89
102
 
@@ -238,23 +251,57 @@ class MiproOptimizer(BaseOptimizer):
238
251
 
239
252
  def optimize_prompt(
240
253
  self,
241
- dataset: Union[str, Dataset],
254
+ prompt: chat_prompt.ChatPrompt,
255
+ dataset: str | Dataset,
242
256
  metric: Callable,
243
- task_config: TaskConfig,
244
- num_candidates: int = 10,
245
- experiment_config: Optional[Dict] = None,
246
- num_trials: Optional[int] = 3,
247
- n_samples: Optional[int] = 10,
248
- auto: Optional[Literal["light", "medium", "heavy"]] = "light",
257
+ experiment_config: dict | None = None,
258
+ n_samples: int | None = 10,
259
+ auto_continue: bool = False,
260
+ agent_class: str | None = None,
249
261
  **kwargs,
250
262
  ) -> OptimizationResult:
251
- self._opik_client = opik.Opik()
252
- with optimization_context(
253
- client=self._opik_client,
254
- dataset_name=dataset.name,
255
- objective_name=metric.__name__,
256
- metadata={"optimizer": self.__class__.__name__},
257
- ) as optimization:
263
+ """
264
+ Optimize a prompt using MIPRO (Multiprompt Instruction PRoposal Optimizer).
265
+
266
+ Args:
267
+ prompt: The chat prompt to optimize
268
+ dataset: Opik dataset (or dataset name) containing evaluation data
269
+ metric: Evaluation function that takes (dataset_item, llm_output) and returns a score
270
+ experiment_config: Optional configuration for the experiment
271
+ n_samples: Number of samples to use for optimization (default: 10)
272
+ auto_continue: Whether to auto-continue optimization (default: False)
273
+ agent_class: Custom agent class to use (default: None)
274
+ **kwargs: Additional arguments including:
275
+ task_config: TaskConfig instance (required)
276
+ num_candidates: Number of candidates to generate (default: 10)
277
+ num_trials: Number of trials to run (default: 3)
278
+ auto: Optimization mode - "light", "medium", or "heavy" (default: "light")
279
+
280
+ Returns:
281
+ OptimizationResult: The optimization result containing the optimized prompt and metrics
282
+
283
+ Raises:
284
+ ValueError: If task_config is not provided
285
+ """
286
+ # Resolve dataset names to Dataset objects for validation compatibility
287
+ if isinstance(dataset, str):
288
+ dataset_name = dataset
289
+ client = opik.Opik(project_name=self.project_name)
290
+ dataset = client.get_dataset(dataset_name)
291
+
292
+ # Use base class validation and setup methods
293
+ self.validate_optimization_inputs(prompt, dataset, metric)
294
+
295
+ # Extract MIPRO-specific parameters from kwargs
296
+ task_config = kwargs.pop("task_config", None)
297
+ if task_config is None:
298
+ raise ValueError("task_config is required for MiproOptimizer")
299
+
300
+ num_candidates = kwargs.pop("num_candidates", 10)
301
+ num_trials = kwargs.pop("num_trials", 3)
302
+ auto = kwargs.pop("auto", "light")
303
+
304
+ with self.create_optimization_context(dataset, metric) as optimization:
258
305
  result = self._optimize_prompt(
259
306
  dataset=dataset,
260
307
  metric=metric,
@@ -271,15 +318,15 @@ class MiproOptimizer(BaseOptimizer):
271
318
 
272
319
  def _optimize_prompt(
273
320
  self,
274
- dataset: Union[str, Dataset],
321
+ dataset: str | Dataset,
275
322
  metric: Callable,
276
323
  task_config: TaskConfig,
277
324
  num_candidates: int = 10,
278
- experiment_config: Optional[Dict] = None,
279
- optimization_id: Optional[str] = None,
280
- num_trials: Optional[int] = 3,
281
- n_samples: Optional[int] = 10,
282
- auto: Optional[Literal["light", "medium", "heavy"]] = "light",
325
+ experiment_config: dict | None = None,
326
+ optimization_id: str | None = None,
327
+ num_trials: int | None = 3,
328
+ n_samples: int | None = 10,
329
+ auto: Literal["light", "medium", "heavy"] | None = "light",
283
330
  **kwargs,
284
331
  ) -> OptimizationResult:
285
332
  logger.info("Preparing MIPRO optimization...")
@@ -306,27 +353,26 @@ class MiproOptimizer(BaseOptimizer):
306
353
  metric,
307
354
  task_config,
308
355
  num_candidates: int = 10,
309
- experiment_config: Optional[Dict] = None,
310
- optimization_id: Optional[str] = None,
311
- num_trials: Optional[int] = 3,
312
- n_samples: Optional[int] = 10,
313
- auto: Optional[Literal["light", "medium", "heavy"]] = "light",
356
+ experiment_config: dict | None = None,
357
+ optimization_id: str | None = None,
358
+ num_trials: int | None = 3,
359
+ n_samples: int | None = 10,
360
+ auto: Literal["light", "medium", "heavy"] | None = "light",
314
361
  **kwargs,
315
362
  ) -> None:
316
363
  # FIXME: Intermediate values:
317
- self.llm_call_counter = 0
364
+ self.reset_counters() # Reset counters for run
318
365
  prompt = task_config.instruction_prompt
319
366
  input_key = task_config.input_dataset_fields[0] # FIXME: allow all
320
367
  output_key = task_config.output_dataset_field
321
368
  self.tools = task_config.tools
322
369
  self.num_candidates = num_candidates
323
- self.seed = 42
370
+ self.auto = auto
324
371
  self.input_key = input_key
325
372
  self.output_key = output_key
326
373
  self.prompt = prompt
327
374
  self.num_trials = num_trials
328
375
  self.n_samples = n_samples
329
- self.auto = auto
330
376
 
331
377
  # Convert to values for MIPRO:
332
378
  if isinstance(dataset, str):
@@ -395,6 +441,19 @@ class MiproOptimizer(BaseOptimizer):
395
441
  logger.debug(f"Using DSPy module: {type(self.module).__name__}")
396
442
  logger.debug(f"Using metric function: {self.metric_function.__name__}")
397
443
 
444
+ def cleanup(self) -> None:
445
+ """
446
+ Clean up MIPRO-specific resources.
447
+ """
448
+ # Call parent cleanup
449
+ super().cleanup()
450
+
451
+ # Clear MIPRO-specific resources
452
+ self.tools = None
453
+ self.prompt = None
454
+
455
+ logger.debug("Cleaned up MIPRO-specific resources")
456
+
398
457
  def load_from_checkpoint(self, filename):
399
458
  """
400
459
  Load the module from a checkpoint.
@@ -508,12 +567,15 @@ class MiproOptimizer(BaseOptimizer):
508
567
  }
509
568
  ],
510
569
  score=0.0,
511
- metric_name=self.opik_metric.__name__
512
- if hasattr(self, "opik_metric")
513
- else "unknown_metric",
570
+ metric_name=(
571
+ self.opik_metric.__name__
572
+ if hasattr(self, "opik_metric")
573
+ else "unknown_metric"
574
+ ),
514
575
  details={"error": "No candidate programs generated by MIPRO"},
515
576
  history=mipro_history_processed,
516
- llm_calls=self.lm.llm_call_counter,
577
+ llm_calls=self.llm_call_counter,
578
+ tool_calls=self.tool_call_counter,
517
579
  )
518
580
 
519
581
  self.module = self.get_best().details["program"]
@@ -545,7 +607,8 @@ class MiproOptimizer(BaseOptimizer):
545
607
  demonstrations=best_program_details.demonstrations,
546
608
  details=best_program_details.details,
547
609
  history=mipro_history_processed,
548
- llm_calls=self.lm.llm_call_counter,
610
+ llm_calls=self.llm_call_counter,
611
+ tool_calls=self.tool_call_counter,
549
612
  )
550
613
 
551
614
  def get_best(self, position: int = 0) -> OptimizationResult:
@@ -553,6 +616,14 @@ class MiproOptimizer(BaseOptimizer):
553
616
  logger.error(
554
617
  "get_best() called but no best_programs found. MIPRO compile might have failed or yielded no results."
555
618
  )
619
+ # Get LLM call count from the optimizer if available
620
+ dspy_llm_calls = (
621
+ getattr(self.optimizer, "total_calls", 0)
622
+ if hasattr(self, "optimizer") and self.optimizer
623
+ else 0
624
+ )
625
+ actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
626
+
556
627
  return OptimizationResult(
557
628
  optimizer="MiproOptimizer",
558
629
  prompt=[
@@ -564,12 +635,15 @@ class MiproOptimizer(BaseOptimizer):
564
635
  }
565
636
  ],
566
637
  score=0.0,
567
- metric_name=getattr(self, "opik_metric", None).name
568
- if hasattr(self, "opik_metric") and self.opik_metric
569
- else "unknown_metric",
638
+ metric_name=(
639
+ getattr(self, "opik_metric", None).name
640
+ if hasattr(self, "opik_metric") and self.opik_metric
641
+ else "unknown_metric"
642
+ ),
570
643
  details={"error": "No programs generated or compile failed"},
571
644
  history=[],
572
- llm_calls=self.lm.llm_call_counter,
645
+ llm_calls=actual_llm_calls,
646
+ tool_calls=self.tool_call_counter,
573
647
  )
574
648
 
575
649
  score = self.best_programs[position]["score"]
@@ -587,6 +661,11 @@ class MiproOptimizer(BaseOptimizer):
587
661
  best_prompt = state["signature"]["instructions"]
588
662
  demos = [x.toDict() for x in state["demos"]]
589
663
 
664
+ # Get LLM call count from the DSPy program module
665
+ dspy_llm_calls = getattr(program_module, "total_calls", 0)
666
+ # Use the higher of our counter or DSPy's counter
667
+ actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
668
+
590
669
  print(best_prompt)
591
670
  return OptimizationResult(
592
671
  optimizer="MiproOptimizer",
@@ -596,5 +675,6 @@ class MiproOptimizer(BaseOptimizer):
596
675
  metric_name=self.opik_metric.__name__,
597
676
  demonstrations=demos,
598
677
  details={"program": program_module},
599
- llm_calls=self.lm.llm_call_counter,
678
+ llm_calls=actual_llm_calls,
679
+ tool_calls=self.tool_call_counter,
600
680
  )
@@ -1,5 +1,3 @@
1
- from typing import Dict, Optional
2
-
3
1
  import uuid
4
2
  import dspy
5
3
  import re
@@ -61,7 +59,7 @@ def opik_metric_to_dspy(metric, output):
61
59
 
62
60
 
63
61
  def create_dspy_training_set(
64
- data: list[dict], input: str, n_samples: Optional[int] = None
62
+ data: list[dict], input: str, n_samples: int | None = None
65
63
  ) -> list[dspy.Example]:
66
64
  """
67
65
  Turn a list of dicts into a list of dspy Examples
@@ -80,7 +78,7 @@ def create_dspy_training_set(
80
78
  return output
81
79
 
82
80
 
83
- def get_tool_prompts(tool_names, text: str) -> Dict[str, str]:
81
+ def get_tool_prompts(tool_names, text: str) -> dict[str, str]:
84
82
  """
85
83
  Extract the embedded tool prompts from a text.
86
84
  """
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any, List, Optional, TYPE_CHECKING
1
+ from typing import Any, TYPE_CHECKING
2
2
  import json
3
3
  import os
4
4
 
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
16
16
  from .optimization_config.chat_prompt import ChatPrompt
17
17
 
18
18
 
19
- def tools_to_dict(tools: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
19
+ def tools_to_dict(tools: dict[str, dict[str, Any]]) -> dict[str, Any]:
20
20
  retval = {}
21
21
  for name in tools:
22
22
  parts = {}
@@ -38,11 +38,11 @@ class OptimizableAgent:
38
38
  project_name (Optional[str]): The project name for tracking
39
39
  """
40
40
 
41
- model: Optional[str] = None
42
- model_kwargs: Dict[str, Any] = {}
43
- project_name: Optional[str] = "Default Project"
44
- input_dataset_field: Optional[str] = None
45
- prompts: Dict[str, "ChatPrompt"]
41
+ model: str | None = None
42
+ model_kwargs: dict[str, Any] = {}
43
+ project_name: str | None = "Default Project"
44
+ input_dataset_field: str | None = None
45
+ prompts: dict[str, "ChatPrompt"]
46
46
  prompt: "ChatPrompt"
47
47
 
48
48
  def __init__(self, prompt: "ChatPrompt") -> None:
@@ -71,8 +71,8 @@ class OptimizableAgent:
71
71
  @_throttle.rate_limited(_limiter)
72
72
  def _llm_complete(
73
73
  self,
74
- messages: List[Dict[str, str]],
75
- tools: Optional[List[Dict[str, str]]],
74
+ messages: list[dict[str, str]],
75
+ tools: list[dict[str, str]] | None,
76
76
  seed: int,
77
77
  ) -> Any:
78
78
  response = litellm.completion(
@@ -91,10 +91,10 @@ class OptimizableAgent:
91
91
 
92
92
  def llm_invoke(
93
93
  self,
94
- query: Optional[str] = None,
95
- messages: Optional[List[Dict[str, str]]] = None,
96
- seed: Optional[int] = None,
97
- allow_tool_use: Optional[bool] = False,
94
+ query: str | None = None,
95
+ messages: list[dict[str, str]] | None = None,
96
+ seed: int | None = None,
97
+ allow_tool_use: bool | None = False,
98
98
  ) -> str:
99
99
  """
100
100
  NOTE: this is the default LiteLLM API. It is used
@@ -147,6 +147,11 @@ class OptimizableAgent:
147
147
  "content": str(tool_result),
148
148
  }
149
149
  )
150
+ # Increment tool call counter if we have access to the optimizer
151
+ if hasattr(self, "optimizer") and hasattr(
152
+ self.optimizer, "increment_tool_counter"
153
+ ):
154
+ self.optimizer.increment_tool_counter()
150
155
  else:
151
156
  final_response = msg["content"]
152
157
  break
@@ -156,14 +161,14 @@ class OptimizableAgent:
156
161
  result = response.choices[0].message.content
157
162
  return result
158
163
 
159
- def invoke_dataset_item(self, dataset_item: Dict[str, str]) -> str:
164
+ def invoke_dataset_item(self, dataset_item: dict[str, str]) -> str:
160
165
  messages = self.prompt.get_messages(dataset_item)
161
166
  return self.invoke(messages)
162
167
 
163
168
  def invoke(
164
169
  self,
165
- messages: List[Dict[str, str]],
166
- seed: Optional[int] = None,
170
+ messages: list[dict[str, str]],
171
+ seed: int | None = None,
167
172
  ) -> str:
168
173
  """
169
174
  Invoke the agent with a dataset item.
@@ -1,4 +1,5 @@
1
- from typing import Any, Dict, List, Optional, Union, Callable
1
+ from typing import Any
2
+ from collections.abc import Callable
2
3
 
3
4
  import copy
4
5
 
@@ -10,7 +11,7 @@ from opik import track
10
11
  class Tool(BaseModel):
11
12
  name: str = Field(..., description="Name of the tool")
12
13
  description: str = Field(..., description="Description of the tool")
13
- parameters: Dict[str, Any] = Field(
14
+ parameters: dict[str, Any] = Field(
14
15
  ..., description="JSON Schema defining the input parameters for the tool"
15
16
  )
16
17
 
@@ -33,14 +34,14 @@ class ChatPrompt:
33
34
  def __init__(
34
35
  self,
35
36
  name: str = "chat-prompt",
36
- system: Optional[str] = None,
37
- user: Optional[str] = None,
38
- messages: Optional[List[Dict[str, str]]] = None,
39
- tools: Optional[List[Dict[str, Any]]] = None,
40
- function_map: Optional[Dict[str, Callable]] = None,
41
- model: Optional[str] = None,
42
- invoke: Optional[Callable] = None,
43
- project_name: Optional[str] = "Default Project",
37
+ system: str | None = None,
38
+ user: str | None = None,
39
+ messages: list[dict[str, str]] | None = None,
40
+ tools: list[dict[str, Any]] | None = None,
41
+ function_map: dict[str, Callable] | None = None,
42
+ model: str | None = None,
43
+ invoke: Callable | None = None,
44
+ project_name: str | None = "Default Project",
44
45
  **model_kwargs: Any,
45
46
  ) -> None:
46
47
  if system is None and user is None and messages is None:
@@ -97,8 +98,8 @@ class ChatPrompt:
97
98
 
98
99
  def get_messages(
99
100
  self,
100
- dataset_item: Optional[Dict[str, str]] = None,
101
- ) -> List[Dict[str, str]]:
101
+ dataset_item: dict[str, str] | None = None,
102
+ ) -> list[dict[str, str]]:
102
103
  # This is a copy, so we can alter the messages:
103
104
  messages = self._standardize_prompts()
104
105
 
@@ -113,8 +114,8 @@ class ChatPrompt:
113
114
  )
114
115
  return messages
115
116
 
116
- def _standardize_prompts(self, **kwargs: Any) -> List[Dict[str, str]]:
117
- standardize_messages: List[Dict[str, str]] = []
117
+ def _standardize_prompts(self, **kwargs: Any) -> list[dict[str, str]]:
118
+ standardize_messages: list[dict[str, str]] = []
118
119
 
119
120
  if self.system is not None:
120
121
  standardize_messages.append({"role": "system", "content": self.system})
@@ -128,13 +129,13 @@ class ChatPrompt:
128
129
 
129
130
  return copy.deepcopy(standardize_messages)
130
131
 
131
- def to_dict(self) -> Dict[str, Union[str, List[Dict[str, str]]]]:
132
+ def to_dict(self) -> dict[str, str | list[dict[str, str]]]:
132
133
  """Convert ChatPrompt to a dictionary for JSON serialization.
133
134
 
134
135
  Returns:
135
136
  Dict containing the serializable representation of this ChatPrompt
136
137
  """
137
- retval: Dict[str, Union[str, List[Dict[str, str]]]] = {}
138
+ retval: dict[str, str | list[dict[str, str]]] = {}
138
139
  if self.system is not None:
139
140
  retval["system"] = self.system
140
141
  if self.user is not None:
@@ -144,29 +145,49 @@ class ChatPrompt:
144
145
  return retval
145
146
 
146
147
  def copy(self) -> "ChatPrompt":
148
+ """Clone this prompt: messages, tools and model kwargs are deep-copied; function_map and invoke are shared by reference."""
149
+
150
+ # TODO(opik-mcp): once we introduce a dedicated MCP prompt subclass,
151
+ # migrate callers away from generic copies so optimizer metadata stays typed.
152
+ model_kwargs = (
153
+ copy.deepcopy(self.model_kwargs) if self.model_kwargs is not None else {}
154
+ )
147
155
  return ChatPrompt(
156
+ name=self.name,
148
157
  system=self.system,
149
158
  user=self.user,
150
159
  messages=copy.deepcopy(self.messages),
151
- tools=self.tools,
160
+ tools=copy.deepcopy(self.tools),
152
161
  function_map=self.function_map,
162
+ model=self.model,
163
+ invoke=self.invoke,
164
+ project_name=self.project_name,
165
+ **model_kwargs,
153
166
  )
154
167
 
155
- def set_messages(self, messages: List[Dict[str, Any]]) -> None:
168
+ def set_messages(self, messages: list[dict[str, Any]]) -> None:
156
169
  self.system = None
157
170
  self.user = None
158
171
  self.messages = copy.deepcopy(messages)
159
172
 
173
+ # TODO(opik): remove this stop-gap once MetaPromptOptimizer supports MCP.
174
+ # Provides a second-pass flow so tool results can be appended before
175
+ # rerunning the model.
176
+ def with_messages(self, messages: list[dict[str, Any]]) -> "ChatPrompt":
177
+ cloned = self.copy()
178
+ cloned.set_messages(messages)
179
+ return cloned
180
+
160
181
  @classmethod
161
182
  def model_validate(
162
183
  cls,
163
184
  obj: Any,
164
185
  *,
165
- strict: Optional[bool] = None,
166
- from_attributes: Optional[bool] = None,
167
- context: Optional[Any] = None,
168
- by_alias: Optional[bool] = None,
169
- by_name: Optional[bool] = None,
186
+ strict: bool | None = None,
187
+ from_attributes: bool | None = None,
188
+ context: Any | None = None,
189
+ by_alias: bool | None = None,
190
+ by_name: bool | None = None,
170
191
  ) -> "ChatPrompt":
171
192
  """Custom validation method to handle nested objects during deserialization."""
172
193
  return ChatPrompt(
@@ -1,6 +1,6 @@
1
1
  """Module containing configuration classes for optimization."""
2
2
 
3
- from typing import Any, List
3
+ from typing import Any
4
4
 
5
5
  import pydantic
6
6
 
@@ -12,6 +12,6 @@ class TaskConfig(pydantic.BaseModel):
12
12
 
13
13
  instruction_prompt: str
14
14
  use_chat_prompt: bool = False
15
- input_dataset_fields: List[str]
15
+ input_dataset_fields: list[str]
16
16
  output_dataset_field: str
17
- tools: List[Any] = []
17
+ tools: list[Any] = []
@@ -1,4 +1,5 @@
1
- from typing import Dict, Callable, Optional, Any, Union
1
+ from typing import Any
2
+ from collections.abc import Callable
2
3
 
3
4
  EVALUATED_LLM_TASK_OUTPUT = "llm_output"
4
5
 
@@ -8,8 +9,8 @@ class Mapper:
8
9
 
9
10
  def __init__(
10
11
  self,
11
- name: Optional[str] = None,
12
- transform: Optional[Callable[[Any], Any]] = None,
12
+ name: str | None = None,
13
+ transform: Callable[[Any], Any] | None = None,
13
14
  ):
14
15
  if name is not None and transform is not None:
15
16
  raise ValueError("Only one of name or transform can be provided")
@@ -27,9 +28,9 @@ class Mapper:
27
28
 
28
29
  def from_dataset_field(
29
30
  *,
30
- name: Optional[str] = None,
31
- transform: Optional[Callable[[Dict[str, Any]], Any]] = None,
32
- ) -> Union[str, Callable[[Dict[str, Any]], Any]]:
31
+ name: str | None = None,
32
+ transform: Callable[[dict[str, Any]], Any] | None = None,
33
+ ) -> str | Callable[[dict[str, Any]], Any]:
33
34
  if name is not None and transform is not None:
34
35
  raise ValueError("Only one of name or transform can be provided")
35
36
 
@@ -47,8 +48,8 @@ def from_llm_response_text() -> str:
47
48
 
48
49
 
49
50
  def from_agent_output(
50
- *, name: Optional[str] = None, transform: Optional[Callable[[Any], Any]] = None
51
- ) -> Union[str, Callable[[Any], Any]]:
51
+ *, name: str | None = None, transform: Callable[[Any], Any] | None = None
52
+ ) -> str | Callable[[Any], Any]:
52
53
  if name is not None and transform is not None:
53
54
  raise ValueError("Only one of name or transform can be provided")
54
55