opik-optimizer 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +28 -11
  4. opik_optimizer/colbert.py +236 -0
  5. opik_optimizer/data/context7_eval.jsonl +3 -0
  6. opik_optimizer/datasets/context7_eval.py +90 -0
  7. opik_optimizer/datasets/tiny_test.py +33 -34
  8. opik_optimizer/datasets/truthful_qa.py +2 -2
  9. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  10. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
  11. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
  12. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  13. opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +152 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +20 -20
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +18 -17
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +21 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/{utils.py → utils/core.py} +111 -26
  46. opik_optimizer/utils/dataset_utils.py +49 -0
  47. opik_optimizer/utils/prompt_segments.py +186 -0
  48. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
  49. opik_optimizer-1.1.0.dist-info/RECORD +73 -0
  50. opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
  51. opik_optimizer-1.0.5.dist-info/RECORD +0 -50
  52. opik_optimizer-1.0.5.dist-info/licenses/LICENSE +0 -21
  53. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
  54. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
opik_optimizer/evolutionary_optimizer/prompts.py
@@ -0,0 +1,305 @@
+# Centralized prompt templates used by EvolutionaryOptimizer. This file contains
+# only string builders and constants; it has no side effects.
+
+
+INFER_STYLE_SYSTEM_PROMPT = """You are an expert in linguistic analysis and prompt engineering. Your task is to analyze a few input-output examples from a dataset and provide a concise, actionable description of the desired output style. This description will be used to guide other LLMs in generating and refining prompts.
+
+Focus on characteristics like:
+- **Length**: (e.g., single word, short phrase, one sentence, multiple sentences, a paragraph)
+- **Tone**: (e.g., factual, formal, informal, conversational, academic)
+- **Structure**: (e.g., direct answer first, explanation then answer, list, yes/no then explanation)
+- **Content Details**: (e.g., includes only the answer, includes reasoning, provides examples, avoids pleasantries)
+- **Keywords/Phrasing**: Any recurring keywords or phrasing patterns in the outputs.
+
+Provide a single string that summarizes this style. This summary should be directly usable as an instruction for another LLM.
+For example: 'Outputs should be a single, concise proper noun.' OR 'Outputs should be a short paragraph explaining the reasoning, followed by a direct answer, avoiding conversational pleasantries.' OR 'Outputs are typically 1-2 sentences, providing a direct factual answer.'
+Return ONLY this descriptive string, with no preamble or extra formatting.
+"""
+
+
+def style_inference_user_prompt(examples_str: str) -> str:
+    return f"""Please analyze the following examples from a dataset and provide a concise, actionable description of the REQUIRED output style for the target LLM. Before describing the output style, make sure to understand the dataset content and structure as it can include input, output and metadata fields. This description will be used to guide other LLMs in generating and refining prompts.
+
+{examples_str}
+
+Based on these examples, what is the desired output style description?
+Remember to focus on aspects like length, tone, structure, content details, and any recurring keywords or phrasing patterns in the outputs.
+The description should be a single string that can be directly used as an instruction for another LLM.
+Return ONLY this descriptive string.
+"""
+
+
+def semantic_mutation_system_prompt(output_style_guidance: str | None) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return (
+        "You are a prompt engineering expert. Your goal is to modify prompts to improve their "
+        f"effectiveness in eliciting specific types of answers, particularly matching the style: '{style}'. "
+        "Follow the specific modification instruction provided."
+    )
+
+
+def synonyms_system_prompt() -> str:
+    return (
+        "You are a helpful assistant that provides synonyms. Return only the synonym word, "
+        "no explanation or additional text."
+    )
+
+
+def rephrase_system_prompt() -> str:
+    return (
+        "You are a helpful assistant that rephrases text. Return only the modified phrase, "
+        "no explanation or additional text."
+    )
+
+
+def fresh_start_system_prompt(output_style_guidance: str | None) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return (
+        "You are an expert prompt engineer. Your task is to generate novel, effective prompts from scratch "
+        "based on a task description, specifically aiming for prompts that elicit answers in the style: "
+        f"'{style}'. Output ONLY a raw JSON list of strings."
+    )
+
+
+def variation_system_prompt(output_style_guidance: str | None) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""You are an expert prompt engineer specializing in creating diverse and effective prompts. Given an initial prompt, your task is to generate a diverse set of alternative prompts.
+
+For each prompt variation, consider:
+1. Different levels of specificity and detail, including significantly more detailed and longer versions.
+2. Various ways to structure the instruction, exploring more complex sentence structures and phrasings.
+3. Alternative phrasings that maintain the core intent but vary in style and complexity.
+4. Different emphasis on key components, potentially elaborating on them.
+5. Various ways to express constraints or requirements.
+6. Different approaches to clarity and conciseness, but also explore more verbose and explanatory styles.
+7. Alternative ways to guide the model's response format.
+8. Consider variations that are substantially longer and more descriptive than the original.
+
+The generated prompts should guide a target LLM to produce outputs in the following style: '{style}'
+
+Return a JSON array of prompts with the following structure:
+{{
+"prompts": [
+{{
+"prompt": "alternative prompt 1",
+"strategy": "brief description of the variation strategy used, e.g., 'focused on eliciting specific output style'"
+}},
+{{
+"prompt": "alternative prompt 2",
+"strategy": "brief description of the variation strategy used"
+}}
+]
+}}
+Each prompt variation should aim to get the target LLM to produce answers matching the desired style: '{style}'.
+"""
+
+
+def llm_crossover_system_prompt(output_style_guidance: str | None) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""You are an expert prompt engineer specializing in creating novel prompts by intelligently blending existing ones.
+Given two parent prompts, your task is to generate one or two new child prompts that effectively combine the strengths, styles, or core ideas of both parents.
+The children should be coherent and aim to explore a potentially more effective region of the prompt design space, with a key goal of eliciting responses from the target language model in the following style: '{style}'.
+
+Consider the following when generating children:
+- Identify the key instructions, constraints, and desired output formats in each parent, paying attention to any hints about desired output style.
+- Explore ways to merge these elements such that the resulting prompt strongly guides the target LLM towards the desired output style.
+- You can create a child that is a direct blend, or one that takes a primary structure from one parent and incorporates specific elements from the other, always optimizing for clear instruction towards the desired output style.
+- If generating two children, try to make them distinct from each other and from the parents, perhaps by emphasizing different aspects of the parental combination that could lead to the desired output style.
+
+All generated prompts must aim for eliciting answers in the style: '{style}'.
+
+Return a JSON object that is a list of both child prompts. Each child prompt is a list of LLM messages. Example:
+[
+{{"role": "<role>", "content": "<content>"}},
+{{"role": "<role>", "content": "<content>"}}
+]
+
+
+"""
+
+
+def radical_innovation_system_prompt(output_style_guidance: str | None) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""You are an expert prompt engineer and a creative problem solver.
+Given a task description and an existing prompt for that task (which might be underperforming), your goal is to generate a new, significantly improved, and potentially very different prompt.
+Do not just make minor edits. Think about alternative approaches, structures, and phrasings that could lead to better performance.
+Consider clarity, specificity, constraints, and how to best guide the language model for the described task TO PRODUCE OUTPUTS IN THE FOLLOWING STYLE: '{style}'.
+Return only the new prompt string, with no preamble or explanation.
+"""
+
+
+def llm_crossover_user_prompt(
+    parent1_messages: list[dict[str, str]],
+    parent2_messages: list[dict[str, str]],
+    output_style_guidance: str | None,
+) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""Parent Prompt 1:
+'''{parent1_messages}'''
+
+Parent Prompt 2:
+'''{parent2_messages}'''
+
+Desired output style from target LLM for children prompts: '{style}'
+
+Please generate TWO child prompts by intelligently blending the ideas, styles, or structures from these two parents, ensuring the children aim to elicit the desired output style.
+Follow the instructions provided in the system prompt regarding the JSON output format:
+[
+{{"role": "<role>", "content": "<content>"}}, {{"role": "<role>", "content": "<content>"}}
+]
+"""
+
+
+def mutation_strategy_prompts(output_style_guidance: str | None) -> dict[str, str]:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return {
+        "rephrase": (
+            "Create a different way to express the same instruction, possibly with a different "
+            "length or structure, ensuring it still aims for an answer from the target LLM in the style of: "
+            f"'{style}'."
+        ),
+        "simplify": (
+            "Simplify the instruction while maintaining its core meaning, potentially making it more concise, "
+            "to elicit an answer in the style of: "
+            f"'{style}'."
+        ),
+        "elaborate": (
+            "Add more relevant detail and specificity to the instruction, potentially increasing its length, "
+            "but only if it helps achieve a more accurate answer from the target LLM in the style of: "
+            f"'{style}'."
+        ),
+        "restructure": (
+            "Change the structure of the instruction (e.g., reorder sentences, combine/split ideas) while keeping its intent, ensuring the new structure strongly guides towards an output in the style of: "
+            f"'{style}'."
+        ),
+        "focus": (
+            "Emphasize the key aspects of the instruction, perhaps by rephrasing or adding clarifying statements, "
+            "to better elicit an answer in the style of: "
+            f"'{style}'."
+        ),
+        "increase_complexity_and_detail": (
+            "Significantly elaborate on this instruction. Add more details, examples, context, or constraints to make it more comprehensive. "
+            "The goal of this elaboration is to make the prompt itself more detailed, so that it VERY CLEARLY guides the target LLM to produce a highly accurate final answer in the style of: "
+            f"'{style}'. The prompt can be long if needed to achieve this output style."
+        ),
+    }
+
+
+def semantic_mutation_user_prompt(
+    prompt_messages: list[dict[str, str]],
+    task_description: str,
+    output_style_guidance: str | None,
+    strategy_instruction: str,
+) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""Given this prompt: '{prompt_messages}'
+Task context: {task_description}
+Desired output style from target LLM: '{style}'
+Instruction for this modification: {strategy_instruction}.
+Return only the modified prompt message list, nothing else. Make sure to return a valid JSON object.
+"""
+
+
+def radical_innovation_user_prompt(
+    task_description: str,
+    output_style_guidance: str | None,
+    existing_prompt_messages: list[dict[str, str]],
+) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""Task Context:
+{task_description}
+Desired output style from target LLM: '{style}'
+
+Existing Prompt (which may be underperforming):
+'''{existing_prompt_messages}'''
+
+Please generate a new, significantly improved, and potentially very different prompt for this task.
+Focus on alternative approaches, better clarity, or more effective guidance for the language model, aiming for the desired output style.
+Return only the new prompt list object.
+"""
+
+
+def fresh_start_user_prompt(
+    task_description: str,
+    output_style_guidance: str | None,
+    num_to_generate: int,
+) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""Here is a description of a task: ```{task_description}```
+
+The goal is to generate prompts that will make a target LLM produce responses in the following style: ```{style}```.
+
+Please generate {num_to_generate} diverse and effective prompt(s) for a language model to accomplish this task, ensuring they guide towards this specific output style.
+Focus on clarity, completeness, and guiding the model effectively towards the desired style. Explore different structural approaches.
+
+Example of valid response: [
+["role": "<role>", "content": "<Prompt targeting specified style.>"],
+["role": "<role>", "content": "<Another prompt designed for the output style.>"]
+]
+
+Your response MUST be a valid JSON list of AI messages. Do NOT include any other text, explanations, or Markdown formatting like ```json ... ``` around the list.
+"""
+
+
+def variation_user_prompt(
+    initial_prompt_messages: list[dict[str, str]],
+    task_description: str,
+    output_style_guidance: str | None,
+    num_variations: int,
+) -> str:
+    style = (
+        output_style_guidance
+        or "Produce clear, effective, and high-quality responses suitable for the task."
+    )
+    return f"""Initial prompt:'''{initial_prompt_messages}'''
+Task context: ```{task_description}```
+Desired output style from target LLM: '{style}'
+
+Generate {num_variations} diverse alternative prompts based on the initial prompt above, keeping the task context and desired output style in mind.
+All generated prompt variations should strongly aim to elicit answers from the target LLM matching the style: '{style}'.
+For each variation, consider how to best achieve this style, e.g., by adjusting specificity, structure, phrasing, constraints, or by explicitly requesting it.
+
+Return a JSON array of prompts with the following structure:
+{{
+"prompts": [
+{{
+"prompt": [{{"role": "<role>", "content": "<content>"}}],
+"strategy": "brief description of the variation strategy used, e.g., 'direct instruction for target style'"
+}}
+// ... more prompts if num_variations > 1
+]
+}}
+Ensure a good mix of variations, all targeting the specified output style from the end LLM.

+Return a valid JSON object that is correctly escaped. Return nothing else, do not include any additional text or Markdown formatting.
+"""
opik_optimizer/evolutionary_optimizer/reporting.py
@@ -1,6 +1,6 @@
 from contextlib import contextmanager
 from io import StringIO
-from typing import Any, List
+from typing import Any
 
 from rich.panel import Panel
 from rich.text import Text
@@ -117,6 +117,16 @@ def initializing_population(verbose: int = 1) -> Any:
                     f"│ Generating {num_fresh_starts} fresh prompts based on the task description."
                 )
 
+        def failed_fresh_prompts(self, num_fresh_starts: int, error: str) -> None:
+            if verbose >= 1:
+                console.print(
+                    Text(
+                        f"│ Failed to generate {num_fresh_starts} fresh prompts: {error}",
+                        style="dim red",
+                    )
+                )
+                console.print("│")
+
         def success_fresh_prompts(self, num_fresh_starts: int) -> None:
             if verbose >= 1:
                 console.print(
@@ -155,7 +165,7 @@ def initializing_population(verbose: int = 1) -> Any:
                 )
                 console.print("│")
 
-        def end(self, population_prompts: List[chat_prompt.ChatPrompt]) -> None:
+        def end(self, population_prompts: list[chat_prompt.ChatPrompt]) -> None:
             if verbose >= 1:
                 console.print(
                     f"│ Successfully initialized population with {len(population_prompts)} prompts."
@@ -205,11 +215,13 @@ def evaluate_initial_population(verbose: int = 1) -> Any:
             if verbose >= 1:
                 if score >= baseline_score:
                     console.print(
-                        Text(f"\r Prompt {index+1} score was: {score}.", style="green")
+                        Text(
+                            f"\r Prompt {index + 1} score was: {score}.", style="green"
+                        )
                     )
                 else:
                     console.print(
-                        Text(f"\r Prompt {index+1} score was: {score}.", style="dim")
+                        Text(f"\r Prompt {index + 1} score was: {score}.", style="dim")
                    )
 
     # Use our log suppression context manager and yield the reporter
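
Reviewer note: the added `failed_fresh_prompts` callback fills a gap in the reporter API, since population initialization previously had no way to surface a failed fresh-prompt generation. A hypothetical call-site sketch; the real caller lives in the new population_ops.py, whose body is not shown in this diff:

    from opik_optimizer.evolutionary_optimizer import reporting

    num_fresh_starts = 4
    population_prompts: list = []

    with reporting.initializing_population(verbose=1) as reporter:
        try:
            # ... ask the LLM for `num_fresh_starts` fresh prompts here ...
            reporter.success_fresh_prompts(num_fresh_starts)
        except Exception as exc:
            reporter.failed_fresh_prompts(num_fresh_starts, str(exc))
        reporter.end(population_prompts)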
opik_optimizer/evolutionary_optimizer/style_ops.py
@@ -0,0 +1,86 @@
+from typing import TYPE_CHECKING, Any
+
+import logging
+
+import opik
+
+from . import prompts as evo_prompts
+from . import reporting
+from ..optimization_config import chat_prompt
+
+
+logger = logging.getLogger(__name__)
+
+
+class StyleOps:
+    if TYPE_CHECKING:
+        verbose: int
+        _call_model: Any
+
+    def _infer_output_style_from_dataset(
+        self, dataset: opik.Dataset, prompt: chat_prompt.ChatPrompt, n_examples: int = 5
+    ) -> str | None:
+        """Analyzes dataset examples to infer the desired output style using the LLM."""
+        with reporting.infer_output_style(
+            verbose=self.verbose
+        ) as report_infer_output_style:
+            report_infer_output_style.start_style_inference()
+
+            try:
+                items_to_process = dataset.get_items(n_examples)
+            except Exception as e:
+                report_infer_output_style.error(
+                    f"Failed to get items from dataset '{dataset.name}': {e}"
+                )
+                return None
+
+            if not items_to_process:
+                report_infer_output_style.error(
+                    f"Dataset '{dataset.name}' is empty. Cannot infer output style."
+                )
+                return None
+
+            if len(items_to_process) < min(n_examples, 2):
+                report_infer_output_style.error(
+                    f"Not enough dataset items (found {len(items_to_process)}) to reliably infer output style. Need at least {min(n_examples, 2)}."
+                )
+                return None
+
+            examples_str = ""
+            for i, item_content in enumerate(items_to_process):
+                filtered_content: dict[str, str] = {
+                    x: y for x, y in item_content.items() if x != "id"
+                }
+                examples_str += (
+                    f"Example {i + 1}:\nDataset Item:\n{filtered_content}\n---\n"
+                )
+
+            user_prompt_for_style_inference = evo_prompts.style_inference_user_prompt(
+                examples_str
+            )
+
+            try:
+                inferred_style = self._call_model(
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": evo_prompts.INFER_STYLE_SYSTEM_PROMPT,
+                        },
+                        {"role": "user", "content": user_prompt_for_style_inference},
+                    ],
+                    is_reasoning=True,
+                )
+                inferred_style = inferred_style.strip()
+                if inferred_style:
+                    report_infer_output_style.success(inferred_style)
+                    return inferred_style
+                else:
+                    report_infer_output_style.error(
+                        "LLM returned empty string for inferred output style."
+                    )
+                    return None
+            except Exception as e:
+                report_infer_output_style.error(
+                    f"Error during output style inference: {e}"
+                )
+                return None
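
Reviewer note: StyleOps is written as a mixin; the `if TYPE_CHECKING:` block declares the attributes it borrows from its host class (`verbose` and `_call_model`) without creating them at runtime. A sketch of the contract a host must satisfy; the host name and the full `_call_model` signature are assumptions, with the real host presumably being the refactored EvolutionaryOptimizer:

    from opik_optimizer.evolutionary_optimizer.style_ops import StyleOps


    class HostOptimizer(StyleOps):
        """Hypothetical host class for illustration only."""

        verbose = 1

        def _call_model(self, messages: list[dict[str, str]], is_reasoning: bool = False) -> str:
            # StyleOps only needs chat messages in and model text out;
            # the real implementation routes through the configured LLM.
            raise NotImplementedError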
opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py
@@ -1,4 +1,5 @@
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+from typing import Any
+from collections.abc import Callable
 
 import json
 import logging
@@ -56,7 +57,7 @@ Respond only with the JSON object. Do not include any explanation or extra text.
 
 
 class FewShotPromptTemplate(BaseModel):
-    message_list_with_placeholder: List[Dict[str, str]]
+    message_list_with_placeholder: list[dict[str, str]]
     example_template: str
 
 
@@ -119,10 +120,10 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
     def _call_model(
         self,
         model: str,
-        messages: List[Dict[str, str]],
+        messages: list[dict[str, str]],
         seed: int,
-        model_kwargs: Dict[str, Any],
-    ) -> Dict[str, Any]:
+        model_kwargs: dict[str, Any],
+    ) -> dict[str, Any]:
        """
        Args:
            model: The model to use for the call
@@ -159,8 +160,8 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         return response
 
     def _split_dataset(
-        self, dataset: List[Dict[str, Any]], train_ratio: float
-    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+        self, dataset: list[dict[str, Any]], train_ratio: float
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
         """
         Split the dataset into training and validation sets.
 
@@ -194,7 +195,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         self,
         model: str,
         prompt: chat_prompt.ChatPrompt,
-        few_shot_examples: List[Dict[str, Any]],
+        few_shot_examples: list[dict[str, Any]],
     ) -> FewShotPromptTemplate:
         """
         Generate a few-shot prompt template that can be used to insert examples into the prompt.
@@ -215,7 +216,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             "examples": few_shot_examples,
         }
 
-        messages: List[Dict[str, str]] = [
+        messages: list[dict[str, str]] = [
            {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
            {"role": "user", "content": json.dumps(user_message)},
        ]
@@ -244,9 +245,9 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         metric: Callable,
         baseline_score: float,
         n_trials: int = 10,
-        optimization_id: Optional[str] = None,
-        experiment_config: Optional[Dict] = None,
-        n_samples: Optional[int] = None,
+        optimization_id: str | None = None,
+        experiment_config: dict | None = None,
+        n_samples: int | None = None,
     ) -> optimization_result.OptimizationResult:
         reporting.start_optimization_run(verbose=self.verbose)
 
@@ -450,6 +451,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
             best_score=best_score,
             best_prompt=best_prompt,
             verbose=self.verbose,
+            tools=getattr(prompt, "tools", None),
         )
 
         return optimization_result.OptimizationResult(
@@ -489,9 +491,9 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         dataset: Dataset,
         metric: Callable,
         n_trials: int = 10,
-        agent_class: Optional[Type[OptimizableAgent]] = None,
-        experiment_config: Optional[Dict] = None,
-        n_samples: Optional[int] = None,
+        agent_class: type[OptimizableAgent] | None = None,
+        experiment_config: dict | None = None,
+        n_samples: int | None = None,
     ) -> optimization_result.OptimizationResult:
         """
         Args:
@@ -557,6 +559,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
                 "n_samples": n_samples,
             },
             verbose=self.verbose,
+            tools=getattr(prompt, "tools", None),
         )
 
         utils.disable_experiment_reporting()
@@ -614,10 +617,10 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
         prompt: chat_prompt.ChatPrompt,
         dataset: opik.Dataset,
         metric: Callable,
-        n_samples: Optional[int] = None,
-        dataset_item_ids: Optional[List[str]] = None,
-        experiment_config: Optional[Dict] = None,
-        optimization_id: Optional[str] = None,
+        n_samples: int | None = None,
+        dataset_item_ids: list[str] | None = None,
+        experiment_config: dict | None = None,
+        optimization_id: str | None = None,
         **kwargs: Any,
     ) -> float:
         """
@@ -673,14 +676,14 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
     def _build_task_from_messages(
         self,
         prompt: chat_prompt.ChatPrompt,
-        messages: List[Dict[str, str]],
-        few_shot_examples: Optional[str] = None,
-    ) -> Callable[[Dict[str, Any]], Dict[str, Any]]:
+        messages: list[dict[str, str]],
+        few_shot_examples: str | None = None,
+    ) -> Callable[[dict[str, Any]], dict[str, Any]]:
         new_prompt = prompt.copy()
         new_prompt.set_messages(messages)
         agent = self.agent_class(new_prompt)
 
-        def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, Any]:
+        def llm_task(dataset_item: dict[str, Any]) -> dict[str, Any]:
            """
            Process a single dataset item through the LLM task.
 
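
Reviewer note: most of this file's churn is mechanical typing modernisation, with PEP 585 built-in generics replacing `List`/`Dict`/`Tuple`/`Type`, PEP 604 unions replacing `Optional`, and `Callable` moving to `collections.abc`. The before/after pattern, distilled into a hypothetical function for illustration:

    # 1.0.5 style: typing-module generics
    from typing import Dict, List, Optional

    def evaluate(items: List[Dict[str, str]], n_samples: Optional[int] = None) -> Optional[float]: ...

    # 1.1.0 style: built-in generics and `X | None` unions
    # (requires Python 3.10+, or 3.9+ with `from __future__ import annotations`)
    def evaluate(items: list[dict[str, str]], n_samples: int | None = None) -> float | None: ...

The two `tools=getattr(prompt, "tools", None)` additions are the only behavioural changes here: they pass the prompt's tool definitions, when present, through to the reporting helpers.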
opik_optimizer/few_shot_bayesian_optimizer/reporting.py
@@ -1,6 +1,6 @@
 from contextlib import contextmanager
 from io import StringIO
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
+from typing import Any, Optional, TYPE_CHECKING
 
 from rich.panel import Panel
 from rich.text import Text
@@ -46,9 +46,16 @@ def display_evaluation(
         yield Reporter()
     finally:
         if verbose >= 1:
-            console.print(
-                Text(f"\r Baseline score was: {score:.4f}.\n", style="green")
-            )
+            if score is not None:
+                console.print(
+                    Text(
+                        f"\r Baseline score was: {score:.4f}.\n", style="green"
+                    )
+                )
+            else:
+                console.print(
+                    Text("\r Baseline score was: None\n", style="red")
+                )
 
 
 @contextmanager
@@ -121,7 +128,7 @@ def start_optimization_trial(
 
     # Create a simple object with a method to set the score
     class Reporter:
-        def start_trial(self, messages: List[Dict[str, str]]) -> None:
+        def start_trial(self, messages: list[dict[str, str]]) -> None:
             if verbose >= 1:
                 console.print(
                     Text(
opik_optimizer/gepa_optimizer/__init__.py
@@ -0,0 +1,3 @@
+from .gepa_optimizer import GepaOptimizer
+
+__all__ = ["GepaOptimizer"]
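
Reviewer note: the package marker re-exports the new optimizer so it is importable from the subpackage root. A usage sketch; the constructor argument shown is an assumption for illustration, not a documented signature (see gepa_optimizer.py, +556 lines in this release, for the real API):

    from opik_optimizer.gepa_optimizer import GepaOptimizer

    # Hypothetical constructor call; check GepaOptimizer's signature before use.
    optimizer = GepaOptimizer(model="openai/gpt-4o-mini")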