opik-optimizer 1.0.6__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (54)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +28 -11
  4. opik_optimizer/colbert.py +236 -0
  5. opik_optimizer/data/context7_eval.jsonl +3 -0
  6. opik_optimizer/datasets/context7_eval.py +90 -0
  7. opik_optimizer/datasets/tiny_test.py +33 -34
  8. opik_optimizer/datasets/truthful_qa.py +2 -2
  9. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  10. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
  11. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
  12. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  13. opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +152 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +20 -20
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +16 -16
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +21 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/{utils.py → utils/core.py} +111 -26
  46. opik_optimizer/utils/dataset_utils.py +49 -0
  47. opik_optimizer/utils/prompt_segments.py +186 -0
  48. {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
  49. opik_optimizer-1.1.0.dist-info/RECORD +73 -0
  50. opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
  51. opik_optimizer-1.0.6.dist-info/RECORD +0 -50
  52. opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
  53. {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
  54. {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
opik_optimizer/evolutionary_optimizer/helpers.py
@@ -0,0 +1,10 @@
+ from ..optimization_config import chat_prompt
+
+
+ class Helpers:
+     def _get_task_description_for_llm(self, prompt: chat_prompt.ChatPrompt) -> str:
+         """Generates a concise task description for LLM prompts that need context."""
+         description = "Task: Given a list of AI messages with placeholder values, generate an effective prompt. "
+         description += f"The original high-level instruction being optimized is: '{prompt.get_messages()}'. "
+         description += "The goal is to create an effective prompt that guides a language model to perform this task well."
+         return description
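
Note: `Helpers`, like the other new `*_ops.py` modules below, is a mixin; the slimmed-down `evolutionary_optimizer.py` (+124 -941 above) inherits them and supplies shared state such as `verbose` and `output_style_guidance` (the `if TYPE_CHECKING:` blocks below only hint at those attributes). A minimal sketch of that composition pattern, with an illustrative class and attribute values rather than the actual optimizer definition:

    from opik_optimizer.evolutionary_optimizer.helpers import Helpers
    from opik_optimizer.evolutionary_optimizer.llm_support import LlmSupport
    from opik_optimizer.evolutionary_optimizer.mutation_ops import MutationOps
    from opik_optimizer.evolutionary_optimizer.population_ops import PopulationOps

    # Hypothetical composition: each mixin reads shared attributes off `self`,
    # which the concrete optimizer class is responsible for defining.
    class SketchOptimizer(Helpers, LlmSupport, MutationOps, PopulationOps):
        model = "openai/gpt-4o-mini"  # assumed model name
        llm_call_counter = 0
        verbose = 1
        output_style_guidance = "Answer concisely."
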
opik_optimizer/evolutionary_optimizer/llm_support.py
@@ -0,0 +1,134 @@
+ from typing import Any, TYPE_CHECKING
+
+ import logging
+ import os
+ import time
+ import random
+
+ import litellm
+ from litellm import exceptions as litellm_exceptions
+ from litellm.caching import Cache
+ from litellm.types.caching import LiteLLMCacheType
+ from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
+
+ from .. import _throttle
+
+
+ logger = logging.getLogger(__name__)
+
+
+ # Configure LiteLLM cache with safe fallback
+ try:
+     # Prefer a disk cache in a user-writable location
+     cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "litellm")
+     os.makedirs(cache_dir, exist_ok=True)
+     litellm.cache = Cache(type=LiteLLMCacheType.DISK, cache_dir=cache_dir)
+ except (PermissionError, OSError, FileNotFoundError):
+     # Fall back to in-memory cache to avoid disk timeouts/locks
+     litellm.cache = Cache(type=LiteLLMCacheType.MEMORY)
+
+ _rate_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
+
+
+ class LlmSupport:
+     if TYPE_CHECKING:
+         model: str
+         llm_call_counter: int
+         project_name: str | None
+         disable_litellm_monitoring: bool
+         temperature: float
+         max_tokens: int
+         top_p: float
+         frequency_penalty: float
+         presence_penalty: float
+
+     @_throttle.rate_limited(_rate_limiter)
+     def _call_model(
+         self,
+         messages: list[dict[str, str]],
+         is_reasoning: bool = False,
+         optimization_id: str | None = None,
+     ) -> str:
+         """Call the model with the given prompt and return the response string."""
+         # Build base call params
+         llm_config_params: dict[str, Any] = {
+             "temperature": getattr(self, "temperature", 0.3),
+             "max_tokens": getattr(self, "max_tokens", 1000),
+             "top_p": getattr(self, "top_p", 1.0),
+             "frequency_penalty": getattr(self, "frequency_penalty", 0.0),
+             "presence_penalty": getattr(self, "presence_penalty", 0.0),
+         }
+
+         # Add Opik metadata unless disabled
+         try:
+             disable_monitoring_env = os.getenv(
+                 "OPIK_OPTIMIZER_DISABLE_LITELLM_MONITORING", "0"
+             )
+             disable_monitoring = getattr(
+                 self, "disable_litellm_monitoring", False
+             ) or disable_monitoring_env.lower() in ("1", "true", "yes")
+
+             if not disable_monitoring:
+                 metadata_for_opik: dict[str, Any] = {}
+                 pn = getattr(self, "project_name", None)
+                 if pn:
+                     metadata_for_opik["project_name"] = pn
+                     metadata_for_opik["opik"] = {"project_name": pn}
+                 if optimization_id and "opik" in metadata_for_opik:
+                     metadata_for_opik["opik"]["optimization_id"] = optimization_id
+                 metadata_for_opik["optimizer_name"] = self.__class__.__name__
+                 metadata_for_opik["opik_call_type"] = (
+                     "reasoning" if is_reasoning else "evaluation_llm_task_direct"
+                 )
+                 if metadata_for_opik:
+                     llm_config_params["metadata"] = metadata_for_opik
+
+             # Try to add Opik monitoring callbacks; fall back silently on failure
+             llm_config_params = (
+                 opik_litellm_monitor.try_add_opik_monitoring_to_params(  # type: ignore
+                     llm_config_params.copy()
+                 )
+             )
+         except Exception as e:
+             logger.debug(f"Skipping Opik-LiteLLM monitoring setup: {e}")
+
+         # Retry policy for transient errors
+         max_retries = int(os.getenv("OPIK_OPTIMIZER_LITELLM_MAX_RETRIES", "3"))
+         base_sleep = float(os.getenv("OPIK_OPTIMIZER_LITELLM_BACKOFF", "0.5"))
+
+         for attempt in range(max_retries + 1):
+             try:
+                 logger.debug(
+                     f"Calling model '{self.model}' with messages: {messages}, params: {llm_config_params} (attempt {attempt + 1})"
+                 )
+                 response = litellm.completion(
+                     model=self.model, messages=messages, **llm_config_params
+                 )
+                 self.llm_call_counter += 1
+                 return response.choices[0].message.content
+             except (
+                 litellm_exceptions.RateLimitError,
+                 litellm_exceptions.APIConnectionError,
+                 litellm_exceptions.InternalServerError,
+             ) as e:
+                 if attempt < max_retries:
+                     sleep_s = min(10.0, base_sleep * (2**attempt)) + random.uniform(
+                         0, 0.25
+                     )
+                     logger.warning(
+                         f"LiteLLM transient error ({type(e).__name__}): {e}. Retrying in {sleep_s:.2f}s..."
+                     )
+                     time.sleep(sleep_s)
+                     continue
+                 logger.error(f"LiteLLM error (final attempt): {e}")
+                 raise
+             except litellm_exceptions.ContextWindowExceededError as e:
+                 logger.error(f"LiteLLM Context Window Exceeded Error: {e}")
+                 raise
+             except Exception as e:
+                 logger.error(
+                     f"Error calling model '{self.model}': {type(e).__name__} - {e}"
+                 )
+                 raise
+         # Should never reach here
+         raise RuntimeError("LLM call did not return a response and did not raise")
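
The retry loop above retries only transient LiteLLM failures (rate limits, connection errors, internal server errors) with capped exponential backoff plus jitter; context-window errors and anything else re-raise immediately. A worked trace of the sleep schedule under the defaults shown in the code (`OPIK_OPTIMIZER_LITELLM_MAX_RETRIES=3`, `OPIK_OPTIMIZER_LITELLM_BACKOFF=0.5`):

    import random

    base_sleep, max_retries = 0.5, 3
    for attempt in range(max_retries):
        # Same formula as _call_model: exponential growth, capped at 10s, plus jitter.
        sleep_s = min(10.0, base_sleep * (2**attempt)) + random.uniform(0, 0.25)
        print(f"retry {attempt + 1}: sleep ~{sleep_s:.2f}s")
    # retry 1: ~0.50-0.75s, retry 2: ~1.00-1.25s, retry 3: ~2.00-2.25s;
    # a failure on the final attempt (attempt == max_retries) re-raises instead of sleeping.
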
opik_optimizer/evolutionary_optimizer/mutation_ops.py
@@ -0,0 +1,292 @@
+ from typing import Any, TYPE_CHECKING
+
+ import json
+ import logging
+ import random
+
+ from . import prompts as evo_prompts
+ from ..optimization_config import chat_prompt
+ from .. import utils
+ from . import reporting
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class MutationOps:
+     if TYPE_CHECKING:
+         _calculate_population_diversity: Any
+         DEFAULT_DIVERSITY_THRESHOLD: float
+         verbose: int
+         output_style_guidance: str
+         _get_task_description_for_llm: Any
+         _call_model: Any
+
+     def _deap_mutation(
+         self, individual: Any, initial_prompt: chat_prompt.ChatPrompt
+     ) -> Any:
+         """Enhanced mutation operation with multiple strategies."""
+         prompt = chat_prompt.ChatPrompt(messages=individual)
+
+         # Choose mutation strategy based on current diversity
+         diversity = self._calculate_population_diversity()
+
+         # Determine thresholds based on diversity
+         if diversity < self.DEFAULT_DIVERSITY_THRESHOLD:
+             # Low diversity - use more aggressive mutations (higher chance for semantic)
+             semantic_threshold = 0.5
+             structural_threshold = 0.8  # semantic_threshold + 0.3
+         else:
+             # Good diversity - use more conservative mutations (higher chance for word_level)
+             semantic_threshold = 0.4
+             structural_threshold = 0.7  # semantic_threshold + 0.3
+
+         mutation_choice = random.random()
+
+         if mutation_choice > structural_threshold:
+             mutated_prompt = self._word_level_mutation_prompt(prompt)
+             reporting.display_success(
+                 " Mutation successful, prompt has been edited by randomizing words (word-level mutation).",
+                 verbose=self.verbose,
+             )
+             return type(individual)(mutated_prompt.get_messages())
+         elif mutation_choice > semantic_threshold:
+             mutated_prompt = self._structural_mutation(prompt)
+             reporting.display_success(
+                 " Mutation successful, prompt has been edited by reordering, combining, or splitting sentences (structural mutation).",
+                 verbose=self.verbose,
+             )
+             return type(individual)(mutated_prompt.get_messages())
+         else:
+             mutated_prompt = self._semantic_mutation(prompt, initial_prompt)
+             reporting.display_success(
+                 " Mutation successful, prompt has been edited using an LLM (semantic mutation).",
+                 verbose=self.verbose,
+             )
+             return type(individual)(mutated_prompt.get_messages())
+
+     def _semantic_mutation(
+         self, prompt: chat_prompt.ChatPrompt, initial_prompt: chat_prompt.ChatPrompt
+     ) -> chat_prompt.ChatPrompt:
+         """Enhanced semantic mutation with multiple strategies."""
+         current_output_style_guidance = self.output_style_guidance
+         if random.random() < 0.1:
+             return self._radical_innovation_mutation(prompt, initial_prompt)
+
+         try:
+             strategy = random.choice(
+                 [
+                     "rephrase",
+                     "simplify",
+                     "elaborate",
+                     "restructure",
+                     "focus",
+                     "increase_complexity_and_detail",
+                 ]
+             )
+
+             strategy_prompts = evo_prompts.mutation_strategy_prompts(
+                 current_output_style_guidance
+             )
+             user_prompt_for_semantic_mutation = (
+                 evo_prompts.semantic_mutation_user_prompt(
+                     prompt.get_messages(),
+                     self._get_task_description_for_llm(initial_prompt),
+                     current_output_style_guidance,
+                     strategy_prompts[strategy],
+                 )
+             )
+             response = self._call_model(
+                 messages=[
+                     {
+                         "role": "system",
+                         "content": evo_prompts.semantic_mutation_system_prompt(
+                             current_output_style_guidance
+                         ),
+                     },
+                     {"role": "user", "content": user_prompt_for_semantic_mutation},
+                 ],
+                 is_reasoning=True,
+             )
+
+             try:
+                 messages = utils.json_to_dict(response.strip())
+             except Exception as parse_exc:
+                 raise RuntimeError(
+                     f"Error parsing semantic mutation response as JSON. "
+                     f"Response: {response!r}\nOriginal error: {parse_exc}"
+                 ) from parse_exc
+             return chat_prompt.ChatPrompt(messages=messages)
+         except Exception as e:
+             reporting.display_error(
+                 f" Error in semantic mutation, this is usually a parsing error: {e}",
+                 verbose=self.verbose,
+             )
+             return prompt
+
+     def _structural_mutation(
+         self, prompt: chat_prompt.ChatPrompt
+     ) -> chat_prompt.ChatPrompt:
+         """Perform structural mutation (reordering, combining, splitting)."""
+         mutated_messages: list[dict[str, str]] = []
+
+         for message in prompt.get_messages():
+             content = message["content"]
+             role = message["role"]
+
+             sentences = [s.strip() for s in content.split(".") if s.strip()]
+             if len(sentences) <= 1:
+                 mutated_messages.append(
+                     {"role": role, "content": self._word_level_mutation(content)}
+                 )
+                 continue
+
+             mutation_type = random.random()
+             if mutation_type < 0.3:
+                 random.shuffle(sentences)
+                 mutated_messages.append(
+                     {"role": role, "content": ". ".join(sentences) + "."}
+                 )
+                 continue
+             elif mutation_type < 0.6:
+                 if len(sentences) >= 2:
+                     idx = random.randint(0, len(sentences) - 2)
+                     combined = sentences[idx] + " and " + sentences[idx + 1]
+                     sentences[idx : idx + 2] = [combined]
+                     mutated_messages.append(
+                         {"role": role, "content": ". ".join(sentences) + "."}
+                     )
+                     continue
+             else:
+                 idx = random.randint(0, len(sentences) - 1)
+                 words = sentences[idx].split()
+                 if len(words) > 3:
+                     split_point = random.randint(2, len(words) - 2)
+                     sentences[idx : idx + 1] = [
+                         " ".join(words[:split_point]),
+                         " ".join(words[split_point:]),
+                     ]
+                     mutated_messages.append(
+                         {"role": role, "content": ". ".join(sentences) + "."}
+                     )
+                     continue
+                 else:
+                     mutated_messages.append({"role": role, "content": content})
+
+         return chat_prompt.ChatPrompt(messages=mutated_messages)
+
+     def _word_level_mutation_prompt(
+         self, prompt: chat_prompt.ChatPrompt
+     ) -> chat_prompt.ChatPrompt:
+         mutated_messages: list[dict[str, str]] = []
+         for message in prompt.get_messages():
+             mutated_messages.append(
+                 {
+                     "role": message["role"],
+                     "content": self._word_level_mutation(message["content"]),
+                 }
+             )
+         return chat_prompt.ChatPrompt(messages=mutated_messages)
+
+     def _word_level_mutation(self, msg_content: str) -> str:
+         """Perform word-level mutation."""
+         words = msg_content.split()
+         if len(words) <= 1:
+             return msg_content
+
+         mutation_type = random.random()
+         if mutation_type < 0.3:
+             idx = random.randint(0, len(words) - 1)
+             words[idx] = self._get_synonym(words[idx])
+         elif mutation_type < 0.6:
+             if len(words) > 2:
+                 i, j = random.sample(range(len(words)), 2)
+                 words[i], words[j] = words[j], words[i]
+         else:
+             idx = random.randint(0, len(words) - 1)
+             words[idx] = self._modify_phrase(words[idx])
+
+         return " ".join(words)
+
+     def _get_synonym(self, word: str) -> str:
+         """Get a synonym for a word using LLM."""
+         try:
+             response = self._call_model(
+                 messages=[
+                     {"role": "system", "content": evo_prompts.synonyms_system_prompt()},
+                     {
+                         "role": "user",
+                         "content": (
+                             f"Give me a single synonym for the word '{word}'. Return only the synonym, nothing else."
+                         ),
+                     },
+                 ],
+                 is_reasoning=True,
+             )
+             return response.strip()
+         except Exception as e:
+             logger.warning(f"Error getting synonym for '{word}': {e}")
+             return word
+
+     def _modify_phrase(self, phrase: str) -> str:
+         """Modify a phrase while preserving meaning using LLM."""
+         try:
+             response = self._call_model(
+                 messages=[
+                     {"role": "system", "content": evo_prompts.rephrase_system_prompt()},
+                     {
+                         "role": "user",
+                         "content": (
+                             f"Modify this phrase while keeping the same meaning: '{phrase}'. Return only the modified phrase, nothing else."
+                         ),
+                     },
+                 ],
+                 is_reasoning=True,
+             )
+             return response.strip()
+         except Exception as e:
+             logger.warning(f"Error modifying phrase '{phrase}': {e}")
+             return phrase
+
+     def _radical_innovation_mutation(
+         self, prompt: chat_prompt.ChatPrompt, initial_prompt: chat_prompt.ChatPrompt
+     ) -> chat_prompt.ChatPrompt:
+         """Attempts to generate a significantly improved and potentially very different prompt using an LLM."""
+         logger.debug(
+             f"Attempting radical innovation for prompt: {json.dumps(prompt.get_messages())[:70]}..."
+         )
+         task_desc_for_llm = self._get_task_description_for_llm(initial_prompt)
+         current_output_style_guidance = self.output_style_guidance
+
+         user_prompt_for_radical_innovation = evo_prompts.radical_innovation_user_prompt(
+             task_desc_for_llm, current_output_style_guidance, prompt.get_messages()
+         )
+         try:
+             new_prompt_str = self._call_model(
+                 messages=[
+                     {
+                         "role": "system",
+                         "content": evo_prompts.radical_innovation_system_prompt(
+                             current_output_style_guidance
+                         ),
+                     },
+                     {"role": "user", "content": user_prompt_for_radical_innovation},
+                 ],
+                 is_reasoning=True,
+             )
+             logger.info(
+                 f"Radical innovation LLM result (truncated): {new_prompt_str[:200]}"
+             )
+             try:
+                 new_messages = utils.json_to_dict(new_prompt_str)
+             except Exception as parse_exc:
+                 logger.warning(
+                     f"Failed to parse LLM output in radical innovation mutation for prompt '{json.dumps(prompt.get_messages())[:50]}...'. Output: {new_prompt_str[:200]}. Error: {parse_exc}. Returning original."
+                 )
+                 return prompt
+             return chat_prompt.ChatPrompt(messages=new_messages)
+         except Exception as e:
+             logger.warning(
+                 f"Radical innovation mutation failed for prompt '{json.dumps(prompt.get_messages())[:50]}...': {e}. Returning original."
+             )
+             return prompt
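
The thresholds in `_deap_mutation` above translate into a simple strategy mix: a draw above `structural_threshold` picks word-level mutation, a draw above `semantic_threshold` picks structural mutation, and anything else picks the LLM-driven semantic mutation. A small sketch of the resulting probabilities, computed directly from the two threshold pairs in the code:

    # Probabilities implied by the thresholds in _deap_mutation.
    for label, (semantic_t, structural_t) in {
        "low diversity": (0.5, 0.8),
        "good diversity": (0.4, 0.7),
    }.items():
        print(
            f"{label}: semantic={semantic_t:.0%}, "
            f"structural={structural_t - semantic_t:.0%}, "
            f"word-level={1 - structural_t:.0%}"
        )
    # low diversity: semantic=50%, structural=30%, word-level=20%
    # good diversity: semantic=40%, structural=30%, word-level=30%

So when the population is stagnating, half of all mutations are handed to the LLM; when diversity is healthy, cheaper word-level edits get a larger share.
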
opik_optimizer/evolutionary_optimizer/population_ops.py
@@ -0,0 +1,223 @@
+ from typing import Any, TYPE_CHECKING
+
+ import json
+ import logging
+
+ from deap import tools
+ from deap import creator as _creator
+
+ from . import prompts as evo_prompts
+ from . import reporting
+ from ..optimization_config import chat_prompt
+ from .. import utils
+
+
+ logger = logging.getLogger(__name__)
+ creator = _creator
+
+
+ class PopulationOps:
+     if TYPE_CHECKING:
+         _get_task_description_for_llm: Any
+         output_style_guidance: str
+         _call_model: Any
+         toolbox: Any
+         # Hints for mixin attributes provided by the primary optimizer class
+         _gens_since_pop_improvement: int
+         _best_primary_score_history: list[float]
+         DEFAULT_RESTART_THRESHOLD: float
+         DEFAULT_RESTART_GENERATIONS: int
+         enable_moo: bool
+         elitism_size: int
+         population_size: int
+         verbose: int
+
+     def _initialize_population(
+         self, prompt: chat_prompt.ChatPrompt
+     ) -> list[chat_prompt.ChatPrompt]:
+         """Initialize the population with diverse variations of the initial prompt,
+         including some 'fresh start' prompts based purely on task description.
+         All generated prompts should aim to elicit answers matching self.output_style_guidance.
+         """
+         with reporting.initializing_population(verbose=self.verbose) as init_pop_report:
+             init_pop_report.start(self.population_size)
+
+             population = [prompt]
+             if self.population_size <= 1:
+                 return population
+
+             num_to_generate_total = self.population_size - 1
+             num_fresh_starts = max(1, int(num_to_generate_total * 0.2))
+             num_variations_on_initial = num_to_generate_total - num_fresh_starts
+
+             task_desc_for_llm = self._get_task_description_for_llm(prompt)
+             current_output_style_guidance = self.output_style_guidance
+
+             # Fresh starts
+             if num_fresh_starts > 0:
+                 init_pop_report.start_fresh_prompts(num_fresh_starts)
+                 fresh_start_user_prompt = evo_prompts.fresh_start_user_prompt(
+                     task_desc_for_llm, current_output_style_guidance, num_fresh_starts
+                 )
+                 try:
+                     response_content = self._call_model(
+                         messages=[
+                             {
+                                 "role": "system",
+                                 "content": evo_prompts.fresh_start_system_prompt(
+                                     current_output_style_guidance
+                                 ),
+                             },
+                             {"role": "user", "content": fresh_start_user_prompt},
+                         ],
+                         is_reasoning=True,
+                     )
+
+                     logger.debug(
+                         f"Raw LLM response for fresh start prompts: {response_content}"
+                     )
+
+                     fresh_prompts = utils.json_to_dict(response_content)
+                     if isinstance(fresh_prompts, list):
+                         if all(isinstance(p, dict) for p in fresh_prompts) and all(
+                             p.get("role") is not None for p in fresh_prompts
+                         ):
+                             population.append(
+                                 chat_prompt.ChatPrompt(messages=fresh_prompts)
+                             )
+                             init_pop_report.success_fresh_prompts(1)
+                         elif all(isinstance(p, list) for p in fresh_prompts):
+                             population.extend(
+                                 [
+                                     chat_prompt.ChatPrompt(messages=p)
+                                     for p in fresh_prompts[:num_fresh_starts]
+                                 ]
+                             )
+                             init_pop_report.success_fresh_prompts(
+                                 len(fresh_prompts[:num_fresh_starts])
+                             )
+                     else:
+                         init_pop_report.failed_fresh_prompts(
+                             num_fresh_starts,
+                             f"LLM response for fresh starts was not a valid list of strings or was empty: {response_content}. Skipping fresh start prompts.",
+                         )
+                 except json.JSONDecodeError as e_json:
+                     init_pop_report.failed_fresh_prompts(
+                         num_fresh_starts,
+                         f"JSONDecodeError generating fresh start prompts: {e_json}. LLM response: '{response_content}'. Skipping fresh start prompts.",
+                     )
+                 except Exception as e:
+                     init_pop_report.failed_fresh_prompts(
+                         num_fresh_starts,
+                         f"Error generating fresh start prompts: {e}. Skipping fresh start prompts.",
+                     )
+
+             # Variations on the initial prompt
+             if num_variations_on_initial > 0:
+                 init_pop_report.start_variations(num_variations_on_initial)
+                 user_prompt_for_variation = evo_prompts.variation_user_prompt(
+                     prompt.get_messages(),
+                     task_desc_for_llm,
+                     current_output_style_guidance,
+                     num_variations_on_initial,
+                 )
+                 try:
+                     response_content_variations = self._call_model(
+                         messages=[
+                             {
+                                 "role": "system",
+                                 "content": evo_prompts.variation_system_prompt(
+                                     current_output_style_guidance
+                                 ),
+                             },
+                             {"role": "user", "content": user_prompt_for_variation},
+                         ],
+                         is_reasoning=True,
+                     )
+                     logger.debug(
+                         f"Raw response for population variations: {response_content_variations}"
+                     )
+                     json_response_variations = json.loads(response_content_variations)
+                     generated_prompts_variations = [
+                         p["prompt"]
+                         for p in json_response_variations.get("prompts", [])
+                         if isinstance(p, dict) and "prompt" in p
+                     ]
+
+                     if generated_prompts_variations:
+                         init_pop_report.success_variations(
+                             len(
+                                 generated_prompts_variations[:num_variations_on_initial]
+                             )
+                         )
+                         population.extend(
+                             [
+                                 chat_prompt.ChatPrompt(messages=p)
+                                 for p in generated_prompts_variations[
+                                     :num_variations_on_initial
+                                 ]
+                             ]
+                         )
+                     else:
+                         init_pop_report.failed_variations(
+                             num_variations_on_initial,
+                             "Could not parse 'prompts' list for variations. Skipping variations.",
+                         )
+                 except Exception as e:
+                     init_pop_report.failed_variations(
+                         num_variations_on_initial,
+                         f"Error calling LLM for initial population variations: {e}",
+                     )
+
+             # Ensure population is of the required size using unique prompts
+             final_population_set: set[str] = set()
+             final_population_list: list[chat_prompt.ChatPrompt] = []
+             for p in population:
+                 if json.dumps(p.get_messages()) not in final_population_set:
+                     final_population_set.add(json.dumps(p.get_messages()))
+                     final_population_list.append(p)
+
+             init_pop_report.end(final_population_list)
+             return final_population_list[: self.population_size]
+
+     def _should_restart_population(self, curr_best: float) -> bool:
+         """Update internal counters and decide if we should trigger a population restart."""
+         if self._best_primary_score_history:
+             threshold = self._best_primary_score_history[-1] * (
+                 1 + self.DEFAULT_RESTART_THRESHOLD
+             )
+             if curr_best < threshold:
+                 self._gens_since_pop_improvement += 1  # type: ignore[attr-defined]
+             else:
+                 self._gens_since_pop_improvement = 0  # type: ignore[attr-defined]
+         self._best_primary_score_history.append(curr_best)
+         return self._gens_since_pop_improvement >= self.DEFAULT_RESTART_GENERATIONS  # type: ignore[attr-defined]
+
+     def _restart_population(
+         self,
+         hof: tools.HallOfFame,
+         population: list[Any],
+         best_prompt_so_far: chat_prompt.ChatPrompt,
+     ) -> list[Any]:
+         """Return a fresh, evaluated population seeded by elites."""
+         if self.enable_moo:
+             elites = list(hof)
+         else:
+             elites = tools.selBest(population, self.elitism_size)
+
+         seed_prompt = (
+             chat_prompt.ChatPrompt(
+                 messages=max(elites, key=lambda x: x.fitness.values[0])
+             )
+             if elites
+             else best_prompt_so_far
+         )
+
+         prompt_variants = self._initialize_population(seed_prompt)
+         new_pop = [creator.Individual(p.get_messages()) for p in prompt_variants]
+
+         for ind, fit in zip(new_pop, map(self.toolbox.evaluate, new_pop)):
+             ind.fitness.values = fit
+
+         self._gens_since_pop_improvement = 0  # type: ignore[attr-defined]
+         return new_pop
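
For context, `_should_restart_population` counts a generation as stale unless its best score beats the previous generation's best by more than `DEFAULT_RESTART_THRESHOLD`, and signals a restart once `DEFAULT_RESTART_GENERATIONS` stale generations accumulate. A self-contained trace of that rule (the constant values here are assumed for illustration; the real defaults are class constants on the optimizer):

    RESTART_THRESHOLD = 0.05    # assumed: need >5% relative improvement
    RESTART_GENERATIONS = 3     # assumed: restart after 3 stale generations

    history: list[float] = []
    stale = 0
    for curr_best in [0.50, 0.51, 0.51, 0.52]:
        # Same logic as _should_restart_population, inlined for the demo.
        if history:
            if curr_best < history[-1] * (1 + RESTART_THRESHOLD):
                stale += 1
            else:
                stale = 0
        history.append(curr_best)
        print(curr_best, "restart" if stale >= RESTART_GENERATIONS else "continue")
    # 0.50 continue, 0.51 continue, 0.51 continue, 0.52 restart

When the rule fires, `_restart_population` reseeds from the strongest elite (or the best prompt seen so far), regenerates variants via `_initialize_population`, evaluates the new individuals, and resets the stale-generation counter.
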