opik-optimizer 1.0.6__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. opik_optimizer/__init__.py +4 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +402 -28
  4. opik_optimizer/data/context7_eval.jsonl +3 -0
  5. opik_optimizer/datasets/context7_eval.py +90 -0
  6. opik_optimizer/datasets/tiny_test.py +33 -34
  7. opik_optimizer/datasets/truthful_qa.py +2 -2
  8. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  9. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
  10. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
  11. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  12. opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
  13. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +154 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +30 -23
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +21 -16
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +22 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/utils/colbert.py +236 -0
  46. opik_optimizer/{utils.py → utils/core.py} +160 -33
  47. opik_optimizer/utils/dataset_utils.py +49 -0
  48. opik_optimizer/utils/prompt_segments.py +186 -0
  49. opik_optimizer-2.0.0.dist-info/METADATA +345 -0
  50. opik_optimizer-2.0.0.dist-info/RECORD +74 -0
  51. opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
  52. opik_optimizer-1.0.6.dist-info/METADATA +0 -181
  53. opik_optimizer-1.0.6.dist-info/RECORD +0 -50
  54. opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
  55. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  56. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/evolutionary_optimizer/helpers.py
@@ -0,0 +1,10 @@
+ from ..optimization_config import chat_prompt
+
+
+ class Helpers:
+     def _get_task_description_for_llm(self, prompt: chat_prompt.ChatPrompt) -> str:
+         """Generates a concise task description for LLM prompts that need context."""
+         description = "Task: Given a list of AI messages with placeholder values, generate an effective prompt. "
+         description += f"The original high-level instruction being optimized is: '{prompt.get_messages()}'. "
+         description += "The goal is to create an effective prompt that guides a language model to perform this task well."
+         return description
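The helpers mixin simply interpolates the current prompt messages into a task-description string. A minimal sketch of exercising it on its own follows; the ChatPrompt arguments are illustrative, and instantiating Helpers directly is only for demonstration (in the package it is mixed into the optimizer class):

from opik_optimizer.optimization_config import chat_prompt
from opik_optimizer.evolutionary_optimizer.helpers import Helpers

# Hypothetical standalone use; real code reaches this method via the optimizer.
prompt = chat_prompt.ChatPrompt(
    system="You are a helpful assistant.",
    user="Summarize: {text}",
)
description = Helpers()._get_task_description_for_llm(prompt)
print(description)  # "Task: Given a list of AI messages ... [{'role': 'system', ...}] ..."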
opik_optimizer/evolutionary_optimizer/llm_support.py
@@ -0,0 +1,136 @@
+ from typing import Any, TYPE_CHECKING
+
+ import logging
+ import os
+ import time
+ import random
+
+ import litellm
+ from litellm import exceptions as litellm_exceptions
+ from litellm.caching import Cache
+ from litellm.types.caching import LiteLLMCacheType
+ from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
+
+ from .. import _throttle
+
+
+ logger = logging.getLogger(__name__)
+
+
+ # Configure LiteLLM cache with safe fallback
+ try:
+     # Prefer a disk cache in a user-writable location
+     cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "litellm")
+     os.makedirs(cache_dir, exist_ok=True)
+     litellm.cache = Cache(type=LiteLLMCacheType.DISK, cache_dir=cache_dir)
+ except (PermissionError, OSError, FileNotFoundError):
+     # Fall back to in-memory cache to avoid disk timeouts/locks
+     litellm.cache = Cache(type=LiteLLMCacheType.MEMORY)
+
+ _rate_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
+
+
+ class LlmSupport:
+     if TYPE_CHECKING:
+         model: str
+         llm_call_counter: int
+         project_name: str | None
+         disable_litellm_monitoring: bool
+         temperature: float
+         max_tokens: int
+         top_p: float
+         frequency_penalty: float
+         presence_penalty: float
+
+         def increment_llm_counter(self) -> None: ...
+
+     @_throttle.rate_limited(_rate_limiter)
+     def _call_model(
+         self,
+         messages: list[dict[str, str]],
+         is_reasoning: bool = False,
+         optimization_id: str | None = None,
+     ) -> str:
+         """Call the model with the given prompt and return the response string."""
+         # Build base call params
+         llm_config_params: dict[str, Any] = {
+             "temperature": getattr(self, "temperature", 0.3),
+             "max_tokens": getattr(self, "max_tokens", 1000),
+             "top_p": getattr(self, "top_p", 1.0),
+             "frequency_penalty": getattr(self, "frequency_penalty", 0.0),
+             "presence_penalty": getattr(self, "presence_penalty", 0.0),
+         }
+
+         # Add Opik metadata unless disabled
+         try:
+             disable_monitoring_env = os.getenv(
+                 "OPIK_OPTIMIZER_DISABLE_LITELLM_MONITORING", "0"
+             )
+             disable_monitoring = getattr(
+                 self, "disable_litellm_monitoring", False
+             ) or disable_monitoring_env.lower() in ("1", "true", "yes")
+
+             if not disable_monitoring:
+                 metadata_for_opik: dict[str, Any] = {}
+                 pn = getattr(self, "project_name", None)
+                 if pn:
+                     metadata_for_opik["project_name"] = pn
+                     metadata_for_opik["opik"] = {"project_name": pn}
+                 if optimization_id and "opik" in metadata_for_opik:
+                     metadata_for_opik["opik"]["optimization_id"] = optimization_id
+                 metadata_for_opik["optimizer_name"] = self.__class__.__name__
+                 metadata_for_opik["opik_call_type"] = (
+                     "reasoning" if is_reasoning else "evaluation_llm_task_direct"
+                 )
+                 if metadata_for_opik:
+                     llm_config_params["metadata"] = metadata_for_opik
+
+                 # Try to add Opik monitoring callbacks; fall back silently on failure
+                 llm_config_params = (
+                     opik_litellm_monitor.try_add_opik_monitoring_to_params(  # type: ignore
+                         llm_config_params.copy()
+                     )
+                 )
+         except Exception as e:
+             logger.debug(f"Skipping Opik-LiteLLM monitoring setup: {e}")
+
+         # Retry policy for transient errors
+         max_retries = int(os.getenv("OPIK_OPTIMIZER_LITELLM_MAX_RETRIES", "3"))
+         base_sleep = float(os.getenv("OPIK_OPTIMIZER_LITELLM_BACKOFF", "0.5"))
+
+         for attempt in range(max_retries + 1):
+             try:
+                 logger.debug(
+                     f"Calling model '{self.model}' with messages: {messages}, params: {llm_config_params} (attempt {attempt + 1})"
+                 )
+                 response = litellm.completion(
+                     model=self.model, messages=messages, **llm_config_params
+                 )
+                 self.increment_llm_counter()
+                 return response.choices[0].message.content
+             except (
+                 litellm_exceptions.RateLimitError,
+                 litellm_exceptions.APIConnectionError,
+                 litellm_exceptions.InternalServerError,
+             ) as e:
+                 if attempt < max_retries:
+                     sleep_s = min(10.0, base_sleep * (2**attempt)) + random.uniform(
+                         0, 0.25
+                     )
+                     logger.warning(
+                         f"LiteLLM transient error ({type(e).__name__}): {e}. Retrying in {sleep_s:.2f}s..."
+                     )
+                     time.sleep(sleep_s)
+                     continue
+                 logger.error(f"LiteLLM error (final attempt): {e}")
+                 raise
+             except litellm_exceptions.ContextWindowExceededError as e:
+                 logger.error(f"LiteLLM Context Window Exceeded Error: {e}")
+                 raise
+             except Exception as e:
+                 logger.error(
+                     f"Error calling model '{self.model}': {type(e).__name__} - {e}"
+                 )
+                 raise
+         # Should never reach here
+         raise RuntimeError("LLM call did not return a response and did not raise")
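LlmSupport is written as a mixin: the TYPE_CHECKING block declares the attributes the host optimizer is expected to provide, and _call_model wraps litellm.completion with caching, rate limiting, optional Opik metadata, and retry with exponential backoff. A hedged sketch of a consumer, assuming only what the TYPE_CHECKING block declares (the MyOptimizer class itself is hypothetical, not part of the package):

from opik_optimizer.evolutionary_optimizer.llm_support import LlmSupport

class MyOptimizer(LlmSupport):
    """Hypothetical host class; the real optimizers define many more fields."""

    def __init__(self) -> None:
        # Attributes LlmSupport expects (see its TYPE_CHECKING block above).
        self.model = "openai/gpt-4o-mini"
        self.llm_call_counter = 0
        self.project_name = None
        self.disable_litellm_monitoring = False
        self.temperature = 0.3
        self.max_tokens = 1000
        self.top_p = 1.0
        self.frequency_penalty = 0.0
        self.presence_penalty = 0.0

    def increment_llm_counter(self) -> None:
        self.llm_call_counter += 1

# Transient-error retries are tunable via OPIK_OPTIMIZER_LITELLM_MAX_RETRIES
# (default 3) and OPIK_OPTIMIZER_LITELLM_BACKOFF (default 0.5 s, doubled per attempt).
reply = MyOptimizer()._call_model(messages=[{"role": "user", "content": "Say hello."}])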
opik_optimizer/evolutionary_optimizer/mcp.py
@@ -0,0 +1,249 @@
+ """MCP helper utilities for the Evolutionary Optimizer."""
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import re
+ import textwrap
+ from dataclasses import dataclass
+ from typing import Any
+
+ from opik_optimizer.optimization_config import chat_prompt
+ from opik_optimizer.utils.prompt_segments import (
+     apply_segment_updates,
+     extract_prompt_segments,
+ )
+
+ from . import prompts as evo_prompts
+ from . import reporting
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class EvolutionaryMCPContext:
+     tool_name: str
+     tool_segment_id: str
+     original_description: str
+     tool_metadata: dict[str, Any]
+     panel_style: str
+
+
+ def _tool_metadata_json(metadata: dict[str, Any]) -> str:
+     try:
+         return json.dumps(metadata, indent=2)
+     except (
+         TypeError,
+         ValueError,
+     ):  # pragma: no cover - defensive, shouldn't happen under normal circumstances
+         return str(metadata)
+
+
+ def generate_tool_description_variations(
+     optimizer: Any,
+     base_prompt: chat_prompt.ChatPrompt,
+     context: EvolutionaryMCPContext,
+     num_variations: int,
+ ) -> list[chat_prompt.ChatPrompt]:
+     if num_variations <= 0:
+         return []
+
+     instruction = textwrap.dedent(
+         evo_prompts.mcp_tool_rewrite_user_prompt(
+             tool_name=context.tool_name,
+             current_description=_current_tool_description(
+                 base_prompt, context.tool_segment_id
+             )
+             or context.original_description,
+             tool_metadata_json=_tool_metadata_json(context.tool_metadata),
+             num_variations=num_variations,
+         )
+     ).strip()
+
+     try:
+         response = optimizer._call_model(  # type: ignore[attr-defined]
+             messages=[
+                 {
+                     "role": "system",
+                     "content": evo_prompts.mcp_tool_rewrite_system_prompt(),
+                 },
+                 {"role": "user", "content": instruction},
+             ],
+             is_reasoning=True,
+             optimization_id=getattr(optimizer, "_current_optimization_id", None),
+         )
+
+         payload = _extract_json_payload(response)
+         prompts_payload = payload.get("prompts")
+         if not isinstance(prompts_payload, list):
+             raise ValueError("LLM response missing 'prompts' list")
+
+         candidates: list[chat_prompt.ChatPrompt] = []
+         seen: set[str] = set()
+         for item in prompts_payload:
+             if not isinstance(item, dict):
+                 continue
+             description = item.get("tool_description")
+             if not isinstance(description, str) or not description.strip():
+                 continue
+             normalized = description.strip()
+             if normalized in seen:
+                 continue
+             seen.add(normalized)
+             updated_prompt = _apply_description(base_prompt, context, normalized)
+             reporting.display_tool_description(
+                 normalized,
+                 f"Candidate tool description ({context.tool_name})",
+                 context.panel_style,
+             )
+             candidates.append(updated_prompt)
+             if len(candidates) >= num_variations:
+                 break
+
+         return candidates
+     except Exception as exc:  # pragma: no cover - fallback path
+         logger.warning(f"Failed to generate MCP tool descriptions: {exc}")
+         return []
+
+
+ def initialize_population_mcp(
+     optimizer: Any,
+     prompt: chat_prompt.ChatPrompt,
+     context: EvolutionaryMCPContext,
+ ) -> list[chat_prompt.ChatPrompt]:
+     population_size = getattr(optimizer, "population_size", 1)
+     with reporting.initializing_population(
+         verbose=getattr(optimizer, "verbose", 1)
+     ) as init_pop_report:
+         init_pop_report.start(population_size)
+
+         population = [prompt]
+         num_to_generate = max(0, population_size - 1)
+         if num_to_generate > 0:
+             candidates = generate_tool_description_variations(
+                 optimizer,
+                 prompt,
+                 context,
+                 num_to_generate,
+             )
+             population.extend(candidates[:num_to_generate])
+
+         seen: set[str] = set()
+         final_population: list[chat_prompt.ChatPrompt] = []
+         for candidate in population:
+             key = json.dumps(candidate.get_messages())
+             if key in seen:
+                 continue
+             seen.add(key)
+             final_population.append(candidate)
+
+         while len(final_population) < population_size:
+             final_population.append(prompt)
+
+         init_pop_report.end(final_population)
+         return final_population[:population_size]
+
+
+ def tool_description_mutation(
+     optimizer: Any,
+     prompt: chat_prompt.ChatPrompt,
+     context: EvolutionaryMCPContext,
+ ) -> chat_prompt.ChatPrompt | None:
+     candidates = generate_tool_description_variations(optimizer, prompt, context, 1)
+     if not candidates:
+         return None
+
+     description = _current_tool_description(candidates[0], context.tool_segment_id)
+     if description:
+         reporting.display_tool_description(
+             description,
+             f"Updated tool description ({context.tool_name})",
+             context.panel_style,
+         )
+     return candidates[0]
+
+
+ def finalize_mcp_result(
+     result: Any,
+     context: EvolutionaryMCPContext,
+     panel_style: str,
+ ) -> None:
+     final_tools = (
+         result.details.get("final_tools") if isinstance(result.details, dict) else None
+     )
+     tool_prompts = {
+         (tool.get("function", {}).get("name") or tool.get("name")): tool.get(
+             "function", {}
+         ).get("description")
+         for tool in (final_tools or [])
+     }
+     if tool_prompts.get(context.tool_name):
+         reporting.display_tool_description(
+             tool_prompts[context.tool_name],
+             f"Final tool description ({context.tool_name})",
+             panel_style,
+         )
+
+     if not tool_prompts and context.original_description:
+         tool_prompts = {context.tool_name: context.original_description}
+
+     if tool_prompts:
+         result.tool_prompts = tool_prompts
+
+
+ # ---------------------------------------------------------------------------
+ # Internal helpers
+ # ---------------------------------------------------------------------------
+
+
+ def _current_tool_description(
+     prompt: chat_prompt.ChatPrompt,
+     tool_segment_id: str,
+ ) -> str:
+     segments = {
+         segment.segment_id: segment for segment in extract_prompt_segments(prompt)
+     }
+     target = segments.get(tool_segment_id)
+     return target.content if target else ""
+
+
+ def _extract_json_payload(response: str) -> dict[str, Any]:
+     try:
+         return json.loads(response)
+     except json.JSONDecodeError:
+         match = re.search(r"\{.*\}", response, re.DOTALL)
+         if not match:
+             raise ValueError("No JSON object found in LLM response")
+         return json.loads(match.group())
+
+
+ def _apply_description(
+     prompt: chat_prompt.ChatPrompt,
+     context: EvolutionaryMCPContext,
+     description: str,
+ ) -> chat_prompt.ChatPrompt:
+     updated_prompt = apply_segment_updates(
+         prompt,
+         {context.tool_segment_id: description},
+     )
+     _sync_system_description(updated_prompt, description)
+     return updated_prompt
+
+
+ def _sync_system_description(prompt: chat_prompt.ChatPrompt, description: str) -> None:
+     if not prompt.system:
+         return
+
+     marker_start = "<<TOOL_DESCRIPTION>>"
+     marker_end = "<<END_TOOL_DESCRIPTION>>"
+
+     start = prompt.system.find(marker_start)
+     end = prompt.system.find(marker_end)
+     if start == -1 or end == -1 or end <= start:
+         return
+
+     prefix = prompt.system[: start + len(marker_start)]
+     suffix = prompt.system[end:]
+     formatted_description = f"\n{description.strip()}\n"
+     prompt.system = f"{prefix}{formatted_description}{suffix}"
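_sync_system_description only rewrites the text between the <<TOOL_DESCRIPTION>> and <<END_TOOL_DESCRIPTION>> markers in the system message, leaving everything outside the marker pair untouched. A small illustration of that contract; the prompt content, tool name, and replacement description below are made up for the example:

from opik_optimizer.optimization_config import chat_prompt
from opik_optimizer.evolutionary_optimizer.mcp import _sync_system_description

prompt = chat_prompt.ChatPrompt(
    system=(
        "You can call the search_docs tool.\n"
        "<<TOOL_DESCRIPTION>>\nOld description.\n<<END_TOOL_DESCRIPTION>>\n"
        "Answer concisely."
    ),
    user="{question}",
)

_sync_system_description(prompt, "Search the indexed documentation and return matching snippets.")
# prompt.system now carries the new description between the markers; the
# surrounding instructions are unchanged.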