opik-optimizer 1.0.6__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +4 -0
- opik_optimizer/_throttle.py +2 -1
- opik_optimizer/base_optimizer.py +402 -28
- opik_optimizer/data/context7_eval.jsonl +3 -0
- opik_optimizer/datasets/context7_eval.py +90 -0
- opik_optimizer/datasets/tiny_test.py +33 -34
- opik_optimizer/datasets/truthful_qa.py +2 -2
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
- opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
- opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
- opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
- opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
- opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
- opik_optimizer/gepa_optimizer/__init__.py +3 -0
- opik_optimizer/gepa_optimizer/adapter.py +154 -0
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
- opik_optimizer/gepa_optimizer/reporting.py +181 -0
- opik_optimizer/logging_config.py +42 -7
- opik_optimizer/mcp_utils/__init__.py +22 -0
- opik_optimizer/mcp_utils/mcp.py +541 -0
- opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
- opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
- opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
- opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
- opik_optimizer/mipro_optimizer/_lm.py +30 -23
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
- opik_optimizer/mipro_optimizer/utils.py +2 -4
- opik_optimizer/optimizable_agent.py +21 -16
- opik_optimizer/optimization_config/chat_prompt.py +44 -23
- opik_optimizer/optimization_config/configs.py +3 -3
- opik_optimizer/optimization_config/mappers.py +9 -8
- opik_optimizer/optimization_result.py +22 -14
- opik_optimizer/reporting_utils.py +61 -10
- opik_optimizer/task_evaluator.py +9 -8
- opik_optimizer/utils/__init__.py +15 -0
- opik_optimizer/utils/colbert.py +236 -0
- opik_optimizer/{utils.py → utils/core.py} +160 -33
- opik_optimizer/utils/dataset_utils.py +49 -0
- opik_optimizer/utils/prompt_segments.py +186 -0
- opik_optimizer-2.0.0.dist-info/METADATA +345 -0
- opik_optimizer-2.0.0.dist-info/RECORD +74 -0
- opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
- opik_optimizer-1.0.6.dist-info/METADATA +0 -181
- opik_optimizer-1.0.6.dist-info/RECORD +0 -50
- opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/evolutionary_optimizer/helpers.py (new file, +10 lines)

```diff
@@ -0,0 +1,10 @@
+from ..optimization_config import chat_prompt
+
+
+class Helpers:
+    def _get_task_description_for_llm(self, prompt: chat_prompt.ChatPrompt) -> str:
+        """Generates a concise task description for LLM prompts that need context."""
+        description = "Task: Given a list of AI messages with placeholder values, generate an effective prompt. "
+        description += f"The original high-level instruction being optimized is: '{prompt.get_messages()}'. "
+        description += "The goal is to create an effective prompt that guides a language model to perform this task well."
+        return description
```
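For context, a minimal usage sketch of the new `Helpers` mixin follows. It is not part of the diff: it assumes `chat_prompt.ChatPrompt` can be built from `system` and `user` strings (as in the package's documented examples) and that an optimizer class mixes `Helpers` in.

```python
# Hypothetical sketch: exercising Helpers._get_task_description_for_llm on a toy prompt.
# Assumes ChatPrompt(system=..., user=...) is a valid constructor in this package version.
from opik_optimizer.optimization_config import chat_prompt
from opik_optimizer.evolutionary_optimizer.helpers import Helpers


class DemoOptimizer(Helpers):
    """Stand-in for an optimizer class that mixes in Helpers."""


prompt = chat_prompt.ChatPrompt(
    system="You are a concise assistant.",
    user="Summarize the following text: {text}",
)

# The description embeds the prompt's messages verbatim and is later passed to the
# reasoning model whenever the optimizer needs task context.
print(DemoOptimizer()._get_task_description_for_llm(prompt))
```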
opik_optimizer/evolutionary_optimizer/llm_support.py (new file, +136 lines)

```diff
@@ -0,0 +1,136 @@
+from typing import Any, TYPE_CHECKING
+
+import logging
+import os
+import time
+import random
+
+import litellm
+from litellm import exceptions as litellm_exceptions
+from litellm.caching import Cache
+from litellm.types.caching import LiteLLMCacheType
+from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
+
+from .. import _throttle
+
+
+logger = logging.getLogger(__name__)
+
+
+# Configure LiteLLM cache with safe fallback
+try:
+    # Prefer a disk cache in a user-writable location
+    cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "litellm")
+    os.makedirs(cache_dir, exist_ok=True)
+    litellm.cache = Cache(type=LiteLLMCacheType.DISK, cache_dir=cache_dir)
+except (PermissionError, OSError, FileNotFoundError):
+    # Fall back to in-memory cache to avoid disk timeouts/locks
+    litellm.cache = Cache(type=LiteLLMCacheType.MEMORY)
+
+_rate_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
+
+
+class LlmSupport:
+    if TYPE_CHECKING:
+        model: str
+        llm_call_counter: int
+        project_name: str | None
+        disable_litellm_monitoring: bool
+        temperature: float
+        max_tokens: int
+        top_p: float
+        frequency_penalty: float
+        presence_penalty: float
+
+        def increment_llm_counter(self) -> None: ...
+
+    @_throttle.rate_limited(_rate_limiter)
+    def _call_model(
+        self,
+        messages: list[dict[str, str]],
+        is_reasoning: bool = False,
+        optimization_id: str | None = None,
+    ) -> str:
+        """Call the model with the given prompt and return the response string."""
+        # Build base call params
+        llm_config_params: dict[str, Any] = {
+            "temperature": getattr(self, "temperature", 0.3),
+            "max_tokens": getattr(self, "max_tokens", 1000),
+            "top_p": getattr(self, "top_p", 1.0),
+            "frequency_penalty": getattr(self, "frequency_penalty", 0.0),
+            "presence_penalty": getattr(self, "presence_penalty", 0.0),
+        }
+
+        # Add Opik metadata unless disabled
+        try:
+            disable_monitoring_env = os.getenv(
+                "OPIK_OPTIMIZER_DISABLE_LITELLM_MONITORING", "0"
+            )
+            disable_monitoring = getattr(
+                self, "disable_litellm_monitoring", False
+            ) or disable_monitoring_env.lower() in ("1", "true", "yes")
+
+            if not disable_monitoring:
+                metadata_for_opik: dict[str, Any] = {}
+                pn = getattr(self, "project_name", None)
+                if pn:
+                    metadata_for_opik["project_name"] = pn
+                    metadata_for_opik["opik"] = {"project_name": pn}
+                if optimization_id and "opik" in metadata_for_opik:
+                    metadata_for_opik["opik"]["optimization_id"] = optimization_id
+                metadata_for_opik["optimizer_name"] = self.__class__.__name__
+                metadata_for_opik["opik_call_type"] = (
+                    "reasoning" if is_reasoning else "evaluation_llm_task_direct"
+                )
+                if metadata_for_opik:
+                    llm_config_params["metadata"] = metadata_for_opik
+
+            # Try to add Opik monitoring callbacks; fall back silently on failure
+            llm_config_params = (
+                opik_litellm_monitor.try_add_opik_monitoring_to_params(  # type: ignore
+                    llm_config_params.copy()
+                )
+            )
+        except Exception as e:
+            logger.debug(f"Skipping Opik-LiteLLM monitoring setup: {e}")
+
+        # Retry policy for transient errors
+        max_retries = int(os.getenv("OPIK_OPTIMIZER_LITELLM_MAX_RETRIES", "3"))
+        base_sleep = float(os.getenv("OPIK_OPTIMIZER_LITELLM_BACKOFF", "0.5"))
+
+        for attempt in range(max_retries + 1):
+            try:
+                logger.debug(
+                    f"Calling model '{self.model}' with messages: {messages}, params: {llm_config_params} (attempt {attempt + 1})"
+                )
+                response = litellm.completion(
+                    model=self.model, messages=messages, **llm_config_params
+                )
+                self.increment_llm_counter()
+                return response.choices[0].message.content
+            except (
+                litellm_exceptions.RateLimitError,
+                litellm_exceptions.APIConnectionError,
+                litellm_exceptions.InternalServerError,
+            ) as e:
+                if attempt < max_retries:
+                    sleep_s = min(10.0, base_sleep * (2**attempt)) + random.uniform(
+                        0, 0.25
+                    )
+                    logger.warning(
+                        f"LiteLLM transient error ({type(e).__name__}): {e}. Retrying in {sleep_s:.2f}s..."
+                    )
+                    time.sleep(sleep_s)
+                    continue
+                logger.error(f"LiteLLM error (final attempt): {e}")
+                raise
+            except litellm_exceptions.ContextWindowExceededError as e:
+                logger.error(f"LiteLLM Context Window Exceeded Error: {e}")
+                raise
+            except Exception as e:
+                logger.error(
+                    f"Error calling model '{self.model}': {type(e).__name__} - {e}"
+                )
+                raise
+        # Should never reach here
+        raise RuntimeError("LLM call did not return a response and did not raise")
```
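The retry loop above backs off exponentially with jitter. A standalone sketch of the schedule it produces under the default environment values (`OPIK_OPTIMIZER_LITELLM_MAX_RETRIES=3`, `OPIK_OPTIMIZER_LITELLM_BACKOFF=0.5`) follows; the printed numbers are illustrative, not taken from the package.

```python
import random

max_retries = 3   # OPIK_OPTIMIZER_LITELLM_MAX_RETRIES default
base_sleep = 0.5  # OPIK_OPTIMIZER_LITELLM_BACKOFF default

# After each failed attempt (except the last), _call_model sleeps for a delay that
# doubles per attempt, is capped at 10 seconds, and carries up to 0.25s of jitter.
for attempt in range(max_retries):
    sleep_s = min(10.0, base_sleep * (2**attempt)) + random.uniform(0, 0.25)
    print(f"retry {attempt + 1}: wait ~{sleep_s:.2f}s")
# retry 1: ~0.5s, retry 2: ~1.0s, retry 3: ~2.0s; a fourth failure re-raises the error.
```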
opik_optimizer/evolutionary_optimizer/mcp.py (new file, +249 lines)

```diff
@@ -0,0 +1,249 @@
+"""MCP helper utilities for the Evolutionary Optimizer."""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import textwrap
+from dataclasses import dataclass
+from typing import Any
+
+from opik_optimizer.optimization_config import chat_prompt
+from opik_optimizer.utils.prompt_segments import (
+    apply_segment_updates,
+    extract_prompt_segments,
+)
+
+from . import prompts as evo_prompts
+from . import reporting
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class EvolutionaryMCPContext:
+    tool_name: str
+    tool_segment_id: str
+    original_description: str
+    tool_metadata: dict[str, Any]
+    panel_style: str
+
+
+def _tool_metadata_json(metadata: dict[str, Any]) -> str:
+    try:
+        return json.dumps(metadata, indent=2)
+    except (
+        TypeError,
+        ValueError,
+    ):  # pragma: no cover - defensive, shouldn't happen under normal circumstances
+        return str(metadata)
+
+
+def generate_tool_description_variations(
+    optimizer: Any,
+    base_prompt: chat_prompt.ChatPrompt,
+    context: EvolutionaryMCPContext,
+    num_variations: int,
+) -> list[chat_prompt.ChatPrompt]:
+    if num_variations <= 0:
+        return []
+
+    instruction = textwrap.dedent(
+        evo_prompts.mcp_tool_rewrite_user_prompt(
+            tool_name=context.tool_name,
+            current_description=_current_tool_description(
+                base_prompt, context.tool_segment_id
+            )
+            or context.original_description,
+            tool_metadata_json=_tool_metadata_json(context.tool_metadata),
+            num_variations=num_variations,
+        )
+    ).strip()
+
+    try:
+        response = optimizer._call_model(  # type: ignore[attr-defined]
+            messages=[
+                {
+                    "role": "system",
+                    "content": evo_prompts.mcp_tool_rewrite_system_prompt(),
+                },
+                {"role": "user", "content": instruction},
+            ],
+            is_reasoning=True,
+            optimization_id=getattr(optimizer, "_current_optimization_id", None),
+        )
+
+        payload = _extract_json_payload(response)
+        prompts_payload = payload.get("prompts")
+        if not isinstance(prompts_payload, list):
+            raise ValueError("LLM response missing 'prompts' list")
+
+        candidates: list[chat_prompt.ChatPrompt] = []
+        seen: set[str] = set()
+        for item in prompts_payload:
+            if not isinstance(item, dict):
+                continue
+            description = item.get("tool_description")
+            if not isinstance(description, str) or not description.strip():
+                continue
+            normalized = description.strip()
+            if normalized in seen:
+                continue
+            seen.add(normalized)
+            updated_prompt = _apply_description(base_prompt, context, normalized)
+            reporting.display_tool_description(
+                normalized,
+                f"Candidate tool description ({context.tool_name})",
+                context.panel_style,
+            )
+            candidates.append(updated_prompt)
+            if len(candidates) >= num_variations:
+                break
+
+        return candidates
+    except Exception as exc:  # pragma: no cover - fallback path
+        logger.warning(f"Failed to generate MCP tool descriptions: {exc}")
+        return []
+
+
+def initialize_population_mcp(
+    optimizer: Any,
+    prompt: chat_prompt.ChatPrompt,
+    context: EvolutionaryMCPContext,
+) -> list[chat_prompt.ChatPrompt]:
+    population_size = getattr(optimizer, "population_size", 1)
+    with reporting.initializing_population(
+        verbose=getattr(optimizer, "verbose", 1)
+    ) as init_pop_report:
+        init_pop_report.start(population_size)
+
+        population = [prompt]
+        num_to_generate = max(0, population_size - 1)
+        if num_to_generate > 0:
+            candidates = generate_tool_description_variations(
+                optimizer,
+                prompt,
+                context,
+                num_to_generate,
+            )
+            population.extend(candidates[:num_to_generate])
+
+        seen: set[str] = set()
+        final_population: list[chat_prompt.ChatPrompt] = []
+        for candidate in population:
+            key = json.dumps(candidate.get_messages())
+            if key in seen:
+                continue
+            seen.add(key)
+            final_population.append(candidate)
+
+        while len(final_population) < population_size:
+            final_population.append(prompt)
+
+        init_pop_report.end(final_population)
+        return final_population[:population_size]
+
+
+def tool_description_mutation(
+    optimizer: Any,
+    prompt: chat_prompt.ChatPrompt,
+    context: EvolutionaryMCPContext,
+) -> chat_prompt.ChatPrompt | None:
+    candidates = generate_tool_description_variations(optimizer, prompt, context, 1)
+    if not candidates:
+        return None
+
+    description = _current_tool_description(candidates[0], context.tool_segment_id)
+    if description:
+        reporting.display_tool_description(
+            description,
+            f"Updated tool description ({context.tool_name})",
+            context.panel_style,
+        )
+    return candidates[0]
+
+
+def finalize_mcp_result(
+    result: Any,
+    context: EvolutionaryMCPContext,
+    panel_style: str,
+) -> None:
+    final_tools = (
+        result.details.get("final_tools") if isinstance(result.details, dict) else None
+    )
+    tool_prompts = {
+        (tool.get("function", {}).get("name") or tool.get("name")): tool.get(
+            "function", {}
+        ).get("description")
+        for tool in (final_tools or [])
+    }
+    if tool_prompts.get(context.tool_name):
+        reporting.display_tool_description(
+            tool_prompts[context.tool_name],
+            f"Final tool description ({context.tool_name})",
+            panel_style,
+        )
+
+    if not tool_prompts and context.original_description:
+        tool_prompts = {context.tool_name: context.original_description}
+
+    if tool_prompts:
+        result.tool_prompts = tool_prompts
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _current_tool_description(
+    prompt: chat_prompt.ChatPrompt,
+    tool_segment_id: str,
+) -> str:
+    segments = {
+        segment.segment_id: segment for segment in extract_prompt_segments(prompt)
+    }
+    target = segments.get(tool_segment_id)
+    return target.content if target else ""
+
+
+def _extract_json_payload(response: str) -> dict[str, Any]:
+    try:
+        return json.loads(response)
+    except json.JSONDecodeError:
+        match = re.search(r"\{.*\}", response, re.DOTALL)
+        if not match:
+            raise ValueError("No JSON object found in LLM response")
+        return json.loads(match.group())
+
+
+def _apply_description(
+    prompt: chat_prompt.ChatPrompt,
+    context: EvolutionaryMCPContext,
+    description: str,
+) -> chat_prompt.ChatPrompt:
+    updated_prompt = apply_segment_updates(
+        prompt,
+        {context.tool_segment_id: description},
+    )
+    _sync_system_description(updated_prompt, description)
+    return updated_prompt
+
+
+def _sync_system_description(prompt: chat_prompt.ChatPrompt, description: str) -> None:
+    if not prompt.system:
+        return
+
+    marker_start = "<<TOOL_DESCRIPTION>>"
+    marker_end = "<<END_TOOL_DESCRIPTION>>"
+
+    start = prompt.system.find(marker_start)
+    end = prompt.system.find(marker_end)
+    if start == -1 or end == -1 or end <= start:
+        return
+
+    prefix = prompt.system[: start + len(marker_start)]
+    suffix = prompt.system[end:]
+    formatted_description = f"\n{description.strip()}\n"
+    prompt.system = f"{prefix}{formatted_description}{suffix}"
```