opik-optimizer 1.0.6__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +2 -0
- opik_optimizer/_throttle.py +2 -1
- opik_optimizer/base_optimizer.py +28 -11
- opik_optimizer/colbert.py +236 -0
- opik_optimizer/data/context7_eval.jsonl +3 -0
- opik_optimizer/datasets/context7_eval.py +90 -0
- opik_optimizer/datasets/tiny_test.py +33 -34
- opik_optimizer/datasets/truthful_qa.py +2 -2
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
- opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
- opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
- opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
- opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
- opik_optimizer/gepa_optimizer/__init__.py +3 -0
- opik_optimizer/gepa_optimizer/adapter.py +152 -0
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
- opik_optimizer/gepa_optimizer/reporting.py +181 -0
- opik_optimizer/logging_config.py +42 -7
- opik_optimizer/mcp_utils/__init__.py +22 -0
- opik_optimizer/mcp_utils/mcp.py +541 -0
- opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
- opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
- opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
- opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
- opik_optimizer/mipro_optimizer/_lm.py +20 -20
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
- opik_optimizer/mipro_optimizer/utils.py +2 -4
- opik_optimizer/optimizable_agent.py +16 -16
- opik_optimizer/optimization_config/chat_prompt.py +44 -23
- opik_optimizer/optimization_config/configs.py +3 -3
- opik_optimizer/optimization_config/mappers.py +9 -8
- opik_optimizer/optimization_result.py +21 -14
- opik_optimizer/reporting_utils.py +61 -10
- opik_optimizer/task_evaluator.py +9 -8
- opik_optimizer/utils/__init__.py +15 -0
- opik_optimizer/{utils.py → utils/core.py} +111 -26
- opik_optimizer/utils/dataset_utils.py +49 -0
- opik_optimizer/utils/prompt_segments.py +186 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
- opik_optimizer-1.1.0.dist-info/RECORD +73 -0
- opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
- opik_optimizer-1.0.6.dist-info/RECORD +0 -50
- opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,152 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any
|
5
|
+
from collections.abc import Callable, Iterable
|
6
|
+
|
7
|
+
import logging
|
8
|
+
|
9
|
+
from gepa.core.adapter import EvaluationBatch, GEPAAdapter
|
10
|
+
|
11
|
+
from ..optimization_config import chat_prompt
|
12
|
+
from ..utils import create_litellm_agent_class
|
13
|
+
|
14
|
+
|
15
|
+
# Module-level logger for the GEPA adapter; name is fixed explicitly rather
# than derived from __name__.
LOGGER = logging.getLogger("opik_optimizer.gepa.adapter")
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
class OpikDataInst:
    """Data instance handed to GEPA.

    We keep the original Opik dataset item so metrics and prompt formatting can use it
    directly without duplicated bookkeeping.
    """

    # Pre-rendered input text for this example (presumably what the prompt is
    # formatted around — confirm against the constructor/caller).
    input_text: str
    # Reference answer for this example.
    answer: str
    # Extra named context strings carried alongside the example.
    additional_context: dict[str, str]
    # The raw Opik dataset item; evaluation and metric calls consume this dict
    # unchanged (see OpikGEPAAdapter.evaluate).
    opik_item: dict[str, Any]
|
30
|
+
|
31
|
+
|
32
|
+
def _extract_system_text(candidate: dict[str, str], fallback: str) -> str:
|
33
|
+
for key in ("system_prompt", "system", "prompt"):
|
34
|
+
value = candidate.get(key)
|
35
|
+
if isinstance(value, str) and value.strip():
|
36
|
+
return value
|
37
|
+
return fallback
|
38
|
+
|
39
|
+
|
40
|
+
def _apply_system_text(
|
41
|
+
prompt_obj: chat_prompt.ChatPrompt, system_text: str
|
42
|
+
) -> chat_prompt.ChatPrompt:
|
43
|
+
updated = prompt_obj.copy()
|
44
|
+
if updated.messages is not None:
|
45
|
+
messages = updated.get_messages()
|
46
|
+
if messages and messages[0].get("role") == "system":
|
47
|
+
messages[0]["content"] = system_text
|
48
|
+
else:
|
49
|
+
messages.insert(0, {"role": "system", "content": system_text})
|
50
|
+
updated.set_messages(messages)
|
51
|
+
else:
|
52
|
+
updated.system = system_text
|
53
|
+
return updated
|
54
|
+
|
55
|
+
|
56
|
+
class OpikGEPAAdapter(GEPAAdapter[OpikDataInst, dict[str, Any], dict[str, Any]]):
    """Minimal GEPA adapter that routes evaluation through Opik's metric.

    GEPA proposes candidate system texts; for each one we splice the text into
    the base prompt, run the batch through a LiteLLM-backed agent, and score
    the raw outputs with the configured Opik metric callable.
    """

    def __init__(
        self,
        base_prompt: chat_prompt.ChatPrompt,
        optimizer: Any,
        metric: Callable[[dict[str, Any], str], Any],
        system_fallback: str,
    ) -> None:
        # Template prompt every candidate system text is spliced into.
        self._base_prompt = base_prompt
        # Owning optimizer; only touched to bump its live-metric-call counter.
        self._optimizer = optimizer
        # Metric callable: (dataset_item, raw_output) -> score-like result.
        self._metric = metric
        # System text used when a candidate does not supply one.
        self._system_fallback = system_fallback

    @staticmethod
    def _coerce_score(metric_result: Any) -> float:
        """Normalize a metric result to ``float``.

        Accepts objects exposing ``.value`` or ``.score`` (checked in that
        order) as well as plain numeric results.
        """
        if hasattr(metric_result, "value"):
            return float(metric_result.value)
        if hasattr(metric_result, "score"):
            return float(metric_result.score)
        return float(metric_result)

    def evaluate(
        self,
        batch: list[OpikDataInst],
        candidate: dict[str, str],
        capture_traces: bool = False,
    ) -> EvaluationBatch[dict[str, Any], dict[str, Any]]:
        """Run *candidate* over *batch*, returning per-item outputs and scores.

        Args:
            batch: Data instances to evaluate; only ``opik_item`` is consumed.
            candidate: GEPA candidate mapping; its system text is taken from
                the "system_prompt"/"system"/"prompt" keys (first non-blank).
            capture_traces: When True, per-item trajectories are recorded for
                later use by ``make_reflective_dataset``.
        """
        system_text = _extract_system_text(candidate, self._system_fallback)
        prompt_variant = _apply_system_text(self._base_prompt, system_text)

        agent_class = create_litellm_agent_class(prompt_variant)
        agent = agent_class(prompt_variant)

        outputs: list[dict[str, Any]] = []
        scores: list[float] = []
        trajectories: list[dict[str, Any]] | None = [] if capture_traces else None

        for inst in batch:
            dataset_item = inst.opik_item
            messages = prompt_variant.get_messages(dataset_item)
            raw_output = agent.invoke(messages).strip()

            score = self._coerce_score(self._metric(dataset_item, raw_output))

            outputs.append({"output": raw_output})
            scores.append(score)
            # Best-effort bookkeeping on the owning optimizer; never let a
            # missing/odd counter attribute break an evaluation run.
            try:
                self._optimizer._gepa_live_metric_calls += 1
            except Exception:
                pass

            if trajectories is not None:
                trajectories.append(
                    {
                        "input": dataset_item,
                        "output": raw_output,
                        "score": score,
                    }
                )

        return EvaluationBatch(
            outputs=outputs, scores=scores, trajectories=trajectories
        )

    def make_reflective_dataset(
        self,
        candidate: dict[str, str],
        eval_batch: EvaluationBatch[dict[str, Any], dict[str, Any]],
        components_to_update: list[str],
    ) -> dict[str, list[dict[str, Any]]]:
        """Build GEPA's reflective dataset from captured trajectories.

        Each trajectory becomes a record holding the item's input text, the
        generated output, and a feedback string with the observed score and
        expected answer. The same record list is mapped onto every component
        in *components_to_update* (defaulting to ``["system_prompt"]``).
        """
        components = components_to_update or ["system_prompt"]
        trajectories = eval_batch.trajectories or []

        def _records() -> Iterable[dict[str, Any]]:
            for traj in trajectories:
                dataset_item = traj.get("input", {})
                output_text = traj.get("output", "")
                score = traj.get("score", 0.0)
                feedback = f"Observed score={score:.4f}. Expected answer: {dataset_item.get('answer', '')}"
                yield {
                    "Inputs": {
                        "text": dataset_item.get("input")
                        or dataset_item.get("question")
                        or "",
                    },
                    "Generated Outputs": output_text,
                    "Feedback": feedback,
                }

        reflective_records = list(_records())
        if not reflective_records:
            # Nothing captured (e.g. evaluate() ran with capture_traces=False).
            # NOTE: the original re-assigned `reflective_records = []` here,
            # which was dead code — the list is already empty on this branch.
            LOGGER.debug(
                "No trajectories captured for candidate; returning empty reflective dataset"
            )

        return {component: reflective_records for component in components}
|