opik-optimizer 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +28 -11
  4. opik_optimizer/colbert.py +236 -0
  5. opik_optimizer/data/context7_eval.jsonl +3 -0
  6. opik_optimizer/datasets/context7_eval.py +90 -0
  7. opik_optimizer/datasets/tiny_test.py +33 -34
  8. opik_optimizer/datasets/truthful_qa.py +2 -2
  9. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  10. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
  11. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
  12. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  13. opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +152 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +20 -20
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +18 -17
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +21 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/{utils.py → utils/core.py} +111 -26
  46. opik_optimizer/utils/dataset_utils.py +49 -0
  47. opik_optimizer/utils/prompt_segments.py +186 -0
  48. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
  49. opik_optimizer-1.1.0.dist-info/RECORD +73 -0
  50. opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
  51. opik_optimizer-1.0.5.dist-info/RECORD +0 -50
  52. opik_optimizer-1.0.5.dist-info/licenses/LICENSE +0 -21
  53. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
  54. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,152 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+ from collections.abc import Callable, Iterable
6
+
7
+ import logging
8
+
9
+ from gepa.core.adapter import EvaluationBatch, GEPAAdapter
10
+
11
+ from ..optimization_config import chat_prompt
12
+ from ..utils import create_litellm_agent_class
13
+
14
+
15
+ LOGGER = logging.getLogger("opik_optimizer.gepa.adapter")
16
+
17
+
18
+ @dataclass
19
+ class OpikDataInst:
20
+ """Data instance handed to GEPA.
21
+
22
+ We keep the original Opik dataset item so metrics and prompt formatting can use it
23
+ directly without duplicated bookkeeping.
24
+ """
25
+
26
+ input_text: str
27
+ answer: str
28
+ additional_context: dict[str, str]
29
+ opik_item: dict[str, Any]
30
+
31
+
32
+ def _extract_system_text(candidate: dict[str, str], fallback: str) -> str:
33
+ for key in ("system_prompt", "system", "prompt"):
34
+ value = candidate.get(key)
35
+ if isinstance(value, str) and value.strip():
36
+ return value
37
+ return fallback
38
+
39
+
40
+ def _apply_system_text(
41
+ prompt_obj: chat_prompt.ChatPrompt, system_text: str
42
+ ) -> chat_prompt.ChatPrompt:
43
+ updated = prompt_obj.copy()
44
+ if updated.messages is not None:
45
+ messages = updated.get_messages()
46
+ if messages and messages[0].get("role") == "system":
47
+ messages[0]["content"] = system_text
48
+ else:
49
+ messages.insert(0, {"role": "system", "content": system_text})
50
+ updated.set_messages(messages)
51
+ else:
52
+ updated.system = system_text
53
+ return updated
54
+
55
+
56
+ class OpikGEPAAdapter(GEPAAdapter[OpikDataInst, dict[str, Any], dict[str, Any]]):
57
+ """Minimal GEPA adapter that routes evaluation through Opik's metric."""
58
+
59
+ def __init__(
60
+ self,
61
+ base_prompt: chat_prompt.ChatPrompt,
62
+ optimizer: Any,
63
+ metric: Callable[[dict[str, Any], str], Any],
64
+ system_fallback: str,
65
+ ) -> None:
66
+ self._base_prompt = base_prompt
67
+ self._optimizer = optimizer
68
+ self._metric = metric
69
+ self._system_fallback = system_fallback
70
+
71
+ def evaluate(
72
+ self,
73
+ batch: list[OpikDataInst],
74
+ candidate: dict[str, str],
75
+ capture_traces: bool = False,
76
+ ) -> EvaluationBatch[dict[str, Any], dict[str, Any]]:
77
+ system_text = _extract_system_text(candidate, self._system_fallback)
78
+ prompt_variant = _apply_system_text(self._base_prompt, system_text)
79
+
80
+ agent_class = create_litellm_agent_class(prompt_variant)
81
+ agent = agent_class(prompt_variant)
82
+
83
+ outputs: list[dict[str, Any]] = []
84
+ scores: list[float] = []
85
+ trajectories: list[dict[str, Any]] | None = [] if capture_traces else None
86
+
87
+ for inst in batch:
88
+ dataset_item = inst.opik_item
89
+ messages = prompt_variant.get_messages(dataset_item)
90
+ raw_output = agent.invoke(messages).strip()
91
+
92
+ metric_result = self._metric(dataset_item, raw_output)
93
+ if hasattr(metric_result, "value"):
94
+ score = float(metric_result.value)
95
+ elif hasattr(metric_result, "score"):
96
+ score = float(metric_result.score)
97
+ else:
98
+ score = float(metric_result)
99
+
100
+ outputs.append({"output": raw_output})
101
+ scores.append(score)
102
+ try:
103
+ self._optimizer._gepa_live_metric_calls += 1
104
+ except Exception:
105
+ pass
106
+
107
+ if trajectories is not None:
108
+ trajectories.append(
109
+ {
110
+ "input": dataset_item,
111
+ "output": raw_output,
112
+ "score": score,
113
+ }
114
+ )
115
+
116
+ return EvaluationBatch(
117
+ outputs=outputs, scores=scores, trajectories=trajectories
118
+ )
119
+
120
+ def make_reflective_dataset(
121
+ self,
122
+ candidate: dict[str, str],
123
+ eval_batch: EvaluationBatch[dict[str, Any], dict[str, Any]],
124
+ components_to_update: list[str],
125
+ ) -> dict[str, list[dict[str, Any]]]:
126
+ components = components_to_update or ["system_prompt"]
127
+ trajectories = eval_batch.trajectories or []
128
+
129
+ def _records() -> Iterable[dict[str, Any]]:
130
+ for traj in trajectories:
131
+ dataset_item = traj.get("input", {})
132
+ output_text = traj.get("output", "")
133
+ score = traj.get("score", 0.0)
134
+ feedback = f"Observed score={score:.4f}. Expected answer: {dataset_item.get('answer', '')}"
135
+ yield {
136
+ "Inputs": {
137
+ "text": dataset_item.get("input")
138
+ or dataset_item.get("question")
139
+ or "",
140
+ },
141
+ "Generated Outputs": output_text,
142
+ "Feedback": feedback,
143
+ }
144
+
145
+ reflective_records = list(_records())
146
+ if not reflective_records:
147
+ LOGGER.debug(
148
+ "No trajectories captured for candidate; returning empty reflective dataset"
149
+ )
150
+ reflective_records = []
151
+
152
+ return {component: reflective_records for component in components}