opik-optimizer 1.0.6__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +2 -0
- opik_optimizer/_throttle.py +2 -1
- opik_optimizer/base_optimizer.py +28 -11
- opik_optimizer/colbert.py +236 -0
- opik_optimizer/data/context7_eval.jsonl +3 -0
- opik_optimizer/datasets/context7_eval.py +90 -0
- opik_optimizer/datasets/tiny_test.py +33 -34
- opik_optimizer/datasets/truthful_qa.py +2 -2
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
- opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
- opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
- opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
- opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
- opik_optimizer/gepa_optimizer/__init__.py +3 -0
- opik_optimizer/gepa_optimizer/adapter.py +152 -0
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
- opik_optimizer/gepa_optimizer/reporting.py +181 -0
- opik_optimizer/logging_config.py +42 -7
- opik_optimizer/mcp_utils/__init__.py +22 -0
- opik_optimizer/mcp_utils/mcp.py +541 -0
- opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
- opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
- opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
- opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
- opik_optimizer/mipro_optimizer/_lm.py +20 -20
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
- opik_optimizer/mipro_optimizer/utils.py +2 -4
- opik_optimizer/optimizable_agent.py +16 -16
- opik_optimizer/optimization_config/chat_prompt.py +44 -23
- opik_optimizer/optimization_config/configs.py +3 -3
- opik_optimizer/optimization_config/mappers.py +9 -8
- opik_optimizer/optimization_result.py +21 -14
- opik_optimizer/reporting_utils.py +61 -10
- opik_optimizer/task_evaluator.py +9 -8
- opik_optimizer/utils/__init__.py +15 -0
- opik_optimizer/{utils.py → utils/core.py} +111 -26
- opik_optimizer/utils/dataset_utils.py +49 -0
- opik_optimizer/utils/prompt_segments.py +186 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
- opik_optimizer-1.1.0.dist-info/RECORD +73 -0
- opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
- opik_optimizer-1.0.6.dist-info/RECORD +0 -50
- opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any
|
2
|
+
from collections.abc import Callable
|
2
3
|
|
3
4
|
import copy
|
4
5
|
|
@@ -10,7 +11,7 @@ from opik import track
|
|
10
11
|
class Tool(BaseModel):
|
11
12
|
name: str = Field(..., description="Name of the tool")
|
12
13
|
description: str = Field(..., description="Description of the tool")
|
13
|
-
parameters:
|
14
|
+
parameters: dict[str, Any] = Field(
|
14
15
|
..., description="JSON Schema defining the input parameters for the tool"
|
15
16
|
)
|
16
17
|
|
@@ -33,14 +34,14 @@ class ChatPrompt:
|
|
33
34
|
def __init__(
|
34
35
|
self,
|
35
36
|
name: str = "chat-prompt",
|
36
|
-
system:
|
37
|
-
user:
|
38
|
-
messages:
|
39
|
-
tools:
|
40
|
-
function_map:
|
41
|
-
model:
|
42
|
-
invoke:
|
43
|
-
project_name:
|
37
|
+
system: str | None = None,
|
38
|
+
user: str | None = None,
|
39
|
+
messages: list[dict[str, str]] | None = None,
|
40
|
+
tools: list[dict[str, Any]] | None = None,
|
41
|
+
function_map: dict[str, Callable] | None = None,
|
42
|
+
model: str | None = None,
|
43
|
+
invoke: Callable | None = None,
|
44
|
+
project_name: str | None = "Default Project",
|
44
45
|
**model_kwargs: Any,
|
45
46
|
) -> None:
|
46
47
|
if system is None and user is None and messages is None:
|
@@ -97,8 +98,8 @@ class ChatPrompt:
|
|
97
98
|
|
98
99
|
def get_messages(
|
99
100
|
self,
|
100
|
-
dataset_item:
|
101
|
-
) ->
|
101
|
+
dataset_item: dict[str, str] | None = None,
|
102
|
+
) -> list[dict[str, str]]:
|
102
103
|
# This is a copy, so we can alter the messages:
|
103
104
|
messages = self._standardize_prompts()
|
104
105
|
|
@@ -113,8 +114,8 @@ class ChatPrompt:
|
|
113
114
|
)
|
114
115
|
return messages
|
115
116
|
|
116
|
-
def _standardize_prompts(self, **kwargs: Any) ->
|
117
|
-
standardize_messages:
|
117
|
+
def _standardize_prompts(self, **kwargs: Any) -> list[dict[str, str]]:
|
118
|
+
standardize_messages: list[dict[str, str]] = []
|
118
119
|
|
119
120
|
if self.system is not None:
|
120
121
|
standardize_messages.append({"role": "system", "content": self.system})
|
@@ -128,13 +129,13 @@ class ChatPrompt:
|
|
128
129
|
|
129
130
|
return copy.deepcopy(standardize_messages)
|
130
131
|
|
131
|
-
def to_dict(self) ->
|
132
|
+
def to_dict(self) -> dict[str, str | list[dict[str, str]]]:
|
132
133
|
"""Convert ChatPrompt to a dictionary for JSON serialization.
|
133
134
|
|
134
135
|
Returns:
|
135
136
|
Dict containing the serializable representation of this ChatPrompt
|
136
137
|
"""
|
137
|
-
retval:
|
138
|
+
retval: dict[str, str | list[dict[str, str]]] = {}
|
138
139
|
if self.system is not None:
|
139
140
|
retval["system"] = self.system
|
140
141
|
if self.user is not None:
|
@@ -144,29 +145,49 @@ class ChatPrompt:
|
|
144
145
|
return retval
|
145
146
|
|
146
147
|
def copy(self) -> "ChatPrompt":
|
148
|
+
"""Shallow clone preserving model configuration and tools."""
|
149
|
+
|
150
|
+
# TODO(opik-mcp): once we introduce a dedicated MCP prompt subclass,
|
151
|
+
# migrate callers away from generic copies so optimizer metadata stays typed.
|
152
|
+
model_kwargs = (
|
153
|
+
copy.deepcopy(self.model_kwargs) if self.model_kwargs is not None else {}
|
154
|
+
)
|
147
155
|
return ChatPrompt(
|
156
|
+
name=self.name,
|
148
157
|
system=self.system,
|
149
158
|
user=self.user,
|
150
159
|
messages=copy.deepcopy(self.messages),
|
151
|
-
tools=self.tools,
|
160
|
+
tools=copy.deepcopy(self.tools),
|
152
161
|
function_map=self.function_map,
|
162
|
+
model=self.model,
|
163
|
+
invoke=self.invoke,
|
164
|
+
project_name=self.project_name,
|
165
|
+
**model_kwargs,
|
153
166
|
)
|
154
167
|
|
155
|
-
def set_messages(self, messages:
|
168
|
+
def set_messages(self, messages: list[dict[str, Any]]) -> None:
|
156
169
|
self.system = None
|
157
170
|
self.user = None
|
158
171
|
self.messages = copy.deepcopy(messages)
|
159
172
|
|
173
|
+
# TODO(opik): remove this stop-gap once MetaPromptOptimizer supports MCP.
|
174
|
+
# Provides a second-pass flow so tool results can be appended before
|
175
|
+
# rerunning the model.
|
176
|
+
def with_messages(self, messages: list[dict[str, Any]]) -> "ChatPrompt":
    """Return a clone of this prompt whose messages are replaced by ``messages``.

    The clone is produced by ``self.copy()`` and then updated through
    ``set_messages``; this instance is not modified here.
    """
    duplicate = self.copy()
    duplicate.set_messages(messages)
    return duplicate
|
180
|
+
|
160
181
|
@classmethod
|
161
182
|
def model_validate(
|
162
183
|
cls,
|
163
184
|
obj: Any,
|
164
185
|
*,
|
165
|
-
strict:
|
166
|
-
from_attributes:
|
167
|
-
context:
|
168
|
-
by_alias:
|
169
|
-
by_name:
|
186
|
+
strict: bool | None = None,
|
187
|
+
from_attributes: bool | None = None,
|
188
|
+
context: Any | None = None,
|
189
|
+
by_alias: bool | None = None,
|
190
|
+
by_name: bool | None = None,
|
170
191
|
) -> "ChatPrompt":
|
171
192
|
"""Custom validation method to handle nested objects during deserialization."""
|
172
193
|
return ChatPrompt(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
"""Module containing configuration classes for optimization."""
|
2
2
|
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any
|
4
4
|
|
5
5
|
import pydantic
|
6
6
|
|
@@ -12,6 +12,6 @@ class TaskConfig(pydantic.BaseModel):
|
|
12
12
|
|
13
13
|
instruction_prompt: str
|
14
14
|
use_chat_prompt: bool = False
|
15
|
-
input_dataset_fields:
|
15
|
+
input_dataset_fields: list[str]
|
16
16
|
output_dataset_field: str
|
17
|
-
tools:
|
17
|
+
tools: list[Any] = []
|
@@ -1,4 +1,5 @@
|
|
1
|
-
from typing import
|
1
|
+
from typing import Any
|
2
|
+
from collections.abc import Callable
|
2
3
|
|
3
4
|
EVALUATED_LLM_TASK_OUTPUT = "llm_output"
|
4
5
|
|
@@ -8,8 +9,8 @@ class Mapper:
|
|
8
9
|
|
9
10
|
def __init__(
|
10
11
|
self,
|
11
|
-
name:
|
12
|
-
transform:
|
12
|
+
name: str | None = None,
|
13
|
+
transform: Callable[[Any], Any] | None = None,
|
13
14
|
):
|
14
15
|
if name is not None and transform is not None:
|
15
16
|
raise ValueError("Only one of name or transform can be provided")
|
@@ -27,9 +28,9 @@ class Mapper:
|
|
27
28
|
|
28
29
|
def from_dataset_field(
|
29
30
|
*,
|
30
|
-
name:
|
31
|
-
transform:
|
32
|
-
) ->
|
31
|
+
name: str | None = None,
|
32
|
+
transform: Callable[[dict[str, Any]], Any] | None = None,
|
33
|
+
) -> str | Callable[[dict[str, Any]], Any]:
|
33
34
|
if name is not None and transform is not None:
|
34
35
|
raise ValueError("Only one of name or transform can be provided")
|
35
36
|
|
@@ -47,8 +48,8 @@ def from_llm_response_text() -> str:
|
|
47
48
|
|
48
49
|
|
49
50
|
def from_agent_output(
|
50
|
-
*, name:
|
51
|
-
) ->
|
51
|
+
*, name: str | None = None, transform: Callable[[Any], Any] | None = None
|
52
|
+
) -> str | Callable[[Any], Any]:
|
52
53
|
if name is not None and transform is not None:
|
53
54
|
raise ValueError("Only one of name or transform can be provided")
|
54
55
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
"""Module containing the OptimizationResult class."""
|
2
2
|
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any
|
4
4
|
|
5
5
|
import pydantic
|
6
6
|
import rich
|
@@ -13,25 +13,25 @@ class OptimizationResult(pydantic.BaseModel):
|
|
13
13
|
|
14
14
|
optimizer: str = "Optimizer"
|
15
15
|
|
16
|
-
prompt:
|
16
|
+
prompt: list[dict[str, str]]
|
17
17
|
score: float
|
18
18
|
metric_name: str
|
19
19
|
|
20
|
-
optimization_id:
|
21
|
-
dataset_id:
|
20
|
+
optimization_id: str | None = None
|
21
|
+
dataset_id: str | None = None
|
22
22
|
|
23
23
|
# Initial score
|
24
|
-
initial_prompt:
|
25
|
-
initial_score:
|
24
|
+
initial_prompt: list[dict[str, str]] | None = None
|
25
|
+
initial_score: float | None = None
|
26
26
|
|
27
|
-
details:
|
28
|
-
history:
|
29
|
-
llm_calls:
|
27
|
+
details: dict[str, Any] = pydantic.Field(default_factory=dict)
|
28
|
+
history: list[dict[str, Any]] = []
|
29
|
+
llm_calls: int | None = None
|
30
30
|
|
31
31
|
# MIPRO specific
|
32
|
-
demonstrations:
|
33
|
-
mipro_prompt:
|
34
|
-
tool_prompts:
|
32
|
+
demonstrations: list[dict[str, Any]] | None = None
|
33
|
+
mipro_prompt: str | None = None
|
34
|
+
tool_prompts: dict[str, str] | None = None
|
35
35
|
|
36
36
|
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
|
37
37
|
|
@@ -40,7 +40,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
40
40
|
optimization_id=self.optimization_id, dataset_id=self.dataset_id
|
41
41
|
)
|
42
42
|
|
43
|
-
def model_dump(self, *kargs: Any, **kwargs: Any) ->
|
43
|
+
def model_dump(self, *kargs: Any, **kwargs: Any) -> dict[str, Any]:
|
44
44
|
return super().model_dump(*kargs, **kwargs)
|
45
45
|
|
46
46
|
def _calculate_improvement_str(self) -> str:
|
@@ -205,4 +205,11 @@ class OptimizationResult(pydantic.BaseModel):
|
|
205
205
|
"""
|
206
206
|
console = get_console()
|
207
207
|
console.print(self)
|
208
|
-
|
208
|
+
# Gracefully handle cases where optimization tracking isn't available
|
209
|
+
if self.dataset_id and self.optimization_id:
|
210
|
+
try:
|
211
|
+
print("Optimization run link:", self.get_run_link())
|
212
|
+
except Exception:
|
213
|
+
print("Optimization run link: No optimization run link available")
|
214
|
+
else:
|
215
|
+
print("Optimization run link: No optimization run link available")
|
@@ -1,6 +1,7 @@
|
|
1
|
+
import json
|
1
2
|
import logging
|
2
3
|
from contextlib import contextmanager
|
3
|
-
from typing import Any
|
4
|
+
from typing import Any
|
4
5
|
|
5
6
|
from rich import box
|
6
7
|
from rich.console import Console, Group
|
@@ -20,7 +21,7 @@ def get_console(*args: Any, **kwargs: Any) -> Console:
|
|
20
21
|
|
21
22
|
|
22
23
|
@contextmanager
|
23
|
-
def convert_tqdm_to_rich(description:
|
24
|
+
def convert_tqdm_to_rich(description: str | None = None, verbose: int = 1) -> Any:
|
24
25
|
"""Context manager to convert tqdm to rich."""
|
25
26
|
import opik.evaluation.engine.evaluation_tasks_executor
|
26
27
|
|
@@ -66,7 +67,7 @@ def suppress_opik_logs() -> Any:
|
|
66
67
|
opik_logger.setLevel(original_level)
|
67
68
|
|
68
69
|
|
69
|
-
def display_messages(messages:
|
70
|
+
def display_messages(messages: list[dict[str, str]], prefix: str = "") -> None:
|
70
71
|
for i, msg in enumerate(messages):
|
71
72
|
panel = Panel(
|
72
73
|
Text(msg.get("content", ""), overflow="fold"),
|
@@ -90,11 +91,53 @@ def display_messages(messages: List[Dict[str, str]], prefix: str = "") -> None:
|
|
90
91
|
console.print(Text(prefix) + Text.from_ansi(line))
|
91
92
|
|
92
93
|
|
94
|
+
def _format_tool_panel(tool: dict[str, Any]) -> Panel:
    """Build a Rich panel summarising one tool definition.

    The name, description and parameter schema are read from the tool's
    ``"function"`` entry; the name falls back to the top-level ``"name"`` key
    and finally to ``"unknown_tool"``.
    """
    spec = tool.get("function", {})
    tool_name = spec.get("name") or tool.get("name", "unknown_tool")
    summary = spec.get("description", "")
    schema = spec.get("parameters", {})

    sections: list[str] = []
    if summary:
        sections.append(summary)
    if schema:
        # Sorted keys keep the rendered schema stable between runs.
        sections.append("\nSchema:\n" + json.dumps(schema, indent=2, sort_keys=True))

    if sections:
        body_text = "\n".join(sections)
    else:
        body_text = "(no metadata)"

    return Panel(
        Text(body_text, overflow="fold"),
        title=f"tool: {tool_name}",
        title_align="left",
        border_style="cyan",
        width=PANEL_WIDTH,
        padding=(1, 2),
    )
|
118
|
+
|
119
|
+
|
120
|
+
def _display_tools(tools: list[dict[str, Any]] | None) -> None:
    """Print one panel per registered tool; do nothing when ``tools`` is empty or None."""
    if tools:
        console = get_console()
        console.print(Text("\nTools registered:\n", style="bold"))

        for tool_definition in tools:
            tool_panel = _format_tool_panel(tool_definition)

            # Render into a capture buffer first, then re-emit line by line.
            with console.capture() as captured:
                console.print(tool_panel)

            for rendered_line in captured.get().splitlines():
                console.print(Text.from_ansi(rendered_line))

            # NOTE(review): blank separator assumed to follow each tool panel;
            # confirm the nesting against the upstream file.
            console.print("")
|
134
|
+
|
135
|
+
|
93
136
|
def get_link_text(
|
94
137
|
pre_text: str,
|
95
138
|
link_text: str,
|
96
|
-
optimization_id:
|
97
|
-
dataset_id:
|
139
|
+
optimization_id: str | None = None,
|
140
|
+
dataset_id: str | None = None,
|
98
141
|
) -> Text:
|
99
142
|
if optimization_id is not None and dataset_id is not None:
|
100
143
|
optimization_url = get_optimization_run_url_by_id(
|
@@ -112,8 +155,8 @@ def get_link_text(
|
|
112
155
|
|
113
156
|
def display_header(
|
114
157
|
algorithm: str,
|
115
|
-
optimization_id:
|
116
|
-
dataset_id:
|
158
|
+
optimization_id: str | None = None,
|
159
|
+
dataset_id: str | None = None,
|
117
160
|
verbose: int = 1,
|
118
161
|
) -> None:
|
119
162
|
if verbose < 1:
|
@@ -140,8 +183,9 @@ def display_header(
|
|
140
183
|
def display_result(
|
141
184
|
initial_score: float,
|
142
185
|
best_score: float,
|
143
|
-
best_prompt:
|
186
|
+
best_prompt: list[dict[str, str]],
|
144
187
|
verbose: int = 1,
|
188
|
+
tools: list[dict[str, Any]] | None = None,
|
145
189
|
) -> None:
|
146
190
|
if verbose < 1:
|
147
191
|
return
|
@@ -149,7 +193,7 @@ def display_result(
|
|
149
193
|
console = get_console()
|
150
194
|
console.print(Text("\n> Optimization complete\n"))
|
151
195
|
|
152
|
-
content:
|
196
|
+
content: Text | Panel = []
|
153
197
|
|
154
198
|
if best_score > initial_score:
|
155
199
|
if initial_score == 0:
|
@@ -199,9 +243,15 @@ def display_result(
|
|
199
243
|
)
|
200
244
|
)
|
201
245
|
|
246
|
+
if tools:
|
247
|
+
_display_tools(tools)
|
248
|
+
|
202
249
|
|
203
250
|
def display_configuration(
|
204
|
-
messages:
|
251
|
+
messages: list[dict[str, str]],
|
252
|
+
optimizer_config: dict[str, Any],
|
253
|
+
verbose: int = 1,
|
254
|
+
tools: list[dict[str, Any]] | None = None,
|
205
255
|
) -> None:
|
206
256
|
"""Displays the LLM messages and optimizer configuration using Rich panels."""
|
207
257
|
|
@@ -213,6 +263,7 @@ def display_configuration(
|
|
213
263
|
console.print(Text("> Let's optimize the prompt:\n"))
|
214
264
|
|
215
265
|
display_messages(messages)
|
266
|
+
_display_tools(tools)
|
216
267
|
|
217
268
|
# Panel for configuration
|
218
269
|
console.print(
|
opik_optimizer/task_evaluator.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Any
|
2
|
+
from typing import Any
|
3
|
+
from collections.abc import Callable
|
3
4
|
|
4
5
|
import opik
|
5
6
|
from opik.evaluation import evaluator as opik_evaluator
|
@@ -38,14 +39,14 @@ def _create_metric_class(metric: Callable) -> base_metric.BaseMetric:
|
|
38
39
|
|
39
40
|
def evaluate(
|
40
41
|
dataset: opik.Dataset,
|
41
|
-
evaluated_task: Callable[[
|
42
|
+
evaluated_task: Callable[[dict[str, Any]], dict[str, Any]],
|
42
43
|
metric: Callable,
|
43
44
|
num_threads: int,
|
44
|
-
optimization_id:
|
45
|
-
dataset_item_ids:
|
46
|
-
project_name:
|
47
|
-
n_samples:
|
48
|
-
experiment_config:
|
45
|
+
optimization_id: str | None = None,
|
46
|
+
dataset_item_ids: list[str] | None = None,
|
47
|
+
project_name: str | None = None,
|
48
|
+
n_samples: int | None = None,
|
49
|
+
experiment_config: dict[str, Any] | None = None,
|
49
50
|
verbose: int = 1,
|
50
51
|
) -> float:
|
51
52
|
"""
|
@@ -107,7 +108,7 @@ def evaluate(
|
|
107
108
|
return 0.0
|
108
109
|
|
109
110
|
# We may allow score aggregation customization.
|
110
|
-
score_results:
|
111
|
+
score_results: list[score_result.ScoreResult] = [
|
111
112
|
test_result.score_results[0] for test_result in result.test_results
|
112
113
|
]
|
113
114
|
if not score_results:
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""Utility helpers exposed as part of the opik_optimizer package."""
|
2
|
+
|
3
|
+
from .core import * # noqa: F401,F403
|
4
|
+
from .dataset_utils import * # noqa: F401,F403
|
5
|
+
from .prompt_segments import * # noqa: F401,F403
|
6
|
+
|
7
|
+
from . import core as _core
|
8
|
+
from . import dataset_utils as _dataset_utils
|
9
|
+
from . import prompt_segments as _prompt_segments
|
10
|
+
|
11
|
+
__all__: list[str] = [
|
12
|
+
*getattr(_core, "__all__", []),
|
13
|
+
*getattr(_dataset_utils, "__all__", []),
|
14
|
+
*getattr(_prompt_segments, "__all__", []),
|
15
|
+
]
|
@@ -2,18 +2,13 @@
|
|
2
2
|
|
3
3
|
from typing import (
|
4
4
|
Any,
|
5
|
-
Dict,
|
6
5
|
Final,
|
7
6
|
Literal,
|
8
|
-
Optional,
|
9
|
-
Type,
|
10
7
|
TYPE_CHECKING,
|
11
|
-
List,
|
12
|
-
Callable,
|
13
8
|
)
|
9
|
+
from collections.abc import Callable
|
14
10
|
|
15
11
|
import inspect
|
16
|
-
import typing
|
17
12
|
import base64
|
18
13
|
import json
|
19
14
|
import logging
|
@@ -22,6 +17,8 @@ import string
|
|
22
17
|
import urllib.parse
|
23
18
|
from types import TracebackType
|
24
19
|
|
20
|
+
import requests
|
21
|
+
|
25
22
|
import opik
|
26
23
|
from opik.api_objects.opik_client import Opik
|
27
24
|
from opik.api_objects.optimization import Optimization
|
@@ -30,8 +27,8 @@ ALLOWED_URL_CHARACTERS: Final[str] = ":/&?="
|
|
30
27
|
logger = logging.getLogger(__name__)
|
31
28
|
|
32
29
|
if TYPE_CHECKING:
|
33
|
-
from .optimizable_agent import OptimizableAgent
|
34
|
-
from .optimization_config.chat_prompt import ChatPrompt
|
30
|
+
from opik_optimizer.optimizable_agent import OptimizableAgent
|
31
|
+
from opik_optimizer.optimization_config.chat_prompt import ChatPrompt
|
35
32
|
|
36
33
|
|
37
34
|
class OptimizationContextManager:
|
@@ -45,8 +42,8 @@ class OptimizationContextManager:
|
|
45
42
|
client: Opik,
|
46
43
|
dataset_name: str,
|
47
44
|
objective_name: str,
|
48
|
-
name:
|
49
|
-
metadata:
|
45
|
+
name: str | None = None,
|
46
|
+
metadata: dict[str, Any] | None = None,
|
50
47
|
):
|
51
48
|
"""
|
52
49
|
Initialize the optimization context.
|
@@ -63,9 +60,9 @@ class OptimizationContextManager:
|
|
63
60
|
self.objective_name = objective_name
|
64
61
|
self.name = name
|
65
62
|
self.metadata = metadata
|
66
|
-
self.optimization:
|
63
|
+
self.optimization: Optimization | None = None
|
67
64
|
|
68
|
-
def __enter__(self) ->
|
65
|
+
def __enter__(self) -> Optimization | None:
|
69
66
|
"""Create and return the optimization."""
|
70
67
|
try:
|
71
68
|
self.optimization = self.client.create_optimization(
|
@@ -88,9 +85,9 @@ class OptimizationContextManager:
|
|
88
85
|
|
89
86
|
def __exit__(
|
90
87
|
self,
|
91
|
-
exc_type:
|
92
|
-
exc_val:
|
93
|
-
exc_tb:
|
88
|
+
exc_type: type[BaseException] | None,
|
89
|
+
exc_val: BaseException | None,
|
90
|
+
exc_tb: TracebackType | None,
|
94
91
|
) -> Literal[False]:
|
95
92
|
"""Update optimization status based on context exit."""
|
96
93
|
if self.optimization is None:
|
@@ -227,8 +224,8 @@ def optimization_context(
|
|
227
224
|
client: Opik,
|
228
225
|
dataset_name: str,
|
229
226
|
objective_name: str,
|
230
|
-
name:
|
231
|
-
metadata:
|
227
|
+
name: str | None = None,
|
228
|
+
metadata: dict[str, Any] | None = None,
|
232
229
|
) -> OptimizationContextManager:
|
233
230
|
"""
|
234
231
|
Create a context manager for handling optimization lifecycle.
|
@@ -258,7 +255,7 @@ def ensure_ending_slash(url: str) -> str:
|
|
258
255
|
|
259
256
|
|
260
257
|
def get_optimization_run_url_by_id(
|
261
|
-
dataset_id:
|
258
|
+
dataset_id: str | None, optimization_id: str | None
|
262
259
|
) -> str:
|
263
260
|
if dataset_id is None or optimization_id is None:
|
264
261
|
raise ValueError(
|
@@ -276,11 +273,11 @@ def get_optimization_run_url_by_id(
|
|
276
273
|
return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
|
277
274
|
|
278
275
|
|
279
|
-
def create_litellm_agent_class(prompt: "ChatPrompt") ->
|
276
|
+
def create_litellm_agent_class(prompt: "ChatPrompt") -> type["OptimizableAgent"]:
|
280
277
|
"""
|
281
278
|
Create a LiteLLMAgent from a chat prompt.
|
282
279
|
"""
|
283
|
-
from .optimizable_agent import OptimizableAgent
|
280
|
+
from opik_optimizer.optimizable_agent import OptimizableAgent
|
284
281
|
|
285
282
|
if prompt.invoke is not None:
|
286
283
|
|
@@ -290,7 +287,7 @@ def create_litellm_agent_class(prompt: "ChatPrompt") -> Type["OptimizableAgent"]
|
|
290
287
|
project_name = prompt.project_name
|
291
288
|
|
292
289
|
def invoke(
|
293
|
-
self, messages:
|
290
|
+
self, messages: list[dict[str, str]], seed: int | None = None
|
294
291
|
) -> str:
|
295
292
|
return prompt.invoke(
|
296
293
|
self.model, messages, prompt.tools, **self.model_kwargs
|
@@ -307,13 +304,13 @@ def create_litellm_agent_class(prompt: "ChatPrompt") -> Type["OptimizableAgent"]
|
|
307
304
|
|
308
305
|
|
309
306
|
def function_to_tool_definition(
|
310
|
-
func: Callable, description:
|
311
|
-
) ->
|
307
|
+
func: Callable, description: str | None = None
|
308
|
+
) -> dict[str, Any]:
|
312
309
|
sig = inspect.signature(func)
|
313
310
|
doc = description or func.__doc__ or ""
|
314
311
|
|
315
|
-
properties:
|
316
|
-
required:
|
312
|
+
properties: dict[str, dict[str, str]] = {}
|
313
|
+
required: list[str] = []
|
317
314
|
|
318
315
|
for name, param in sig.parameters.items():
|
319
316
|
param_type = (
|
@@ -350,7 +347,95 @@ def python_type_to_json_type(python_type: type) -> str:
|
|
350
347
|
return "boolean"
|
351
348
|
elif python_type in [dict]:
|
352
349
|
return "object"
|
353
|
-
elif python_type in [list,
|
350
|
+
elif python_type in [list, list]:
|
354
351
|
return "array"
|
355
352
|
else:
|
356
353
|
return "string" # default fallback
|
354
|
+
|
355
|
+
|
356
|
+
def search_wikipedia(query: str, use_api: bool = False) -> list[str]:
    """
    This agent is used to search wikipedia. It can retrieve additional details
    about a topic.

    Args:
        query: The search query string
        use_api: If True, directly use Wikipedia API instead of ColBERTv2.
                If False (default), try ColBERTv2 first with API fallback.
    """
    # The docstring above is left verbatim: function_to_tool_definition() reads
    # func.__doc__ when no description is supplied, so it is runtime text.
    #
    # This function never raises for backend failures: when every backend
    # fails it returns a single "Wikipedia search unavailable" message.
    if use_api:
        # Directly use Wikipedia API when requested
        try:
            return _search_wikipedia_api(query)
        except Exception as api_error:
            print(f"Wikipedia API failed: {api_error}")
            return [f"Wikipedia search unavailable. Query was: {query}"]

    # Default behavior: Try ColBERTv2 first with API fallback
    try:
        # Absolute import: this module lives in the ``opik_optimizer.utils``
        # package, so ``from .colbert`` would resolve to
        # ``opik_optimizer.utils.colbert`` instead of the package-root
        # ``opik_optimizer/colbert.py``. Keeping the import inside the ``try``
        # means an import failure also falls through to the API fallback.
        from opik_optimizer.colbert import ColBERTv2

        colbert = ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts")
        # A single retry keeps the wait short when the endpoint is unreachable
        # (presumably bounds total latency; confirm against ColBERTv2).
        results = colbert(query, k=3, max_retries=1)
        return [str(item.text) for item in results if hasattr(item, "text")]
    except Exception as e:
        print(f"ColBERTv2 search failed: {e}")
        # Fallback to Wikipedia API
        try:
            return _search_wikipedia_api(query)
        except Exception as api_error:
            print(f"Wikipedia API fallback also failed: {api_error}")
            return [f"Wikipedia search unavailable. Query was: {query}"]
|
391
|
+
|
392
|
+
|
393
|
+
def _search_wikipedia_api(query: str, max_results: int = 3) -> list[str]:
    """
    Fallback Wikipedia search using the Wikipedia API.

    Args:
        query: The search query string.
        max_results: Maximum number of search hits to request and return.

    Returns:
        One ``"<title>: <snippet>"`` string per hit with a non-empty snippet,
        or a single "No Wikipedia results found" message when there are none.

    Raises:
        Exception: Any failure (non-200 status, network error, unexpected
            payload) is re-raised as ``Exception`` with the original error
            chained as its cause.
    """
    # Imported and compiled once per call instead of once per search hit.
    import re

    tag_pattern = re.compile(r"<[^>]+>")
    # Match only real entities (&amp; &#39; &#x27;). A looser "&[^;]+;" would
    # swallow ordinary text between a stray "&" and a later ";".
    entity_pattern = re.compile(r"&(?:#\d+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);")

    try:
        # First, search for pages using the search API
        search_params: dict[str, str | int] = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
            "srlimit": max_results,
            "srprop": "snippet",
        }

        headers = {
            "User-Agent": "OpikOptimizer/1.0 (https://github.com/opik-ai/opik-optimizer)"
        }
        search_response = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params=search_params,
            headers=headers,
            timeout=5,
        )

        if search_response.status_code != 200:
            raise Exception(f"Search API returned status {search_response.status_code}")

        search_data = search_response.json()

        results = []
        if "query" in search_data and "search" in search_data["query"]:
            for item in search_data["query"]["search"][:max_results]:
                page_title = item["title"]
                snippet = item.get("snippet", "")

                # Clean up the snippet (remove HTML tags, blank out entities)
                clean_snippet = tag_pattern.sub("", snippet)
                clean_snippet = entity_pattern.sub(" ", clean_snippet).strip()

                if clean_snippet:
                    results.append(f"{page_title}: {clean_snippet}")

        return results if results else [f"No Wikipedia results found for: {query}"]

    except Exception as e:
        raise Exception(f"Wikipedia API request failed: {e}") from e
|