opik-optimizer 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +28 -11
  4. opik_optimizer/colbert.py +236 -0
  5. opik_optimizer/data/context7_eval.jsonl +3 -0
  6. opik_optimizer/datasets/context7_eval.py +90 -0
  7. opik_optimizer/datasets/tiny_test.py +33 -34
  8. opik_optimizer/datasets/truthful_qa.py +2 -2
  9. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  10. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +73 -0
  11. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +124 -941
  12. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  13. opik_optimizer/evolutionary_optimizer/llm_support.py +134 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +292 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +223 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +305 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +16 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +26 -23
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +152 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +556 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +493 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +399 -69
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +20 -20
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +51 -50
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +33 -28
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +18 -17
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +21 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/{utils.py → utils/core.py} +111 -26
  46. opik_optimizer/utils/dataset_utils.py +49 -0
  47. opik_optimizer/utils/prompt_segments.py +186 -0
  48. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/METADATA +93 -16
  49. opik_optimizer-1.1.0.dist-info/RECORD +73 -0
  50. opik_optimizer-1.1.0.dist-info/licenses/LICENSE +203 -0
  51. opik_optimizer-1.0.5.dist-info/RECORD +0 -50
  52. opik_optimizer-1.0.5.dist-info/licenses/LICENSE +0 -21
  53. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/WHEEL +0 -0
  54. {opik_optimizer-1.0.5.dist-info → opik_optimizer-1.1.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
- from typing import Any, Dict, List, Optional, Union, Callable
1
+ from typing import Any
2
+ from collections.abc import Callable
2
3
 
3
4
  import copy
4
5
 
@@ -10,7 +11,7 @@ from opik import track
10
11
  class Tool(BaseModel):
11
12
  name: str = Field(..., description="Name of the tool")
12
13
  description: str = Field(..., description="Description of the tool")
13
- parameters: Dict[str, Any] = Field(
14
+ parameters: dict[str, Any] = Field(
14
15
  ..., description="JSON Schema defining the input parameters for the tool"
15
16
  )
16
17
 
@@ -33,14 +34,14 @@ class ChatPrompt:
33
34
  def __init__(
34
35
  self,
35
36
  name: str = "chat-prompt",
36
- system: Optional[str] = None,
37
- user: Optional[str] = None,
38
- messages: Optional[List[Dict[str, str]]] = None,
39
- tools: Optional[List[Dict[str, Any]]] = None,
40
- function_map: Optional[Dict[str, Callable]] = None,
41
- model: Optional[str] = None,
42
- invoke: Optional[Callable] = None,
43
- project_name: Optional[str] = "Default Project",
37
+ system: str | None = None,
38
+ user: str | None = None,
39
+ messages: list[dict[str, str]] | None = None,
40
+ tools: list[dict[str, Any]] | None = None,
41
+ function_map: dict[str, Callable] | None = None,
42
+ model: str | None = None,
43
+ invoke: Callable | None = None,
44
+ project_name: str | None = "Default Project",
44
45
  **model_kwargs: Any,
45
46
  ) -> None:
46
47
  if system is None and user is None and messages is None:
@@ -97,8 +98,8 @@ class ChatPrompt:
97
98
 
98
99
  def get_messages(
99
100
  self,
100
- dataset_item: Optional[Dict[str, str]] = None,
101
- ) -> List[Dict[str, str]]:
101
+ dataset_item: dict[str, str] | None = None,
102
+ ) -> list[dict[str, str]]:
102
103
  # This is a copy, so we can alter the messages:
103
104
  messages = self._standardize_prompts()
104
105
 
@@ -113,8 +114,8 @@ class ChatPrompt:
113
114
  )
114
115
  return messages
115
116
 
116
- def _standardize_prompts(self, **kwargs: Any) -> List[Dict[str, str]]:
117
- standardize_messages: List[Dict[str, str]] = []
117
+ def _standardize_prompts(self, **kwargs: Any) -> list[dict[str, str]]:
118
+ standardize_messages: list[dict[str, str]] = []
118
119
 
119
120
  if self.system is not None:
120
121
  standardize_messages.append({"role": "system", "content": self.system})
@@ -128,13 +129,13 @@ class ChatPrompt:
128
129
 
129
130
  return copy.deepcopy(standardize_messages)
130
131
 
131
- def to_dict(self) -> Dict[str, Union[str, List[Dict[str, str]]]]:
132
+ def to_dict(self) -> dict[str, str | list[dict[str, str]]]:
132
133
  """Convert ChatPrompt to a dictionary for JSON serialization.
133
134
 
134
135
  Returns:
135
136
  Dict containing the serializable representation of this ChatPrompt
136
137
  """
137
- retval: Dict[str, Union[str, List[Dict[str, str]]]] = {}
138
+ retval: dict[str, str | list[dict[str, str]]] = {}
138
139
  if self.system is not None:
139
140
  retval["system"] = self.system
140
141
  if self.user is not None:
@@ -144,29 +145,49 @@ class ChatPrompt:
144
145
  return retval
145
146
 
146
147
  def copy(self) -> "ChatPrompt":
148
+ """Shallow clone preserving model configuration and tools."""
149
+
150
+ # TODO(opik-mcp): once we introduce a dedicated MCP prompt subclass,
151
+ # migrate callers away from generic copies so optimizer metadata stays typed.
152
+ model_kwargs = (
153
+ copy.deepcopy(self.model_kwargs) if self.model_kwargs is not None else {}
154
+ )
147
155
  return ChatPrompt(
156
+ name=self.name,
148
157
  system=self.system,
149
158
  user=self.user,
150
159
  messages=copy.deepcopy(self.messages),
151
- tools=self.tools,
160
+ tools=copy.deepcopy(self.tools),
152
161
  function_map=self.function_map,
162
+ model=self.model,
163
+ invoke=self.invoke,
164
+ project_name=self.project_name,
165
+ **model_kwargs,
153
166
  )
154
167
 
155
- def set_messages(self, messages: List[Dict[str, Any]]) -> None:
168
+ def set_messages(self, messages: list[dict[str, Any]]) -> None:
156
169
  self.system = None
157
170
  self.user = None
158
171
  self.messages = copy.deepcopy(messages)
159
172
 
173
+ # TODO(opik): remove this stop-gap once MetaPromptOptimizer supports MCP.
174
+ # Provides a second-pass flow so tool results can be appended before
175
+ # rerunning the model.
176
+ def with_messages(self, messages: list[dict[str, Any]]) -> "ChatPrompt":
177
+ cloned = self.copy()
178
+ cloned.set_messages(messages)
179
+ return cloned
180
+
160
181
  @classmethod
161
182
  def model_validate(
162
183
  cls,
163
184
  obj: Any,
164
185
  *,
165
- strict: Optional[bool] = None,
166
- from_attributes: Optional[bool] = None,
167
- context: Optional[Any] = None,
168
- by_alias: Optional[bool] = None,
169
- by_name: Optional[bool] = None,
186
+ strict: bool | None = None,
187
+ from_attributes: bool | None = None,
188
+ context: Any | None = None,
189
+ by_alias: bool | None = None,
190
+ by_name: bool | None = None,
170
191
  ) -> "ChatPrompt":
171
192
  """Custom validation method to handle nested objects during deserialization."""
172
193
  return ChatPrompt(
@@ -1,6 +1,6 @@
1
1
  """Module containing configuration classes for optimization."""
2
2
 
3
- from typing import Any, List
3
+ from typing import Any
4
4
 
5
5
  import pydantic
6
6
 
@@ -12,6 +12,6 @@ class TaskConfig(pydantic.BaseModel):
12
12
 
13
13
  instruction_prompt: str
14
14
  use_chat_prompt: bool = False
15
- input_dataset_fields: List[str]
15
+ input_dataset_fields: list[str]
16
16
  output_dataset_field: str
17
- tools: List[Any] = []
17
+ tools: list[Any] = []
@@ -1,4 +1,5 @@
1
- from typing import Dict, Callable, Optional, Any, Union
1
+ from typing import Any
2
+ from collections.abc import Callable
2
3
 
3
4
  EVALUATED_LLM_TASK_OUTPUT = "llm_output"
4
5
 
@@ -8,8 +9,8 @@ class Mapper:
8
9
 
9
10
  def __init__(
10
11
  self,
11
- name: Optional[str] = None,
12
- transform: Optional[Callable[[Any], Any]] = None,
12
+ name: str | None = None,
13
+ transform: Callable[[Any], Any] | None = None,
13
14
  ):
14
15
  if name is not None and transform is not None:
15
16
  raise ValueError("Only one of name or transform can be provided")
@@ -27,9 +28,9 @@ class Mapper:
27
28
 
28
29
  def from_dataset_field(
29
30
  *,
30
- name: Optional[str] = None,
31
- transform: Optional[Callable[[Dict[str, Any]], Any]] = None,
32
- ) -> Union[str, Callable[[Dict[str, Any]], Any]]:
31
+ name: str | None = None,
32
+ transform: Callable[[dict[str, Any]], Any] | None = None,
33
+ ) -> str | Callable[[dict[str, Any]], Any]:
33
34
  if name is not None and transform is not None:
34
35
  raise ValueError("Only one of name or transform can be provided")
35
36
 
@@ -47,8 +48,8 @@ def from_llm_response_text() -> str:
47
48
 
48
49
 
49
50
  def from_agent_output(
50
- *, name: Optional[str] = None, transform: Optional[Callable[[Any], Any]] = None
51
- ) -> Union[str, Callable[[Any], Any]]:
51
+ *, name: str | None = None, transform: Callable[[Any], Any] | None = None
52
+ ) -> str | Callable[[Any], Any]:
52
53
  if name is not None and transform is not None:
53
54
  raise ValueError("Only one of name or transform can be provided")
54
55
 
@@ -1,6 +1,6 @@
1
1
  """Module containing the OptimizationResult class."""
2
2
 
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any
4
4
 
5
5
  import pydantic
6
6
  import rich
@@ -13,25 +13,25 @@ class OptimizationResult(pydantic.BaseModel):
13
13
 
14
14
  optimizer: str = "Optimizer"
15
15
 
16
- prompt: List[Dict[str, str]]
16
+ prompt: list[dict[str, str]]
17
17
  score: float
18
18
  metric_name: str
19
19
 
20
- optimization_id: Optional[str] = None
21
- dataset_id: Optional[str] = None
20
+ optimization_id: str | None = None
21
+ dataset_id: str | None = None
22
22
 
23
23
  # Initial score
24
- initial_prompt: Optional[List[Dict[str, str]]] = None
25
- initial_score: Optional[float] = None
24
+ initial_prompt: list[dict[str, str]] | None = None
25
+ initial_score: float | None = None
26
26
 
27
- details: Dict[str, Any] = pydantic.Field(default_factory=dict)
28
- history: List[Dict[str, Any]] = []
29
- llm_calls: Optional[int] = None
27
+ details: dict[str, Any] = pydantic.Field(default_factory=dict)
28
+ history: list[dict[str, Any]] = []
29
+ llm_calls: int | None = None
30
30
 
31
31
  # MIPRO specific
32
- demonstrations: Optional[List[Dict[str, Any]]] = None
33
- mipro_prompt: Optional[str] = None
34
- tool_prompts: Optional[Dict[str, str]] = None
32
+ demonstrations: list[dict[str, Any]] | None = None
33
+ mipro_prompt: str | None = None
34
+ tool_prompts: dict[str, str] | None = None
35
35
 
36
36
  model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
37
37
 
@@ -40,7 +40,7 @@ class OptimizationResult(pydantic.BaseModel):
40
40
  optimization_id=self.optimization_id, dataset_id=self.dataset_id
41
41
  )
42
42
 
43
- def model_dump(self, *kargs: Any, **kwargs: Any) -> Dict[str, Any]:
43
+ def model_dump(self, *kargs: Any, **kwargs: Any) -> dict[str, Any]:
44
44
  return super().model_dump(*kargs, **kwargs)
45
45
 
46
46
  def _calculate_improvement_str(self) -> str:
@@ -205,4 +205,11 @@ class OptimizationResult(pydantic.BaseModel):
205
205
  """
206
206
  console = get_console()
207
207
  console.print(self)
208
- print("Optimization run link:", self.get_run_link())
208
+ # Gracefully handle cases where optimization tracking isn't available
209
+ if self.dataset_id and self.optimization_id:
210
+ try:
211
+ print("Optimization run link:", self.get_run_link())
212
+ except Exception:
213
+ print("Optimization run link: No optimization run link available")
214
+ else:
215
+ print("Optimization run link: No optimization run link available")
@@ -1,6 +1,7 @@
1
+ import json
1
2
  import logging
2
3
  from contextlib import contextmanager
3
- from typing import Any, Dict, List, Optional, Union
4
+ from typing import Any
4
5
 
5
6
  from rich import box
6
7
  from rich.console import Console, Group
@@ -20,7 +21,7 @@ def get_console(*args: Any, **kwargs: Any) -> Console:
20
21
 
21
22
 
22
23
  @contextmanager
23
- def convert_tqdm_to_rich(description: Optional[str] = None, verbose: int = 1) -> Any:
24
+ def convert_tqdm_to_rich(description: str | None = None, verbose: int = 1) -> Any:
24
25
  """Context manager to convert tqdm to rich."""
25
26
  import opik.evaluation.engine.evaluation_tasks_executor
26
27
 
@@ -66,7 +67,7 @@ def suppress_opik_logs() -> Any:
66
67
  opik_logger.setLevel(original_level)
67
68
 
68
69
 
69
- def display_messages(messages: List[Dict[str, str]], prefix: str = "") -> None:
70
+ def display_messages(messages: list[dict[str, str]], prefix: str = "") -> None:
70
71
  for i, msg in enumerate(messages):
71
72
  panel = Panel(
72
73
  Text(msg.get("content", ""), overflow="fold"),
@@ -90,11 +91,53 @@ def display_messages(messages: List[Dict[str, str]], prefix: str = "") -> None:
90
91
  console.print(Text(prefix) + Text.from_ansi(line))
91
92
 
92
93
 
94
+ def _format_tool_panel(tool: dict[str, Any]) -> Panel:
95
+ function_block = tool.get("function", {})
96
+ name = function_block.get("name") or tool.get("name", "unknown_tool")
97
+ description = function_block.get("description", "")
98
+ parameters = function_block.get("parameters", {})
99
+
100
+ body_lines: list[str] = []
101
+ if description:
102
+ body_lines.append(description)
103
+ if parameters:
104
+ formatted_schema = json.dumps(parameters, indent=2, sort_keys=True)
105
+ body_lines.append("\nSchema:\n" + formatted_schema)
106
+
107
+ content = Text(
108
+ "\n".join(body_lines) if body_lines else "(no metadata)", overflow="fold"
109
+ )
110
+ return Panel(
111
+ content,
112
+ title=f"tool: {name}",
113
+ title_align="left",
114
+ border_style="cyan",
115
+ width=PANEL_WIDTH,
116
+ padding=(1, 2),
117
+ )
118
+
119
+
120
+ def _display_tools(tools: list[dict[str, Any]] | None) -> None:
121
+ if not tools:
122
+ return
123
+
124
+ console = get_console()
125
+ console.print(Text("\nTools registered:\n", style="bold"))
126
+ for tool in tools:
127
+ panel = _format_tool_panel(tool)
128
+ with console.capture() as capture:
129
+ console.print(panel)
130
+ rendered_panel = capture.get()
131
+ for line in rendered_panel.splitlines():
132
+ console.print(Text.from_ansi(line))
133
+ console.print("")
134
+
135
+
93
136
  def get_link_text(
94
137
  pre_text: str,
95
138
  link_text: str,
96
- optimization_id: Optional[str] = None,
97
- dataset_id: Optional[str] = None,
139
+ optimization_id: str | None = None,
140
+ dataset_id: str | None = None,
98
141
  ) -> Text:
99
142
  if optimization_id is not None and dataset_id is not None:
100
143
  optimization_url = get_optimization_run_url_by_id(
@@ -112,8 +155,8 @@ def get_link_text(
112
155
 
113
156
  def display_header(
114
157
  algorithm: str,
115
- optimization_id: Optional[str] = None,
116
- dataset_id: Optional[str] = None,
158
+ optimization_id: str | None = None,
159
+ dataset_id: str | None = None,
117
160
  verbose: int = 1,
118
161
  ) -> None:
119
162
  if verbose < 1:
@@ -140,8 +183,9 @@ def display_header(
140
183
  def display_result(
141
184
  initial_score: float,
142
185
  best_score: float,
143
- best_prompt: List[Dict[str, str]],
186
+ best_prompt: list[dict[str, str]],
144
187
  verbose: int = 1,
188
+ tools: list[dict[str, Any]] | None = None,
145
189
  ) -> None:
146
190
  if verbose < 1:
147
191
  return
@@ -149,7 +193,7 @@ def display_result(
149
193
  console = get_console()
150
194
  console.print(Text("\n> Optimization complete\n"))
151
195
 
152
- content: Union[Text, Panel] = []
196
+ content: Text | Panel = []
153
197
 
154
198
  if best_score > initial_score:
155
199
  if initial_score == 0:
@@ -199,9 +243,15 @@ def display_result(
199
243
  )
200
244
  )
201
245
 
246
+ if tools:
247
+ _display_tools(tools)
248
+
202
249
 
203
250
  def display_configuration(
204
- messages: List[Dict[str, str]], optimizer_config: Dict[str, Any], verbose: int = 1
251
+ messages: list[dict[str, str]],
252
+ optimizer_config: dict[str, Any],
253
+ verbose: int = 1,
254
+ tools: list[dict[str, Any]] | None = None,
205
255
  ) -> None:
206
256
  """Displays the LLM messages and optimizer configuration using Rich panels."""
207
257
 
@@ -213,6 +263,7 @@ def display_configuration(
213
263
  console.print(Text("> Let's optimize the prompt:\n"))
214
264
 
215
265
  display_messages(messages)
266
+ _display_tools(tools)
216
267
 
217
268
  # Panel for configuration
218
269
  console.print(
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Any, Callable, Dict, List, Optional
2
+ from typing import Any
3
+ from collections.abc import Callable
3
4
 
4
5
  import opik
5
6
  from opik.evaluation import evaluator as opik_evaluator
@@ -38,14 +39,14 @@ def _create_metric_class(metric: Callable) -> base_metric.BaseMetric:
38
39
 
39
40
  def evaluate(
40
41
  dataset: opik.Dataset,
41
- evaluated_task: Callable[[Dict[str, Any]], Dict[str, Any]],
42
+ evaluated_task: Callable[[dict[str, Any]], dict[str, Any]],
42
43
  metric: Callable,
43
44
  num_threads: int,
44
- optimization_id: Optional[str] = None,
45
- dataset_item_ids: Optional[List[str]] = None,
46
- project_name: Optional[str] = None,
47
- n_samples: Optional[int] = None,
48
- experiment_config: Optional[Dict[str, Any]] = None,
45
+ optimization_id: str | None = None,
46
+ dataset_item_ids: list[str] | None = None,
47
+ project_name: str | None = None,
48
+ n_samples: int | None = None,
49
+ experiment_config: dict[str, Any] | None = None,
49
50
  verbose: int = 1,
50
51
  ) -> float:
51
52
  """
@@ -107,7 +108,7 @@ def evaluate(
107
108
  return 0.0
108
109
 
109
110
  # We may allow score aggregation customization.
110
- score_results: List[score_result.ScoreResult] = [
111
+ score_results: list[score_result.ScoreResult] = [
111
112
  test_result.score_results[0] for test_result in result.test_results
112
113
  ]
113
114
  if not score_results:
@@ -0,0 +1,15 @@
1
+ """Utility helpers exposed as part of the opik_optimizer package."""
2
+
3
+ from .core import * # noqa: F401,F403
4
+ from .dataset_utils import * # noqa: F401,F403
5
+ from .prompt_segments import * # noqa: F401,F403
6
+
7
+ from . import core as _core
8
+ from . import dataset_utils as _dataset_utils
9
+ from . import prompt_segments as _prompt_segments
10
+
11
+ __all__: list[str] = [
12
+ *getattr(_core, "__all__", []),
13
+ *getattr(_dataset_utils, "__all__", []),
14
+ *getattr(_prompt_segments, "__all__", []),
15
+ ]
@@ -2,18 +2,13 @@
2
2
 
3
3
  from typing import (
4
4
  Any,
5
- Dict,
6
5
  Final,
7
6
  Literal,
8
- Optional,
9
- Type,
10
7
  TYPE_CHECKING,
11
- List,
12
- Callable,
13
8
  )
9
+ from collections.abc import Callable
14
10
 
15
11
  import inspect
16
- import typing
17
12
  import base64
18
13
  import json
19
14
  import logging
@@ -22,6 +17,8 @@ import string
22
17
  import urllib.parse
23
18
  from types import TracebackType
24
19
 
20
+ import requests
21
+
25
22
  import opik
26
23
  from opik.api_objects.opik_client import Opik
27
24
  from opik.api_objects.optimization import Optimization
@@ -30,8 +27,8 @@ ALLOWED_URL_CHARACTERS: Final[str] = ":/&?="
30
27
  logger = logging.getLogger(__name__)
31
28
 
32
29
  if TYPE_CHECKING:
33
- from .optimizable_agent import OptimizableAgent
34
- from .optimization_config.chat_prompt import ChatPrompt
30
+ from opik_optimizer.optimizable_agent import OptimizableAgent
31
+ from opik_optimizer.optimization_config.chat_prompt import ChatPrompt
35
32
 
36
33
 
37
34
  class OptimizationContextManager:
@@ -45,8 +42,8 @@ class OptimizationContextManager:
45
42
  client: Opik,
46
43
  dataset_name: str,
47
44
  objective_name: str,
48
- name: Optional[str] = None,
49
- metadata: Optional[Dict[str, Any]] = None,
45
+ name: str | None = None,
46
+ metadata: dict[str, Any] | None = None,
50
47
  ):
51
48
  """
52
49
  Initialize the optimization context.
@@ -63,9 +60,9 @@ class OptimizationContextManager:
63
60
  self.objective_name = objective_name
64
61
  self.name = name
65
62
  self.metadata = metadata
66
- self.optimization: Optional[Optimization] = None
63
+ self.optimization: Optimization | None = None
67
64
 
68
- def __enter__(self) -> Optional[Optimization]:
65
+ def __enter__(self) -> Optimization | None:
69
66
  """Create and return the optimization."""
70
67
  try:
71
68
  self.optimization = self.client.create_optimization(
@@ -88,9 +85,9 @@ class OptimizationContextManager:
88
85
 
89
86
  def __exit__(
90
87
  self,
91
- exc_type: Optional[Type[BaseException]],
92
- exc_val: Optional[BaseException],
93
- exc_tb: Optional[TracebackType],
88
+ exc_type: type[BaseException] | None,
89
+ exc_val: BaseException | None,
90
+ exc_tb: TracebackType | None,
94
91
  ) -> Literal[False]:
95
92
  """Update optimization status based on context exit."""
96
93
  if self.optimization is None:
@@ -227,8 +224,8 @@ def optimization_context(
227
224
  client: Opik,
228
225
  dataset_name: str,
229
226
  objective_name: str,
230
- name: Optional[str] = None,
231
- metadata: Optional[Dict[str, Any]] = None,
227
+ name: str | None = None,
228
+ metadata: dict[str, Any] | None = None,
232
229
  ) -> OptimizationContextManager:
233
230
  """
234
231
  Create a context manager for handling optimization lifecycle.
@@ -258,7 +255,7 @@ def ensure_ending_slash(url: str) -> str:
258
255
 
259
256
 
260
257
  def get_optimization_run_url_by_id(
261
- dataset_id: Optional[str], optimization_id: Optional[str]
258
+ dataset_id: str | None, optimization_id: str | None
262
259
  ) -> str:
263
260
  if dataset_id is None or optimization_id is None:
264
261
  raise ValueError(
@@ -276,11 +273,11 @@ def get_optimization_run_url_by_id(
276
273
  return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
277
274
 
278
275
 
279
- def create_litellm_agent_class(prompt: "ChatPrompt") -> Type["OptimizableAgent"]:
276
+ def create_litellm_agent_class(prompt: "ChatPrompt") -> type["OptimizableAgent"]:
280
277
  """
281
278
  Create a LiteLLMAgent from a chat prompt.
282
279
  """
283
- from .optimizable_agent import OptimizableAgent
280
+ from opik_optimizer.optimizable_agent import OptimizableAgent
284
281
 
285
282
  if prompt.invoke is not None:
286
283
 
@@ -290,7 +287,7 @@ def create_litellm_agent_class(prompt: "ChatPrompt") -> Type["OptimizableAgent"]
290
287
  project_name = prompt.project_name
291
288
 
292
289
  def invoke(
293
- self, messages: List[Dict[str, str]], seed: Optional[int] = None
290
+ self, messages: list[dict[str, str]], seed: int | None = None
294
291
  ) -> str:
295
292
  return prompt.invoke(
296
293
  self.model, messages, prompt.tools, **self.model_kwargs
@@ -307,13 +304,13 @@ def create_litellm_agent_class(prompt: "ChatPrompt") -> Type["OptimizableAgent"]
307
304
 
308
305
 
309
306
  def function_to_tool_definition(
310
- func: Callable, description: Optional[str] = None
311
- ) -> Dict[str, Any]:
307
+ func: Callable, description: str | None = None
308
+ ) -> dict[str, Any]:
312
309
  sig = inspect.signature(func)
313
310
  doc = description or func.__doc__ or ""
314
311
 
315
- properties: Dict[str, Dict[str, str]] = {}
316
- required: List[str] = []
312
+ properties: dict[str, dict[str, str]] = {}
313
+ required: list[str] = []
317
314
 
318
315
  for name, param in sig.parameters.items():
319
316
  param_type = (
@@ -350,7 +347,95 @@ def python_type_to_json_type(python_type: type) -> str:
350
347
  return "boolean"
351
348
  elif python_type in [dict]:
352
349
  return "object"
353
- elif python_type in [list, typing.List]:
350
+ elif python_type in [list, list]:
354
351
  return "array"
355
352
  else:
356
353
  return "string" # default fallback
354
+
355
+
356
+ def search_wikipedia(query: str, use_api: bool = False) -> list[str]:
357
+ """
358
+ This agent is used to search wikipedia. It can retrieve additional details
359
+ about a topic.
360
+
361
+ Args:
362
+ query: The search query string
363
+ use_api: If True, directly use Wikipedia API instead of ColBERTv2.
364
+ If False (default), try ColBERTv2 first with API fallback.
365
+ """
366
+ if use_api:
367
+ # Directly use Wikipedia API when requested
368
+ try:
369
+ return _search_wikipedia_api(query)
370
+ except Exception as api_error:
371
+ print(f"Wikipedia API failed: {api_error}")
372
+ return [f"Wikipedia search unavailable. Query was: {query}"]
373
+
374
+ # Default behavior: Try ColBERTv2 first with API fallback
375
+ from .colbert import ColBERTv2
376
+
377
+ # Try ColBERTv2 first with a short timeout
378
+ try:
379
+ colbert = ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts")
380
+ # Use a shorter timeout by modifying the max_retries parameter
381
+ results = colbert(query, k=3, max_retries=1)
382
+ return [str(item.text) for item in results if hasattr(item, "text")]
383
+ except Exception as e:
384
+ print(f"ColBERTv2 search failed: {e}")
385
+ # Fallback to Wikipedia API
386
+ try:
387
+ return _search_wikipedia_api(query)
388
+ except Exception as api_error:
389
+ print(f"Wikipedia API fallback also failed: {api_error}")
390
+ return [f"Wikipedia search unavailable. Query was: {query}"]
391
+
392
+
393
+ def _search_wikipedia_api(query: str, max_results: int = 3) -> list[str]:
394
+ """
395
+ Fallback Wikipedia search using the Wikipedia API.
396
+ """
397
+ try:
398
+ # First, search for pages using the search API
399
+ search_params: dict[str, str | int] = {
400
+ "action": "query",
401
+ "format": "json",
402
+ "list": "search",
403
+ "srsearch": query,
404
+ "srlimit": max_results,
405
+ "srprop": "snippet",
406
+ }
407
+
408
+ headers = {
409
+ "User-Agent": "OpikOptimizer/1.0 (https://github.com/opik-ai/opik-optimizer)"
410
+ }
411
+ search_response = requests.get(
412
+ "https://en.wikipedia.org/w/api.php",
413
+ params=search_params,
414
+ headers=headers,
415
+ timeout=5,
416
+ )
417
+
418
+ if search_response.status_code != 200:
419
+ raise Exception(f"Search API returned status {search_response.status_code}")
420
+
421
+ search_data = search_response.json()
422
+
423
+ results = []
424
+ if "query" in search_data and "search" in search_data["query"]:
425
+ for item in search_data["query"]["search"][:max_results]:
426
+ page_title = item["title"]
427
+ snippet = item.get("snippet", "")
428
+
429
+ # Clean up the snippet (remove HTML tags)
430
+ import re
431
+
432
+ clean_snippet = re.sub(r"<[^>]+>", "", snippet)
433
+ clean_snippet = re.sub(r"&[^;]+;", " ", clean_snippet)
434
+
435
+ if clean_snippet.strip():
436
+ results.append(f"{page_title}: {clean_snippet.strip()}")
437
+
438
+ return results if results else [f"No Wikipedia results found for: {query}"]
439
+
440
+ except Exception as e:
441
+ raise Exception(f"Wikipedia API request failed: {e}") from e