deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +139 -2
  5. deepeval/evaluate/evaluate.py +16 -11
  6. deepeval/evaluate/execute.py +13 -181
  7. deepeval/evaluate/utils.py +6 -26
  8. deepeval/integrations/pydantic_ai/agent.py +19 -2
  9. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  10. deepeval/key_handler.py +3 -0
  11. deepeval/metrics/__init__.py +14 -16
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
  13. deepeval/metrics/answer_relevancy/template.py +22 -3
  14. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  15. deepeval/metrics/arena_g_eval/template.py +17 -1
  16. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  17. deepeval/metrics/argument_correctness/template.py +19 -2
  18. deepeval/metrics/base_metric.py +13 -44
  19. deepeval/metrics/bias/bias.py +102 -108
  20. deepeval/metrics/bias/template.py +14 -2
  21. deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
  22. deepeval/metrics/contextual_precision/template.py +115 -66
  23. deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
  24. deepeval/metrics/contextual_recall/template.py +106 -55
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
  26. deepeval/metrics/contextual_relevancy/template.py +87 -58
  27. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  28. deepeval/metrics/conversation_completeness/template.py +23 -3
  29. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  30. deepeval/metrics/conversational_dag/nodes.py +66 -123
  31. deepeval/metrics/conversational_dag/templates.py +16 -0
  32. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  33. deepeval/metrics/dag/dag.py +10 -0
  34. deepeval/metrics/dag/nodes.py +63 -126
  35. deepeval/metrics/dag/templates.py +16 -2
  36. deepeval/metrics/exact_match/exact_match.py +9 -1
  37. deepeval/metrics/faithfulness/faithfulness.py +138 -149
  38. deepeval/metrics/faithfulness/schema.py +1 -1
  39. deepeval/metrics/faithfulness/template.py +200 -115
  40. deepeval/metrics/g_eval/g_eval.py +87 -78
  41. deepeval/metrics/g_eval/template.py +18 -1
  42. deepeval/metrics/g_eval/utils.py +7 -6
  43. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  44. deepeval/metrics/goal_accuracy/template.py +21 -3
  45. deepeval/metrics/hallucination/hallucination.py +60 -75
  46. deepeval/metrics/hallucination/template.py +13 -0
  47. deepeval/metrics/indicator.py +7 -10
  48. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  49. deepeval/metrics/json_correctness/template.py +10 -0
  50. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  51. deepeval/metrics/knowledge_retention/schema.py +9 -3
  52. deepeval/metrics/knowledge_retention/template.py +12 -0
  53. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  54. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  55. deepeval/metrics/mcp/template.py +52 -0
  56. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  57. deepeval/metrics/mcp_use_metric/template.py +12 -0
  58. deepeval/metrics/misuse/misuse.py +77 -97
  59. deepeval/metrics/misuse/template.py +15 -0
  60. deepeval/metrics/multimodal_metrics/__init__.py +0 -19
  61. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
  62. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
  63. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
  64. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
  65. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
  66. deepeval/metrics/non_advice/non_advice.py +79 -105
  67. deepeval/metrics/non_advice/template.py +12 -0
  68. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  69. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  70. deepeval/metrics/pii_leakage/template.py +14 -0
  71. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  72. deepeval/metrics/plan_adherence/template.py +11 -0
  73. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  74. deepeval/metrics/plan_quality/template.py +9 -0
  75. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  76. deepeval/metrics/prompt_alignment/template.py +12 -0
  77. deepeval/metrics/ragas.py +3 -3
  78. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  79. deepeval/metrics/role_adherence/template.py +14 -0
  80. deepeval/metrics/role_violation/role_violation.py +75 -108
  81. deepeval/metrics/role_violation/template.py +12 -0
  82. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  83. deepeval/metrics/step_efficiency/template.py +11 -0
  84. deepeval/metrics/summarization/summarization.py +115 -183
  85. deepeval/metrics/summarization/template.py +19 -0
  86. deepeval/metrics/task_completion/task_completion.py +67 -73
  87. deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
  88. deepeval/metrics/tool_use/tool_use.py +42 -66
  89. deepeval/metrics/topic_adherence/template.py +13 -0
  90. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  94. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
  96. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  97. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
  99. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  100. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
  102. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  103. deepeval/metrics/turn_faithfulness/template.py +218 -0
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +158 -122
  108. deepeval/models/__init__.py +0 -12
  109. deepeval/models/base_model.py +49 -33
  110. deepeval/models/embedding_models/__init__.py +7 -0
  111. deepeval/models/embedding_models/azure_embedding_model.py +79 -33
  112. deepeval/models/embedding_models/local_embedding_model.py +39 -20
  113. deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
  114. deepeval/models/embedding_models/openai_embedding_model.py +42 -22
  115. deepeval/models/llms/amazon_bedrock_model.py +226 -72
  116. deepeval/models/llms/anthropic_model.py +178 -63
  117. deepeval/models/llms/azure_model.py +218 -60
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +95 -40
  120. deepeval/models/llms/gemini_model.py +209 -64
  121. deepeval/models/llms/grok_model.py +139 -68
  122. deepeval/models/llms/kimi_model.py +140 -90
  123. deepeval/models/llms/litellm_model.py +131 -37
  124. deepeval/models/llms/local_model.py +125 -21
  125. deepeval/models/llms/ollama_model.py +147 -24
  126. deepeval/models/llms/openai_model.py +222 -269
  127. deepeval/models/llms/portkey_model.py +81 -22
  128. deepeval/models/llms/utils.py +8 -3
  129. deepeval/models/retry_policy.py +17 -14
  130. deepeval/models/utils.py +106 -5
  131. deepeval/optimizer/__init__.py +5 -0
  132. deepeval/optimizer/algorithms/__init__.py +6 -0
  133. deepeval/optimizer/algorithms/base.py +29 -0
  134. deepeval/optimizer/algorithms/configs.py +18 -0
  135. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  136. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  137. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  138. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  139. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  140. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  141. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  142. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  143. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  144. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  145. deepeval/{optimization → optimizer}/configs.py +5 -8
  146. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  147. deepeval/optimizer/prompt_optimizer.py +263 -0
  148. deepeval/optimizer/rewriter/__init__.py +5 -0
  149. deepeval/optimizer/rewriter/rewriter.py +124 -0
  150. deepeval/optimizer/rewriter/utils.py +214 -0
  151. deepeval/optimizer/scorer/__init__.py +5 -0
  152. deepeval/optimizer/scorer/base.py +86 -0
  153. deepeval/optimizer/scorer/scorer.py +316 -0
  154. deepeval/optimizer/scorer/utils.py +30 -0
  155. deepeval/optimizer/types.py +148 -0
  156. deepeval/{optimization → optimizer}/utils.py +47 -165
  157. deepeval/prompt/prompt.py +5 -9
  158. deepeval/simulator/conversation_simulator.py +43 -0
  159. deepeval/simulator/template.py +13 -0
  160. deepeval/test_case/__init__.py +1 -3
  161. deepeval/test_case/api.py +26 -45
  162. deepeval/test_case/arena_test_case.py +7 -2
  163. deepeval/test_case/conversational_test_case.py +68 -1
  164. deepeval/test_case/llm_test_case.py +206 -1
  165. deepeval/test_case/utils.py +4 -8
  166. deepeval/test_run/api.py +18 -14
  167. deepeval/test_run/test_run.py +3 -3
  168. deepeval/tracing/patchers.py +9 -4
  169. deepeval/tracing/tracing.py +2 -2
  170. deepeval/utils.py +65 -0
  171. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
  172. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
  173. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  174. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  175. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  176. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  177. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  178. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  179. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  180. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  181. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  182. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  183. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  184. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  185. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  186. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  187. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  188. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  189. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  190. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  191. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  192. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  193. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  194. deepeval/models/mlllms/__init__.py +0 -4
  195. deepeval/models/mlllms/azure_model.py +0 -343
  196. deepeval/models/mlllms/gemini_model.py +0 -313
  197. deepeval/models/mlllms/ollama_model.py +0 -175
  198. deepeval/models/mlllms/openai_model.py +0 -309
  199. deepeval/optimization/__init__.py +0 -13
  200. deepeval/optimization/adapters/__init__.py +0 -2
  201. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  202. deepeval/optimization/aggregates.py +0 -14
  203. deepeval/optimization/copro/configs.py +0 -31
  204. deepeval/optimization/gepa/__init__.py +0 -7
  205. deepeval/optimization/gepa/configs.py +0 -115
  206. deepeval/optimization/miprov2/configs.py +0 -134
  207. deepeval/optimization/miprov2/loop.py +0 -785
  208. deepeval/optimization/mutations/__init__.py +0 -0
  209. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  210. deepeval/optimization/policies/__init__.py +0 -16
  211. deepeval/optimization/policies/tie_breaker.py +0 -67
  212. deepeval/optimization/prompt_optimizer.py +0 -462
  213. deepeval/optimization/simba/__init__.py +0 -0
  214. deepeval/optimization/simba/configs.py +0 -33
  215. deepeval/optimization/types.py +0 -361
  216. deepeval/test_case/mllm_test_case.py +0 -170
  217. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  218. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  219. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  220. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  221. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  222. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  223. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  224. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/portkey_model.py CHANGED
@@ -3,12 +3,22 @@ import requests
 from typing import Any, Dict, List, Optional, Union
 from pydantic import AnyUrl, SecretStr
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
+from deepeval.test_case import MLLMImage
+from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.utils import require_param
 
 
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
 class PortkeyModel(DeepEvalBaseLLM):
     def __init__(
         self,
@@ -16,20 +26,15 @@ class PortkeyModel(DeepEvalBaseLLM):
         api_key: Optional[str] = None,
         base_url: Optional[AnyUrl] = None,
         provider: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **kwargs,
     ):
         settings = get_settings()
         model = model or settings.PORTKEY_MODEL_NAME
 
-        self.model = require_param(
-            model,
-            provider_label="Portkey",
-            env_var_name="PORTKEY_MODEL_NAME",
-            param_hint="model",
-        )
-
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr | None = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.PORTKEY_API_KEY
 
@@ -38,6 +43,16 @@ class PortkeyModel(DeepEvalBaseLLM):
         elif settings.PORTKEY_BASE_URL is not None:
             base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
 
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
         self.base_url = require_param(
             base_url,
             provider_label="Portkey",
@@ -45,13 +60,16 @@ class PortkeyModel(DeepEvalBaseLLM):
             param_hint="base_url",
         )
 
-        provider = provider or settings.PORTKEY_PROVIDER_NAME
         self.provider = require_param(
             provider,
             provider_label="Portkey",
             env_var_name="PORTKEY_PROVIDER_NAME",
             param_hint="provider",
         )
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = kwargs
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(model)
 
     def _headers(self) -> Dict[str, str]:
         api_key = require_secret_api_key(
@@ -70,15 +88,51 @@ class PortkeyModel(DeepEvalBaseLLM):
         return headers
 
     def _payload(self, prompt: str) -> Dict[str, Any]:
-        return {
-            "model": self.model,
-            "messages": [{"role": "user", "content": prompt}],
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
+        payload = {
+            "model": self.name,
+            "messages": [{"role": "user", "content": content}],
         }
+        if self.generation_kwargs:
+            payload.update(self.generation_kwargs)
+        return payload
+
+    def generate_content(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
+                else:
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
 
     def _extract_content(self, data: Dict[str, Any]) -> str:
         choices: Union[List[Dict[str, Any]], None] = data.get("choices")
         if not choices:
-            raise ValueError("Portkey response did not include any choices.")
+            raise DeepEvalError("Portkey response did not include any choices.")
         message = choices[0].get("message", {})
         content: Union[str, List[Dict[str, Any]], None] = message.get("content")
         if isinstance(content, str):
@@ -88,12 +142,13 @@ class PortkeyModel(DeepEvalBaseLLM):
             return ""
 
     def generate(self, prompt: str) -> str:
+
         try:
             response = requests.post(
                 f"{self.base_url}/chat/completions",
                 json=self._payload(prompt),
                 headers=self._headers(),
-                timeout=60,
+                timeout=_request_timeout_seconds(),
             )
             response.raise_for_status()
         except requests.HTTPError as error:
@@ -102,31 +157,35 @@ class PortkeyModel(DeepEvalBaseLLM):
                 body = response.json()
             except Exception:
                 body = response.text
-            raise ValueError(
+            raise DeepEvalError(
                 f"Portkey request failed with status {response.status_code}: {body}"
            ) from error
         except requests.RequestException as error:
-            raise ValueError(f"Portkey request failed: {error}") from error
+            raise DeepEvalError(f"Portkey request failed: {error}") from error
         return self._extract_content(response.json())
 
     async def a_generate(self, prompt: str) -> str:
+
         async with aiohttp.ClientSession() as session:
             async with session.post(
                 f"{self.base_url}/chat/completions",
                 json=self._payload(prompt),
                 headers=self._headers(),
-                timeout=60,
+                timeout=_request_timeout_seconds(),
             ) as response:
                 if response.status >= 400:
                     body = await response.text()
-                    raise ValueError(
+                    raise DeepEvalError(
                         f"Portkey request failed with status {response.status}: {body}"
                     )
                 data = await response.json()
                 return self._extract_content(data)
 
-    def get_model_name(self) -> str:
-        return f"Portkey ({self.model})"
-
     def load_model(self):
         return None
+
+    def get_model_name(self):
+        return f"{self.name} (Portkey)"
+
+    def supports_multimodal(self):
+        return True
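The rewritten PortkeyModel accepts generation_kwargs (merged into the chat-completions payload), forwards extra **kwargs, builds multimodal content parts from MLLMImage inputs, and derives its HTTP timeout from DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS with a 30-second fallback. A minimal usage sketch; only the parameter names come from the diff, every string value below is a placeholder:

    # Illustrative only: the model name, key, gateway URL, and provider slug are placeholders.
    from deepeval.models.llms.portkey_model import PortkeyModel

    model = PortkeyModel(
        model="gpt-4o-mini",                     # or set PORTKEY_MODEL_NAME
        api_key="pk-xxxx",                       # or set PORTKEY_API_KEY
        base_url="https://api.portkey.ai/v1",    # or set PORTKEY_BASE_URL
        provider="openai",                       # or set PORTKEY_PROVIDER_NAME
        generation_kwargs={"temperature": 0},    # merged into the request payload
    )
    print(model.generate("Reply with the single word: ok"))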
deepeval/models/llms/utils.py CHANGED
@@ -3,6 +3,11 @@ import re
 import json
 import asyncio
 
+from deepeval.errors import DeepEvalError
+
+
+MULTIMODAL_MODELS = ["GPTModel", "AzureModel", "GeminiModel", "OllamaModel"]
+
 
 def trim_and_load_json(
     input_string: str,
@@ -18,7 +23,7 @@ def trim_and_load_json(
         return json.loads(jsonStr)
     except json.JSONDecodeError:
         error_str = "Evaluation LLM outputted an invalid JSON. Please use a better evaluation model."
-        raise ValueError(error_str)
+        raise DeepEvalError(error_str)
     except Exception as e:
         raise Exception(f"An unexpected error occurred: {str(e)}")
 
@@ -38,7 +43,7 @@ def safe_asyncio_run(coro):
                 return loop.run_until_complete(future)
             else:
                 return loop.run_until_complete(coro)
-        except Exception as inner_e:
+        except Exception:
             raise
-    except Exception as e:
+    except Exception:
         raise
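Callers of trim_and_load_json should note the exception-type change: invalid JSON from the evaluation model now surfaces as DeepEvalError rather than ValueError. A minimal sketch, assuming the helper keeps its existing behavior of pulling the JSON object out of a prose-wrapped reply (the reply string here is made up):

    from deepeval.errors import DeepEvalError
    from deepeval.models.llms.utils import trim_and_load_json

    reply = 'Sure, here is the verdict: {"verdict": "yes", "reason": "on topic"}'
    try:
        data = trim_and_load_json(reply)   # -> {'verdict': 'yes', 'reason': 'on topic'}
    except DeepEvalError:
        data = {}                          # evaluation LLM produced unparseable JSON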
deepeval/models/retry_policy.py CHANGED
@@ -55,6 +55,7 @@ from tenacity.stop import stop_base
 from tenacity.wait import wait_base
 from contextvars import ContextVar, copy_context
 
+from deepeval.utils import require_dependency
 from deepeval.constants import (
     ProviderSlug as PS,
     slugify,
@@ -829,25 +830,23 @@ try:
 except Exception:  # botocore not present (aiobotocore optional)
     BEDROCK_ERROR_POLICY = None
 
-
 ####################
 # Anthropic Policy #
 ####################
 
 try:
-    from anthropic import (
-        AuthenticationError,
-        RateLimitError,
-        APIConnectionError,
-        APITimeoutError,
-        APIStatusError,
+
+    module = require_dependency(
+        "anthropic",
+        provider_label="retry_policy",
+        install_hint="Install it with `pip install anthropic`.",
     )
 
     ANTHROPIC_ERROR_POLICY = ErrorPolicy(
-        auth_excs=(AuthenticationError,),
-        rate_limit_excs=(RateLimitError,),
-        network_excs=(APIConnectionError, APITimeoutError),
-        http_excs=(APIStatusError,),
+        auth_excs=(module.AuthenticationError,),
+        rate_limit_excs=(module.RateLimitError,),
+        network_excs=(module.APIConnectionError, module.APITimeoutError),
+        http_excs=(module.APIStatusError,),
         non_retryable_codes=frozenset(),  # update if we learn of hard quota codes
         message_markers={},
     )
@@ -868,7 +867,11 @@ except Exception:  # Anthropic optional
 # and gate retries using message markers (code sniffing).
 # See: https://github.com/googleapis/python-genai?tab=readme-ov-file#error-handling
 try:
-    from google.genai import errors as gerrors
+    module = require_dependency(
+        "google.genai",
+        provider_label="retry_policy",
+        install_hint="Install it with `pip install google-genai`.",
+    )
 
     _HTTPX_NET_EXCS = _httpx_net_excs()
     _REQUESTS_EXCS = _requests_net_excs()
@@ -887,9 +890,9 @@ try:
     GOOGLE_ERROR_POLICY = ErrorPolicy(
         auth_excs=(),  # we will classify 401/403 via markers below (see non-retryable codes)
         rate_limit_excs=(
-            gerrors.ClientError,
+            module.gerrors.ClientError,
         ),  # includes 429; markers decide retry vs not
-        network_excs=(gerrors.ServerError,)
+        network_excs=(module.gerrors.ServerError,)
         + _HTTPX_NET_EXCS
         + _REQUESTS_EXCS,  # treat 5xx as transient
         http_excs=(),  # no reliable .status_code on exceptions; handled above
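Both provider policies now resolve their SDKs through require_dependency inside the existing try/except guards, so a missing optional SDK still just leaves the policy undefined rather than breaking the import of retry_policy. A hedged sketch of the pattern with a made-up provider; the ErrorPolicy fields mirror the Anthropic block above, and nothing here is an actual deepeval policy:

    from deepeval.models.retry_policy import ErrorPolicy
    from deepeval.utils import require_dependency

    try:
        module = require_dependency(
            "some_provider_sdk",  # hypothetical package name
            provider_label="retry_policy",
            install_hint="Install it with `pip install some-provider-sdk`.",
        )
        SOME_PROVIDER_ERROR_POLICY = ErrorPolicy(
            auth_excs=(module.AuthenticationError,),
            rate_limit_excs=(module.RateLimitError,),
            network_excs=(module.APIConnectionError,),
            http_excs=(module.APIStatusError,),
            non_retryable_codes=frozenset(),
            message_markers={},
        )
    except Exception:  # SDK not installed; this provider simply has no policy
        SOME_PROVIDER_ERROR_POLICY = None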
deepeval/models/utils.py CHANGED
@@ -1,10 +1,14 @@
-from typing import Optional
+import logging
+from typing import Any, Dict, Optional, Tuple
 from pydantic import SecretStr
 
 from deepeval.errors import DeepEvalError
 
 
-def parse_model_name(model_name: Optional[str] = None) -> str:
+logger = logging.getLogger(__name__)
+
+
+def parse_model_name(model_name: Optional[str] = None) -> Optional[str]:
     """Extract base model name from provider-prefixed format.
 
     This function is useful for extracting the actual model name from a
@@ -28,9 +32,9 @@ def parse_model_name(model_name: Optional[str] = None) -> str:
     if model_name is None:
         return None
 
-    if "/" in model_name:
-        _, parsed_model_name = model_name.split("/", 1)
-        return parsed_model_name
+    # if "/" in model_name:
+    #     _, parsed_model_name = model_name.split("/", 1)
+    #     return parsed_model_name
     return model_name
 
 
@@ -74,3 +78,100 @@ def require_secret_api_key(
     )
 
     return api_key
+
+
+def require_costs(
+    model_data,
+    model_name: str,
+    input_token_envvar: str,
+    output_token_envvar: str,
+    cost_per_input_token: Optional[float] = None,
+    cost_per_output_token: Optional[float] = None,
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Validates and returns the cost parameters (input and output tokens) for a model.
+
+    Arguments:
+    - model_data: The model's data object, which should contain `input_price` and `output_price`.
+    - model_name: The model name used for error messaging.
+    - cost_per_input_token: The input token cost provided during model initialization (optional).
+    - cost_per_output_token: The output token cost provided during model initialization (optional).
+    - input_token_envvar: The environment variable name for input cost.
+    - output_token_envvar: The environment variable name for output cost.
+
+    Returns:
+    - A tuple of validated values (input_cost, output_cost). If the values are provided, they are returned.
+      If not provided, they are fetched from settings or environment variables.
+    """
+
+    def validate_cost(
+        value: Optional[float], envvar_name: str
+    ) -> Optional[float]:
+        """Helper function to validate the cost values."""
+        if value is not None and value < 0:
+            raise DeepEvalError(f"{envvar_name} must be >= 0.")
+        return value
+
+    # Validate provided token costs
+    cost_per_input_token = validate_cost(
+        cost_per_input_token, input_token_envvar
+    )
+    cost_per_output_token = validate_cost(
+        cost_per_output_token, output_token_envvar
+    )
+
+    # If model data doesn't have pricing, use provided values or environment variables
+    if model_data.input_price is None or model_data.output_price is None:
+        if cost_per_input_token is None or cost_per_output_token is None:
+            raise DeepEvalError(
+                f"No pricing available for `{model_name}`. "
+                f"Please provide both `cost_per_input_token` and `cost_per_output_token` when initializing `{model_name}`, "
+                f"or set {input_token_envvar} and {output_token_envvar} environment variables."
+            )
+
+        # Return the validated cost values as a tuple
+        return cost_per_input_token, cost_per_output_token
+
+    # If no custom cost values are provided, return model's default cost values
+    return model_data.input_price, model_data.output_price
+
+
+def normalize_kwargs_and_extract_aliases(
+    provider_label: str,
+    kwargs: Dict[str, Any],
+    alias_map: Dict[str, list],
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    Normalize legacy keyword argument names according to alias_map.
+
+    alias_map is of the form: {new_name: [old_name1, old_name2, ...]}
+
+    - Returns (normalized_kwargs, extracted_values)
+      where:
+      - normalized_kwargs has all legacy keys removed (to prevent forwarding
+        to downstream SDK clients).
+      - extracted_values maps new_name -> value for any alias that was used.
+
+    - Logs a warning for each legacy keyword used, so callers know they should
+      migrate to the new name.
+    """
+    normalized = dict(kwargs)
+    extracted: Dict[str, Any] = {}
+
+    for new_name, old_names in alias_map.items():
+        for old_name in old_names:
+            if old_name in normalized:
+                value = normalized.pop(old_name)
+
+                logger.warning(
+                    "%s keyword '%s' is deprecated; please use '%s' instead.",
+                    provider_label,
+                    old_name,
+                    new_name,
+                )
+
+                # Only preserve the first alias value we see for a given new_name
+                if new_name not in extracted:
+                    extracted[new_name] = value
+
+    return normalized, extracted
1
+ from deepeval.optimizer.prompt_optimizer import PromptOptimizer
2
+
3
+ __all__ = [
4
+ "PromptOptimizer",
5
+ ]
deepeval/optimizer/algorithms/__init__.py ADDED
@@ -0,0 +1,6 @@
+from .gepa import GEPA
+from .miprov2 import MIPROV2
+from .copro import COPRO
+from .simba import SIMBA
+
+__all__ = ["GEPA", "MIPROV2", "COPRO", "SIMBA"]
deepeval/optimizer/algorithms/base.py ADDED
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+from typing import Union, List, Dict, Tuple
+
+from deepeval.models.base_model import DeepEvalBaseLLM
+from deepeval.optimizer.scorer.base import BaseScorer
+from deepeval.prompt.prompt import Prompt
+from deepeval.dataset.golden import Golden, ConversationalGolden
+
+
+class BaseAlgorithm(ABC):
+    name: str
+    optimizer_model: DeepEvalBaseLLM
+    scorer: BaseScorer
+
+    @abstractmethod
+    def execute(
+        self,
+        prompt: Prompt,
+        goldens: Union[List[Golden], List[ConversationalGolden]],
+    ) -> Tuple[Prompt, Dict]:
+        raise NotImplementedError
+
+    @abstractmethod
+    async def a_execute(
+        self,
+        prompt: Prompt,
+        goldens: Union[List[Golden], List[ConversationalGolden]],
+    ) -> Tuple[Prompt, Dict]:
+        raise NotImplementedError
1
+ # Internal GEPA constants - not exposed to users
2
+ GEPA_MIN_DELTA: float = 0.0
3
+ GEPA_TIE_TOLERANCE: float = 1e-9
4
+ GEPA_REWRITE_INSTRUCTION_MAX_CHARS: int = 4096
5
+
6
+ # Internal MIPROV2 constants - not exposed to users
7
+ MIPROV2_MIN_DELTA: float = 0.0
8
+ MIPROV2_REWRITE_INSTRUCTION_MAX_CHARS: int = 4096
9
+ MIPROV2_DEFAULT_NUM_CANDIDATES: int = 10
10
+ MIPROV2_DEFAULT_NUM_TRIALS: int = 20
11
+ MIPROV2_DEFAULT_MINIBATCH_SIZE: int = 25
12
+ MIPROV2_DEFAULT_MINIBATCH_FULL_EVAL_STEPS: int = 10
13
+ MIPROV2_DEFAULT_MAX_BOOTSTRAPPED_DEMOS: int = 4
14
+ MIPROV2_DEFAULT_MAX_LABELED_DEMOS: int = 4
15
+ MIPROV2_DEFAULT_NUM_DEMO_SETS: int = 5
16
+
17
+ # Internal SIMBA constants - not exposed to users
18
+ SIMBA_DEMO_INPUT_MAX_CHARS: int = 256
@@ -0,0 +1,5 @@
1
+ from .copro import COPRO
2
+
3
+ __all__ = [
4
+ "COPRO",
5
+ ]