deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,9 @@
1
+ import base64
1
2
  from openai.types.chat.chat_completion import ChatCompletion
2
3
  from openai import AzureOpenAI, AsyncAzureOpenAI
3
- from typing import Optional, Tuple, Union, Dict
4
+ from typing import Optional, Tuple, Union, Dict, List
4
5
  from pydantic import BaseModel, SecretStr
6
+ from io import BytesIO
5
7
 
6
8
  from deepeval.config.settings import get_settings
7
9
  from deepeval.models import DeepEvalBaseLLM
@@ -14,46 +16,75 @@ from deepeval.models.retry_policy import (
14
16
  create_retry_decorator,
15
17
  sdk_retries_for,
16
18
  )
17
-
18
- from deepeval.models.llms.utils import trim_and_load_json
19
- from deepeval.models.utils import parse_model_name, require_secret_api_key
19
+ from deepeval.test_case import MLLMImage
20
+ from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
21
+ from deepeval.models.llms.utils import (
22
+ trim_and_load_json,
23
+ )
24
+ from deepeval.models.utils import (
25
+ parse_model_name,
26
+ require_secret_api_key,
27
+ normalize_kwargs_and_extract_aliases,
28
+ )
20
29
  from deepeval.constants import ProviderSlug as PS
21
30
 
31
+ valid_multimodal_models = [
32
+ "gpt-4o",
33
+ "gpt-4o-mini",
34
+ "gpt-4.1",
35
+ "gpt-4.1-mini",
36
+ "gpt-5",
37
+ ]
22
38
 
23
39
  retry_azure = create_retry_decorator(PS.AZURE)
24
40
 
41
+ _ALIAS_MAP = {
42
+ "api_key": ["azure_openai_api_key"],
43
+ "base_url": ["azure_endpoint"],
44
+ }
45
+
25
46
 
26
47
  class AzureOpenAIModel(DeepEvalBaseLLM):
27
48
  def __init__(
28
49
  self,
50
+ model: Optional[str] = None,
51
+ api_key: Optional[str] = None,
52
+ base_url: Optional[str] = None,
53
+ temperature: float = 0,
29
54
  deployment_name: Optional[str] = None,
30
- model_name: Optional[str] = None,
31
- azure_openai_api_key: Optional[str] = None,
32
55
  openai_api_version: Optional[str] = None,
33
- azure_endpoint: Optional[str] = None,
34
- temperature: float = 0,
35
56
  generation_kwargs: Optional[Dict] = None,
36
57
  **kwargs,
37
58
  ):
59
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
60
+ "AzureOpenAIModel",
61
+ kwargs,
62
+ _ALIAS_MAP,
63
+ )
64
+
65
+ # re-map deprecated keywords to re-named positional args
66
+ if api_key is None and "api_key" in alias_values:
67
+ api_key = alias_values["api_key"]
68
+ if base_url is None and "base_url" in alias_values:
69
+ base_url = alias_values["base_url"]
70
+
38
71
  settings = get_settings()
39
72
 
40
73
  # fetch Azure deployment parameters
41
- model_name = model_name or settings.AZURE_MODEL_NAME
74
+ model = model or settings.AZURE_MODEL_NAME
42
75
  self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
43
76
 
44
- if azure_openai_api_key is not None:
77
+ if api_key is not None:
45
78
  # keep it secret, keep it safe from serialization, logging and the like
46
- self.azure_openai_api_key: SecretStr | None = SecretStr(
47
- azure_openai_api_key
48
- )
79
+ self.api_key: SecretStr | None = SecretStr(api_key)
49
80
  else:
50
- self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
81
+ self.api_key = settings.AZURE_OPENAI_API_KEY
51
82
 
52
83
  self.openai_api_version = (
53
84
  openai_api_version or settings.OPENAI_API_VERSION
54
85
  )
55
- self.azure_endpoint = (
56
- azure_endpoint
86
+ self.base_url = (
87
+ base_url
57
88
  or settings.AZURE_OPENAI_ENDPOINT
58
89
  and str(settings.AZURE_OPENAI_ENDPOINT)
59
90
  )
@@ -62,10 +93,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
62
93
  raise ValueError("Temperature must be >= 0.")
63
94
  self.temperature = temperature
64
95
 
65
- # args and kwargs will be passed to the underlying model, in load_model function
66
- self.kwargs = kwargs
96
+ # Keep sanitized kwargs for client call to strip legacy keys
97
+ self.kwargs = normalized_kwargs
67
98
  self.generation_kwargs = generation_kwargs or {}
68
- super().__init__(parse_model_name(model_name))
99
+ super().__init__(parse_model_name(model))
69
100
 
70
101
  ###############################################
71
102
  # Other generate functions
@@ -76,13 +107,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
76
107
  self, prompt: str, schema: Optional[BaseModel] = None
77
108
  ) -> Tuple[Union[str, Dict], float]:
78
109
  client = self.load_model(async_mode=False)
110
+
111
+ if check_if_multimodal(prompt):
112
+ prompt = convert_to_multi_modal_array(prompt)
113
+ prompt = self.generate_prompt(prompt)
114
+
79
115
  if schema:
80
- if self.model_name in structured_outputs_models:
116
+ if self.name in structured_outputs_models:
81
117
  completion = client.beta.chat.completions.parse(
82
118
  model=self.deployment_name,
83
- messages=[
84
- {"role": "user", "content": prompt},
85
- ],
119
+ messages=[{"role": "user", "content": prompt}],
86
120
  response_format=schema,
87
121
  temperature=self.temperature,
88
122
  )
@@ -94,7 +128,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
94
128
  completion.usage.completion_tokens,
95
129
  )
96
130
  return structured_output, cost
97
- if self.model_name in json_mode_models:
131
+ if self.name in json_mode_models:
98
132
  completion = client.beta.chat.completions.parse(
99
133
  model=self.deployment_name,
100
134
  messages=[
@@ -135,13 +169,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
135
169
  self, prompt: str, schema: Optional[BaseModel] = None
136
170
  ) -> Tuple[Union[str, BaseModel], float]:
137
171
  client = self.load_model(async_mode=True)
172
+
173
+ if check_if_multimodal(prompt):
174
+ prompt = convert_to_multi_modal_array(prompt)
175
+ prompt = self.generate_prompt(prompt)
176
+
138
177
  if schema:
139
- if self.model_name in structured_outputs_models:
178
+ if self.name in structured_outputs_models:
140
179
  completion = await client.beta.chat.completions.parse(
141
180
  model=self.deployment_name,
142
- messages=[
143
- {"role": "user", "content": prompt},
144
- ],
181
+ messages=[{"role": "user", "content": prompt}],
145
182
  response_format=schema,
146
183
  temperature=self.temperature,
147
184
  )
@@ -153,7 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
153
190
  completion.usage.completion_tokens,
154
191
  )
155
192
  return structured_output, cost
156
- if self.model_name in json_mode_models:
193
+ if self.name in json_mode_models:
157
194
  completion = await client.beta.chat.completions.parse(
158
195
  model=self.deployment_name,
159
196
  messages=[
@@ -203,6 +240,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
203
240
  ) -> Tuple[ChatCompletion, float]:
204
241
  # Generate completion
205
242
  client = self.load_model(async_mode=False)
243
+ if check_if_multimodal(prompt):
244
+ prompt = convert_to_multi_modal_array(input=prompt)
245
+ prompt = self.generate_prompt(prompt)
206
246
  completion = client.chat.completions.create(
207
247
  model=self.deployment_name,
208
248
  messages=[{"role": "user", "content": prompt}],
@@ -226,6 +266,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
226
266
  ) -> Tuple[ChatCompletion, float]:
227
267
  # Generate completion
228
268
  client = self.load_model(async_mode=True)
269
+ if check_if_multimodal(prompt):
270
+ prompt = convert_to_multi_modal_array(input=prompt)
271
+ prompt = self.generate_prompt(prompt)
229
272
  completion = await client.chat.completions.create(
230
273
  model=self.deployment_name,
231
274
  messages=[{"role": "user", "content": prompt}],
@@ -241,12 +284,49 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
241
284
 
242
285
  return completion, cost
243
286
 
287
+ def generate_prompt(
288
+ self, multimodal_input: List[Union[str, MLLMImage]] = []
289
+ ):
290
+ """Convert multimodal input into the proper message format for Azure OpenAI."""
291
+ prompt = []
292
+ for ele in multimodal_input:
293
+ if isinstance(ele, str):
294
+ prompt.append({"type": "text", "text": ele})
295
+ elif isinstance(ele, MLLMImage):
296
+ if ele.local:
297
+ import PIL.Image
298
+
299
+ image = PIL.Image.open(ele.url)
300
+ visual_dict = {
301
+ "type": "image_url",
302
+ "image_url": {
303
+ "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
304
+ },
305
+ }
306
+ else:
307
+ visual_dict = {
308
+ "type": "image_url",
309
+ "image_url": {"url": ele.url},
310
+ }
311
+ prompt.append(visual_dict)
312
+ return prompt
313
+
314
+ def encode_pil_image(self, pil_image):
315
+ """Encode a PIL image to base64 string."""
316
+ image_buffer = BytesIO()
317
+ if pil_image.mode in ("RGBA", "LA", "P"):
318
+ pil_image = pil_image.convert("RGB")
319
+ pil_image.save(image_buffer, format="JPEG")
320
+ image_bytes = image_buffer.getvalue()
321
+ base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
322
+ return base64_encoded_image
323
+
244
324
  ###############################################
245
325
  # Utilities
246
326
  ###############################################
247
327
 
248
328
  def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
249
- pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
329
+ pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
250
330
  input_cost = input_tokens * pricing["input"]
251
331
  output_cost = output_tokens * pricing["output"]
252
332
  return input_cost + output_cost
@@ -255,9 +335,6 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
255
335
  # Model
256
336
  ###############################################
257
337
 
258
- def get_model_name(self):
259
- return f"Azure OpenAI ({self.model_name})"
260
-
261
338
  def load_model(self, async_mode: bool = False):
262
339
  if not async_mode:
263
340
  return self._build_client(AzureOpenAI)
@@ -276,16 +353,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
276
353
 
277
354
  def _build_client(self, cls):
278
355
  api_key = require_secret_api_key(
279
- self.azure_openai_api_key,
356
+ self.api_key,
280
357
  provider_label="AzureOpenAI",
281
358
  env_var_name="AZURE_OPENAI_API_KEY",
282
- param_hint="`azure_openai_api_key` to AzureOpenAIModel(...)",
359
+ param_hint="`api_key` to AzureOpenAIModel(...)",
283
360
  )
284
361
 
285
362
  kw = dict(
286
363
  api_key=api_key,
287
364
  api_version=self.openai_api_version,
288
- azure_endpoint=self.azure_endpoint,
365
+ base_url=self.base_url,
289
366
  azure_deployment=self.deployment_name,
290
367
  **self._client_kwargs(),
291
368
  )
@@ -297,3 +374,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
297
374
  kw.pop("max_retries", None)
298
375
  return cls(**kw)
299
376
  raise
377
+
378
+ def supports_multimodal(self):
379
+ if self.name in valid_multimodal_models:
380
+ return True
381
+ return False
382
+
383
+ def get_model_name(self):
384
+ return f"{self.name} (Azure)"
@@ -4,7 +4,9 @@ from pydantic import BaseModel, SecretStr
4
4
 
5
5
  from deepeval.config.settings import get_settings
6
6
  from deepeval.models.llms.utils import trim_and_load_json
7
- from deepeval.models.utils import require_secret_api_key
7
+ from deepeval.models.utils import (
8
+ require_secret_api_key,
9
+ )
8
10
  from deepeval.models import DeepEvalBaseLLM
9
11
  from deepeval.models.retry_policy import (
10
12
  create_retry_decorator,
@@ -31,16 +33,16 @@ model_pricing = {
31
33
  class DeepSeekModel(DeepEvalBaseLLM):
32
34
  def __init__(
33
35
  self,
34
- api_key: Optional[str] = None,
35
36
  model: Optional[str] = None,
37
+ api_key: Optional[str] = None,
36
38
  temperature: float = 0,
37
39
  generation_kwargs: Optional[Dict] = None,
38
40
  **kwargs,
39
41
  ):
40
42
  settings = get_settings()
41
43
 
42
- model_name = model or settings.DEEPSEEK_MODEL_NAME
43
- if model_name not in model_pricing:
44
+ model = model or settings.DEEPSEEK_MODEL_NAME
45
+ if model not in model_pricing:
44
46
  raise ValueError(
45
47
  f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
46
48
  )
@@ -59,9 +61,10 @@ class DeepSeekModel(DeepEvalBaseLLM):
59
61
  self.api_key = settings.DEEPSEEK_API_KEY
60
62
 
61
63
  self.base_url = "https://api.deepseek.com"
64
+ # Keep sanitized kwargs for client call to strip legacy keys
62
65
  self.kwargs = kwargs
63
66
  self.generation_kwargs = generation_kwargs or {}
64
- super().__init__(model_name)
67
+ super().__init__(model)
65
68
 
66
69
  ###############################################
67
70
  # Other generate functions
@@ -71,10 +74,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
71
74
  def generate(
72
75
  self, prompt: str, schema: Optional[BaseModel] = None
73
76
  ) -> Tuple[Union[str, Dict], float]:
77
+
74
78
  client = self.load_model(async_mode=False)
75
79
  if schema:
76
80
  completion = client.chat.completions.create(
77
- model=self.model_name,
81
+ model=self.name,
78
82
  messages=[{"role": "user", "content": prompt}],
79
83
  response_format={"type": "json_object"},
80
84
  temperature=self.temperature,
@@ -90,7 +94,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
90
94
  return schema.model_validate(json_output), cost
91
95
  else:
92
96
  completion = client.chat.completions.create(
93
- model=self.model_name,
97
+ model=self.name,
94
98
  messages=[{"role": "user", "content": prompt}],
95
99
  **self.generation_kwargs,
96
100
  )
@@ -105,10 +109,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
105
109
  async def a_generate(
106
110
  self, prompt: str, schema: Optional[BaseModel] = None
107
111
  ) -> Tuple[Union[str, Dict], float]:
112
+
108
113
  client = self.load_model(async_mode=True)
109
114
  if schema:
110
115
  completion = await client.chat.completions.create(
111
- model=self.model_name,
116
+ model=self.name,
112
117
  messages=[{"role": "user", "content": prompt}],
113
118
  response_format={"type": "json_object"},
114
119
  temperature=self.temperature,
@@ -124,7 +129,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
124
129
  return schema.model_validate(json_output), cost
125
130
  else:
126
131
  completion = await client.chat.completions.create(
127
- model=self.model_name,
132
+ model=self.name,
128
133
  messages=[{"role": "user", "content": prompt}],
129
134
  **self.generation_kwargs,
130
135
  )
@@ -144,7 +149,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
144
149
  input_tokens: int,
145
150
  output_tokens: int,
146
151
  ) -> float:
147
- pricing = model_pricing.get(self.model_name, model_pricing)
152
+ pricing = model_pricing.get(self.name, model_pricing)
148
153
  input_cost = input_tokens * pricing["input"]
149
154
  output_cost = output_tokens * pricing["output"]
150
155
  return input_cost + output_cost
@@ -158,9 +163,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
158
163
  return self._build_client(OpenAI)
159
164
  return self._build_client(AsyncOpenAI)
160
165
 
161
- def get_model_name(self):
162
- return f"{self.model_name}"
163
-
164
166
  def _client_kwargs(self) -> Dict:
165
167
  kwargs = dict(self.kwargs or {})
166
168
  # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
@@ -190,3 +192,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
190
192
  kw.pop("max_retries", None)
191
193
  return cls(**kw)
192
194
  raise
195
+
196
+ def get_model_name(self):
197
+ return f"{self.name} (Deepseek)"