deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/dataset/golden.py +54 -2
  5. deepeval/evaluate/configs.py +1 -1
  6. deepeval/evaluate/evaluate.py +16 -8
  7. deepeval/evaluate/execute.py +74 -27
  8. deepeval/evaluate/utils.py +26 -22
  9. deepeval/integrations/pydantic_ai/agent.py +19 -2
  10. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. deepeval/metrics/__init__.py +14 -12
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  13. deepeval/metrics/answer_relevancy/template.py +188 -92
  14. deepeval/metrics/argument_correctness/template.py +2 -2
  15. deepeval/metrics/base_metric.py +2 -5
  16. deepeval/metrics/bias/template.py +3 -3
  17. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  18. deepeval/metrics/contextual_precision/template.py +115 -66
  19. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  20. deepeval/metrics/contextual_recall/template.py +106 -55
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  22. deepeval/metrics/contextual_relevancy/template.py +87 -58
  23. deepeval/metrics/conversation_completeness/template.py +2 -2
  24. deepeval/metrics/conversational_dag/templates.py +4 -4
  25. deepeval/metrics/conversational_g_eval/template.py +4 -3
  26. deepeval/metrics/dag/templates.py +5 -5
  27. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  28. deepeval/metrics/faithfulness/schema.py +1 -1
  29. deepeval/metrics/faithfulness/template.py +200 -115
  30. deepeval/metrics/g_eval/utils.py +2 -2
  31. deepeval/metrics/hallucination/template.py +4 -4
  32. deepeval/metrics/indicator.py +4 -4
  33. deepeval/metrics/misuse/template.py +2 -2
  34. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  35. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  36. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  37. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  38. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  39. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  40. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  41. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  42. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  43. deepeval/metrics/non_advice/template.py +2 -2
  44. deepeval/metrics/pii_leakage/template.py +2 -2
  45. deepeval/metrics/prompt_alignment/template.py +4 -4
  46. deepeval/metrics/ragas.py +3 -3
  47. deepeval/metrics/role_violation/template.py +2 -2
  48. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  49. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  50. deepeval/metrics/toxicity/template.py +4 -4
  51. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  52. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  53. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  54. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  55. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  56. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  57. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  58. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  59. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  60. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  61. deepeval/metrics/turn_faithfulness/template.py +218 -0
  62. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  63. deepeval/metrics/turn_relevancy/template.py +2 -2
  64. deepeval/metrics/utils.py +39 -58
  65. deepeval/models/__init__.py +0 -12
  66. deepeval/models/base_model.py +16 -38
  67. deepeval/models/embedding_models/__init__.py +7 -0
  68. deepeval/models/embedding_models/azure_embedding_model.py +69 -32
  69. deepeval/models/embedding_models/local_embedding_model.py +39 -22
  70. deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
  71. deepeval/models/embedding_models/openai_embedding_model.py +50 -15
  72. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  73. deepeval/models/llms/anthropic_model.py +53 -20
  74. deepeval/models/llms/azure_model.py +140 -43
  75. deepeval/models/llms/deepseek_model.py +38 -23
  76. deepeval/models/llms/gemini_model.py +222 -103
  77. deepeval/models/llms/grok_model.py +39 -27
  78. deepeval/models/llms/kimi_model.py +39 -23
  79. deepeval/models/llms/litellm_model.py +103 -45
  80. deepeval/models/llms/local_model.py +35 -22
  81. deepeval/models/llms/ollama_model.py +129 -17
  82. deepeval/models/llms/openai_model.py +151 -50
  83. deepeval/models/llms/portkey_model.py +149 -0
  84. deepeval/models/llms/utils.py +5 -3
  85. deepeval/models/retry_policy.py +17 -14
  86. deepeval/models/utils.py +94 -4
  87. deepeval/optimizer/__init__.py +5 -0
  88. deepeval/optimizer/algorithms/__init__.py +6 -0
  89. deepeval/optimizer/algorithms/base.py +29 -0
  90. deepeval/optimizer/algorithms/configs.py +18 -0
  91. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  92. deepeval/optimizer/algorithms/copro/copro.py +836 -0
  93. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  94. deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
  95. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  96. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  97. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  98. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  99. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  100. deepeval/optimizer/algorithms/simba/simba.py +999 -0
  101. deepeval/optimizer/algorithms/simba/types.py +15 -0
  102. deepeval/optimizer/configs.py +31 -0
  103. deepeval/optimizer/policies.py +227 -0
  104. deepeval/optimizer/prompt_optimizer.py +263 -0
  105. deepeval/optimizer/rewriter/__init__.py +5 -0
  106. deepeval/optimizer/rewriter/rewriter.py +124 -0
  107. deepeval/optimizer/rewriter/utils.py +214 -0
  108. deepeval/optimizer/scorer/__init__.py +5 -0
  109. deepeval/optimizer/scorer/base.py +86 -0
  110. deepeval/optimizer/scorer/scorer.py +316 -0
  111. deepeval/optimizer/scorer/utils.py +30 -0
  112. deepeval/optimizer/types.py +148 -0
  113. deepeval/optimizer/utils.py +480 -0
  114. deepeval/prompt/prompt.py +7 -6
  115. deepeval/test_case/__init__.py +1 -3
  116. deepeval/test_case/api.py +12 -10
  117. deepeval/test_case/conversational_test_case.py +19 -1
  118. deepeval/test_case/llm_test_case.py +152 -1
  119. deepeval/test_case/utils.py +4 -8
  120. deepeval/test_run/api.py +15 -14
  121. deepeval/test_run/cache.py +2 -0
  122. deepeval/test_run/test_run.py +9 -4
  123. deepeval/tracing/patchers.py +9 -4
  124. deepeval/tracing/tracing.py +2 -2
  125. deepeval/utils.py +89 -0
  126. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  127. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
  128. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  129. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  130. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  131. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  132. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  133. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  134. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  135. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  136. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  137. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  138. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  139. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  140. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  141. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  142. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  143. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  144. deepeval/models/mlllms/__init__.py +0 -4
  145. deepeval/models/mlllms/azure_model.py +0 -334
  146. deepeval/models/mlllms/gemini_model.py +0 -284
  147. deepeval/models/mlllms/ollama_model.py +0 -144
  148. deepeval/models/mlllms/openai_model.py +0 -258
  149. deepeval/test_case/mllm_test_case.py +0 -170
  150. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  152. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  153. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  154. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  155. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  156. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0

deepeval/models/llms/azure_model.py

@@ -1,10 +1,12 @@
+import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from typing import Optional, Tuple, Union, Dict, List
+from pydantic import BaseModel, SecretStr
+from io import BytesIO
 
+from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.openai_model import (
     structured_outputs_models,
     json_mode_models,
@@ -14,53 +16,87 @@ from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-
-from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.test_case import MLLMImage
+from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.models.llms.utils import (
+    trim_and_load_json,
+)
+from deepeval.models.utils import (
+    parse_model_name,
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.constants import ProviderSlug as PS
 
+valid_multimodal_models = [
+    "gpt-4o",
+    "gpt-4o-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-5",
+]
 
 retry_azure = create_retry_decorator(PS.AZURE)
 
+_ALIAS_MAP = {
+    "api_key": ["azure_openai_api_key"],
+    "base_url": ["azure_endpoint"],
+}
+
 
 class AzureOpenAIModel(DeepEvalBaseLLM):
     def __init__(
         self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        temperature: float = 0,
         deployment_name: Optional[str] = None,
-        model_name: Optional[str] = None,
-        azure_openai_api_key: Optional[str] = None,
        openai_api_version: Optional[str] = None,
-        azure_endpoint: Optional[str] = None,
-        temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        # fetch Azure deployment parameters
-        model_name = model_name or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_MODEL_NAME
-        )
-        self.deployment_name = deployment_name or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_DEPLOYMENT_NAME
-        )
-        self.azure_openai_api_key = (
-            azure_openai_api_key
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.AZURE_OPENAI_API_KEY)
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AzureOpenAIModel",
+            kwargs,
+            _ALIAS_MAP,
         )
+
+        # re-map deprecated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
+        settings = get_settings()
+
+        # fetch Azure deployment parameters
+        model = model or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.AZURE_OPENAI_API_KEY
+
         self.openai_api_version = (
-            openai_api_version
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+            openai_api_version or settings.OPENAI_API_VERSION
         )
-        self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_OPENAI_ENDPOINT
+        self.base_url = (
+            base_url
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
         )
+
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
 
-        # args and kwargs will be passed to the underlying model, in load_model function
-        self.kwargs = kwargs
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(parse_model_name(model_name))
+        super().__init__(parse_model_name(model))
 
     ###############################################
     # Other generate functions
@@ -71,13 +107,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
         client = self.load_model(async_mode=False)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema:
-            if self.model_name in structured_outputs_models:
+            if self.name in structured_outputs_models:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": prompt}],
                     response_format=schema,
                     temperature=self.temperature,
                 )
@@ -89,7 +128,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.model_name in json_mode_models:
+            if self.name in json_mode_models:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
@@ -130,13 +169,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=True)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema:
-            if self.model_name in structured_outputs_models:
+            if self.name in structured_outputs_models:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": prompt}],
                     response_format=schema,
                     temperature=self.temperature,
                 )
@@ -148,7 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.model_name in json_mode_models:
+            if self.name in json_mode_models:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
@@ -198,6 +240,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=False)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            prompt = self.generate_prompt(prompt)
         completion = client.chat.completions.create(
             model=self.deployment_name,
             messages=[{"role": "user", "content": prompt}],
@@ -221,6 +266,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=True)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            prompt = self.generate_prompt(prompt)
         completion = await client.chat.completions.create(
             model=self.deployment_name,
             messages=[{"role": "user", "content": prompt}],
@@ -236,12 +284,49 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
+    def generate_prompt(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        """Convert multimodal input into the proper message format for Azure OpenAI."""
+        prompt = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                prompt.append({"type": "text", "text": ele})
+            elif isinstance(ele, MLLMImage):
+                if ele.local:
+                    import PIL.Image
+
+                    image = PIL.Image.open(ele.url)
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
+                        },
+                    }
+                else:
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {"url": ele.url},
+                    }
+                prompt.append(visual_dict)
+        return prompt
+
+    def encode_pil_image(self, pil_image):
+        """Encode a PIL image to base64 string."""
+        image_buffer = BytesIO()
+        if pil_image.mode in ("RGBA", "LA", "P"):
+            pil_image = pil_image.convert("RGB")
+        pil_image.save(image_buffer, format="JPEG")
+        image_bytes = image_buffer.getvalue()
+        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
+        return base64_encoded_image
+
     ###############################################
     # Utilities
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
+        pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -250,9 +335,6 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Model
    ###############################################
 
-    def get_model_name(self):
-        return f"Azure OpenAI ({self.model_name})"
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(AzureOpenAI)
@@ -270,10 +352,17 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`api_key` to AzureOpenAIModel(...)",
+        )
+
         kw = dict(
-            api_key=self.azure_openai_api_key,
+            api_key=api_key,
             api_version=self.openai_api_version,
-            azure_endpoint=self.azure_endpoint,
+            base_url=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
        )
@@ -285,3 +374,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
             kw.pop("max_retries", None)
             return cls(**kw)
         raise
+
+    def supports_multimodal(self):
+        if self.name in valid_multimodal_models:
+            return True
+        return False
+
+    def get_model_name(self):
+        return f"{self.name} (Azure)"
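
To illustrate the constructor change above, a minimal usage sketch (not from the package; credential values are placeholders): in 3.7.5 the keywords are `model`, `api_key`, and `base_url`, while the 3.7.3 names `azure_openai_api_key` and `azure_endpoint` are still accepted because `_ALIAS_MAP` re-maps them through `normalize_kwargs_and_extract_aliases`.

# Minimal sketch, assuming deepeval 3.7.5 as diffed above; placeholder values.
from deepeval.models.llms.azure_model import AzureOpenAIModel

# New-style keywords:
model = AzureOpenAIModel(
    model="gpt-4o",                                   # was `model_name` in 3.7.3
    api_key="<azure-openai-key>",                     # stored internally as SecretStr
    base_url="https://my-resource.openai.azure.com",  # was `azure_endpoint`
    deployment_name="my-gpt4o-deployment",
    openai_api_version="2024-06-01",
)

# Deprecated 3.7.3 keywords, re-mapped onto api_key/base_url via _ALIAS_MAP:
legacy = AzureOpenAIModel(
    model="gpt-4o",
    azure_openai_api_key="<azure-openai-key>",
    azure_endpoint="https://my-resource.openai.azure.com",
    deployment_name="my-gpt4o-deployment",
)

print(model.get_model_name())       # "gpt-4o (Azure)"
print(model.supports_multimodal())  # True: "gpt-4o" is in valid_multimodal_models

Multimodal inputs follow the same path in all four generate methods: when `check_if_multimodal(prompt)` is true, the prompt is converted to a list and `generate_prompt` rewrites it into chat content parts, `{"type": "text", ...}` for strings and `{"type": "image_url", ...}` for `MLLMImage` entries, with local images inlined as base64 JPEG data URLs via `encode_pil_image`.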
deepeval/models/llms/deepseek_model.py

@@ -1,9 +1,12 @@
 from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.config.settings import get_settings
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -30,35 +33,38 @@ model_pricing = {
 class DeepSeekModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.DEEPSEEK_MODEL_NAME
-        )
-        if model_name not in model_pricing:
+        settings = get_settings()
+
+        model = model or settings.DEEPSEEK_MODEL_NAME
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
             )
-        temperature_from_key = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.TEMPERATURE
-        )
+        temperature_from_key = settings.TEMPERATURE
         if temperature_from_key is None:
             self.temperature = temperature
         else:
             self.temperature = float(temperature_from_key)
         if self.temperature < 0:
             raise ValueError("Temperature must be >= 0.")
-        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.DEEPSEEK_API_KEY
-        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.DEEPSEEK_API_KEY
+
         self.base_url = "https://api.deepseek.com"
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -68,10 +74,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         if schema:
             completion = client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -87,7 +94,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -102,10 +109,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         if schema:
             completion = await client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -121,7 +129,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = await client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -141,7 +149,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.model_name, model_pricing)
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -155,9 +163,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def get_model_name(self):
-        return f"{self.model_name}"
-
     def _client_kwargs(self) -> Dict:
         kwargs = dict(self.kwargs or {})
         # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
@@ -167,8 +172,15 @@ class DeepSeekModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="DeepSeek",
+            env_var_name="DEEPSEEK_API_KEY",
+            param_hint="`api_key` to DeepSeekModel(...)",
+        )
+
         kw = dict(
-            api_key=self.api_key,
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
@@ -180,3 +192,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
             kw.pop("max_retries", None)
             return cls(**kw)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (Deepseek)"
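
A matching sketch for the DeepSeek diff (again not from the package; `"deepseek-chat"` is an assumption about the keys of the `model_pricing` dict referenced in the hunk header above):

# Minimal sketch, assuming deepeval 3.7.5 as diffed above; placeholder values.
from deepeval.models.llms.deepseek_model import DeepSeekModel

llm = DeepSeekModel(
    model="deepseek-chat",         # assumed to be a key of model_pricing
    api_key="<deepseek-api-key>",  # wrapped in SecretStr
)

# SecretStr masks the key when printed, logged, or serialized:
print(llm.api_key)           # **********
print(llm.get_model_name())  # "deepseek-chat (Deepseek)"

As with the Azure client, `_build_client` now unwraps the key through `require_secret_api_key`, so a missing key presumably surfaces as an explicit error pointing at the `DEEPSEEK_API_KEY` setting or the `api_key` parameter rather than as a failed API call.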