deepeval 3.7.5__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +106 -21
  5. deepeval/evaluate/evaluate.py +0 -3
  6. deepeval/evaluate/execute.py +10 -222
  7. deepeval/evaluate/utils.py +6 -30
  8. deepeval/key_handler.py +3 -0
  9. deepeval/metrics/__init__.py +0 -4
  10. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  11. deepeval/metrics/answer_relevancy/template.py +102 -179
  12. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  13. deepeval/metrics/arena_g_eval/template.py +17 -1
  14. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  15. deepeval/metrics/argument_correctness/template.py +19 -2
  16. deepeval/metrics/base_metric.py +13 -41
  17. deepeval/metrics/bias/bias.py +102 -108
  18. deepeval/metrics/bias/template.py +14 -2
  19. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  20. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  22. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  23. deepeval/metrics/conversation_completeness/template.py +23 -3
  24. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  25. deepeval/metrics/conversational_dag/nodes.py +66 -123
  26. deepeval/metrics/conversational_dag/templates.py +16 -0
  27. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  28. deepeval/metrics/dag/dag.py +10 -0
  29. deepeval/metrics/dag/nodes.py +63 -126
  30. deepeval/metrics/dag/templates.py +14 -0
  31. deepeval/metrics/exact_match/exact_match.py +9 -1
  32. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  33. deepeval/metrics/g_eval/g_eval.py +87 -78
  34. deepeval/metrics/g_eval/template.py +18 -1
  35. deepeval/metrics/g_eval/utils.py +7 -6
  36. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  37. deepeval/metrics/goal_accuracy/template.py +21 -3
  38. deepeval/metrics/hallucination/hallucination.py +60 -75
  39. deepeval/metrics/hallucination/template.py +13 -0
  40. deepeval/metrics/indicator.py +3 -6
  41. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  42. deepeval/metrics/json_correctness/template.py +10 -0
  43. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  44. deepeval/metrics/knowledge_retention/schema.py +9 -3
  45. deepeval/metrics/knowledge_retention/template.py +12 -0
  46. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  47. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  48. deepeval/metrics/mcp/template.py +52 -0
  49. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  50. deepeval/metrics/mcp_use_metric/template.py +12 -0
  51. deepeval/metrics/misuse/misuse.py +77 -97
  52. deepeval/metrics/misuse/template.py +15 -0
  53. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  54. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  55. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  56. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  57. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  58. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  59. deepeval/metrics/non_advice/non_advice.py +79 -105
  60. deepeval/metrics/non_advice/template.py +12 -0
  61. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  62. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  63. deepeval/metrics/pii_leakage/template.py +14 -0
  64. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  65. deepeval/metrics/plan_adherence/template.py +11 -0
  66. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  67. deepeval/metrics/plan_quality/template.py +9 -0
  68. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  69. deepeval/metrics/prompt_alignment/template.py +12 -0
  70. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  71. deepeval/metrics/role_adherence/template.py +14 -0
  72. deepeval/metrics/role_violation/role_violation.py +75 -108
  73. deepeval/metrics/role_violation/template.py +12 -0
  74. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  75. deepeval/metrics/step_efficiency/template.py +11 -0
  76. deepeval/metrics/summarization/summarization.py +115 -183
  77. deepeval/metrics/summarization/template.py +19 -0
  78. deepeval/metrics/task_completion/task_completion.py +67 -73
  79. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  80. deepeval/metrics/tool_use/tool_use.py +42 -66
  81. deepeval/metrics/topic_adherence/template.py +13 -0
  82. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  83. deepeval/metrics/toxicity/template.py +13 -0
  84. deepeval/metrics/toxicity/toxicity.py +80 -99
  85. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  86. deepeval/metrics/turn_contextual_precision/template.py +1 -1
  87. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
  88. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  89. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
  90. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  91. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
  92. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  93. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
  94. deepeval/metrics/turn_relevancy/template.py +14 -0
  95. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  96. deepeval/metrics/utils.py +145 -90
  97. deepeval/models/base_model.py +44 -6
  98. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  99. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  100. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  101. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  102. deepeval/models/llms/amazon_bedrock_model.py +226 -71
  103. deepeval/models/llms/anthropic_model.py +141 -47
  104. deepeval/models/llms/azure_model.py +167 -94
  105. deepeval/models/llms/constants.py +2032 -0
  106. deepeval/models/llms/deepseek_model.py +79 -29
  107. deepeval/models/llms/gemini_model.py +126 -67
  108. deepeval/models/llms/grok_model.py +125 -59
  109. deepeval/models/llms/kimi_model.py +126 -81
  110. deepeval/models/llms/litellm_model.py +92 -18
  111. deepeval/models/llms/local_model.py +114 -15
  112. deepeval/models/llms/ollama_model.py +97 -76
  113. deepeval/models/llms/openai_model.py +167 -310
  114. deepeval/models/llms/portkey_model.py +58 -16
  115. deepeval/models/llms/utils.py +5 -2
  116. deepeval/models/utils.py +60 -4
  117. deepeval/simulator/conversation_simulator.py +43 -0
  118. deepeval/simulator/template.py +13 -0
  119. deepeval/test_case/api.py +24 -45
  120. deepeval/test_case/arena_test_case.py +7 -2
  121. deepeval/test_case/conversational_test_case.py +55 -6
  122. deepeval/test_case/llm_test_case.py +60 -6
  123. deepeval/test_run/api.py +3 -0
  124. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
  125. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
  126. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  127. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  128. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  129. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  130. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  131. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  132. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  133. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/deepseek_model.py

@@ -2,9 +2,11 @@ from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel, SecretStr
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import (
+    require_costs,
     require_secret_api_key,
 )
 from deepeval.models import DeepEvalBaseLLM
@@ -13,57 +15,87 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.constants import ProviderSlug as PS
+from deepeval.models.llms.constants import DEEPSEEK_MODELS_DATA
+from deepeval.utils import require_param
 
 
 # consistent retry rules
 retry_deepseek = create_retry_decorator(PS.DEEPSEEK)
 
-model_pricing = {
-    "deepseek-chat": {
-        "input": 0.27 / 1e6,
-        "output": 1.10 / 1e6,
-    },
-    "deepseek-reasoner": {
-        "input": 0.55 / 1e6,
-        "output": 2.19 / 1e6,
-    },
-}
-
 
 class DeepSeekModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: Optional[str] = None,
        api_key: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         settings = get_settings()
 
         model = model or settings.DEEPSEEK_MODEL_NAME
-        if model not in model_pricing:
-            raise ValueError(
-                f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
-            )
-        temperature_from_key = settings.TEMPERATURE
-        if temperature_from_key is None:
-            self.temperature = temperature
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
         else:
-            self.temperature = float(temperature_from_key)
-        if self.temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.DEEPSEEK_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.DEEPSEEK_COST_PER_OUTPUT_TOKEN
+        )
 
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr | None = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.DEEPSEEK_API_KEY
 
         self.base_url = "https://api.deepseek.com"
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="DeepSeekModel",
+            env_var_name="DEEPSEEK_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if temperature < 0:
+            raise DeepEvalError("Temperature must be >= 0.")
+
+        self.model_data = DEEPSEEK_MODELS_DATA.get(model)
+        self.temperature = temperature
+
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "DEEPSEEK_COST_PER_INPUT_TOKEN",
+            "DEEPSEEK_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
         super().__init__(model)
 
     ###############################################
@@ -73,7 +105,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
     @retry_deepseek
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
 
         client = self.load_model(async_mode=False)
         if schema:
@@ -108,7 +140,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
     @retry_deepseek
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
 
         client = self.load_model(async_mode=True)
         if schema:
@@ -149,11 +181,29 @@ class DeepSeekModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.name, model_pricing)
-        input_cost = input_tokens * pricing["input"]
-        output_cost = output_tokens * pricing["output"]
+        input_cost = input_tokens * self.model_data.input_price
+        output_cost = output_tokens * self.model_data.output_price
         return input_cost + output_cost
 
+    ###############################################
+    # Capabilities
+    ###############################################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
+
     ###############################################
     # Model
     ###############################################
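Taken together, these changes swap the hard-coded `model_pricing` table for catalog data in `DEEPSEEK_MODELS_DATA`, with optional per-token cost overrides resolved by `require_costs`. A minimal usage sketch of the new constructor surface; the API key is a placeholder and the cost values are illustrative, copied from the removed `model_pricing` table (current DeepSeek pricing may differ):

    from deepeval.models.llms.deepseek_model import DeepSeekModel

    # Per-token costs may now be passed explicitly; otherwise they fall back
    # to settings.DEEPSEEK_COST_PER_INPUT_TOKEN / ..._OUTPUT_TOKEN, and
    # finally to the DEEPSEEK_MODELS_DATA catalog via require_costs().
    model = DeepSeekModel(
        model="deepseek-chat",
        api_key="<DEEPSEEK_API_KEY>",  # placeholder
        temperature=0.0,
        cost_per_input_token=0.27 / 1e6,   # illustrative, from the removed
        cost_per_output_token=1.10 / 1e6,  # model_pricing table
    )

    # generate() now returns Tuple[Union[str, BaseModel], float]
    output, cost = model.generate("Reply with a single word: ping")
    print(output, cost)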
deepeval/models/llms/gemini_model.py

@@ -1,8 +1,9 @@
 import json
-import requests
+import base64
 from pydantic import BaseModel, SecretStr
-from typing import TYPE_CHECKING, Optional, Dict, List, Union
+from typing import TYPE_CHECKING, Optional, Dict, List, Union, Tuple
 
+from deepeval.errors import DeepEvalError
 from deepeval.test_case import MLLMImage
 from deepeval.config.settings import get_settings
 from deepeval.models.utils import require_secret_api_key
@@ -16,14 +17,7 @@ from deepeval.utils import (
 )
 from deepeval.models.base_model import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
-
-valid_multimodal_models = [
-    "gemini-2.5-pro",
-    "gemini-2.5-flash",
-    "gemini-1.5-pro",
-    "gemini-1.5-flash",
-    # TODO: Add more models later
-]
+from deepeval.models.llms.constants import GEMINI_MODELS_DATA
 
 if TYPE_CHECKING:
     from google.genai import Client
@@ -67,10 +61,10 @@ class GeminiModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         api_key: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
-        service_account_key: Optional[Dict[str, str]] = None,
+        service_account_key: Optional[Union[str, Dict[str, str]]] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
@@ -78,38 +72,53 @@ class GeminiModel(DeepEvalBaseLLM):
         settings = get_settings()
 
         model = model or settings.GEMINI_MODEL_NAME or default_gemini_model
+        self.model_data = GEMINI_MODELS_DATA.get(model)
 
         # Get API key from settings if not provided
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and aolike
-            self.api_key: SecretStr | None = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.GOOGLE_API_KEY
 
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
         self.project = project or settings.GOOGLE_CLOUD_PROJECT
-        self.location = (
-            location
-            or settings.GOOGLE_CLOUD_LOCATION is not None
-            and str(settings.GOOGLE_CLOUD_LOCATION)
+        location = (
+            location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
+        self.location = str(location).strip() if location is not None else None
         self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
 
-        if service_account_key:
-            self.service_account_key = service_account_key
+        self.service_account_key: Optional[SecretStr] = None
+        if service_account_key is None:
+            self.service_account_key = settings.GOOGLE_SERVICE_ACCOUNT_KEY
+        elif isinstance(service_account_key, dict):
+            self.service_account_key = SecretStr(
+                json.dumps(service_account_key)
+            )
         else:
-            service_account_key_data = settings.GOOGLE_SERVICE_ACCOUNT_KEY
-            if service_account_key_data is None:
-                self.service_account_key = None
-            elif isinstance(service_account_key_data, str):
-                self.service_account_key = json.loads(service_account_key_data)
+            str_value = str(service_account_key).strip()
+            self.service_account_key = (
+                SecretStr(str_value) if str_value else None
+            )
 
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
+
         self.temperature = temperature
 
         # Raw kwargs destined for the underlying Client
         self.kwargs = kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
 
         self._module = self._require_module()
         # Configure default model generation settings
@@ -145,40 +154,34 @@
         True if the model should use Vertex AI, False otherwise
         """
         if self.use_vertexai is not None:
-            return self.use_vertexai.lower() == "yes"
+            return self.use_vertexai
         if self.project and self.location:
             return True
         else:
             return False
 
     @retry_gemini
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ) -> List[Union[str, MLLMImage]]:
-        """Converts DeepEval multimodal input into GenAI SDK compatible format.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
+    ):
+        multimodal_input = (
+            multimodal_input if multimodal_input is not None else []
+        )
+        content = []
 
-        Returns:
-            List of strings and PIL Image objects ready for model input
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append(element)
+            elif isinstance(element, MLLMImage):
+                # Gemini doesn't support direct external URLs
+                # Must convert all images to bytes
+                if element.url and not element.local:
+                    import requests
 
-        Raises:
-            ValueError: If an invalid input type is provided
-        """
-        prompt = []
-        settings = get_settings()
+                    settings = get_settings()
 
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append(ele)
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    with open(ele.url, "rb") as f:
-                        image_data = f.read()
-                else:
                     response = requests.get(
-                        ele.url,
+                        element.url,
                         timeout=(
                             settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
                             settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
@@ -186,21 +189,38 @@
                     )
                     response.raise_for_status()
                     image_data = response.content
+                    mime_type = response.headers.get(
+                        "content-type", element.mimeType or "image/jpeg"
+                    )
+                else:
+                    element.ensure_images_loaded()
+                    try:
+                        image_data = base64.b64decode(element.dataBase64)
+                    except Exception:
+                        raise ValueError(
+                            f"Invalid base64 data in MLLMImage: {element._id}"
+                        )
+
+                    mime_type = element.mimeType or "image/jpeg"
 
+                # Create Part from bytes
                 image_part = self._module.types.Part.from_bytes(
-                    data=image_data, mime_type="image/jpeg"
+                    data=image_data, mime_type=mime_type
                 )
-                prompt.append(image_part)
+                content.append(image_part)
             else:
-                raise ValueError(f"Invalid input type: {type(ele)}")
-        return prompt
+                raise DeepEvalError(f"Invalid input type: {type(element)}")
+
+        return content
 
     ###############################################
     # Generate functions
     ###############################################
 
     @retry_gemini
-    def generate(self, prompt: str, schema: Optional[BaseModel] = None) -> str:
+    def generate(
+        self, prompt: str, schema: Optional[BaseModel] = None
+    ) -> Tuple[Union[str, BaseModel], float]:
         """Generates text from a prompt.
 
         Args:
@@ -213,9 +233,8 @@
         client = self.load_model()
 
         if check_if_multimodal(prompt):
-
             prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = self.generate_content(prompt)
 
         if schema is not None:
             response = client.models.generate_content(
@@ -245,7 +264,7 @@
     @retry_gemini
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> str:
+    ) -> Tuple[Union[str, BaseModel], float]:
         """Asynchronously generates text from a prompt.
 
         Args:
@@ -259,7 +278,7 @@
 
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = self.generate_content(prompt)
 
         if schema is not None:
             response = await client.aio.models.generate_content(
@@ -286,6 +305,32 @@
             )
         return response.text, 0
 
+    #########################
+    # Capabilities          #
+    #########################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        """
+        OpenAI models that natively enforce typed structured outputs.
+        Used by generate(...) when a schema is provided.
+        """
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        """
+        OpenAI models that enforce JSON mode
+        """
+        return self.model_data.supports_json
+
     #########
     # Model #
     #########
@@ -326,8 +371,27 @@
         client_kwargs = self._client_kwargs(**self.kwargs)
 
         if self.should_use_vertexai():
+            service_account_key_json = require_secret_api_key(
+                self.service_account_key,
+                provider_label="Google Gemini",
+                env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
+                param_hint="`service_account_key` to GeminiModel(...)",
+            )
+
+            try:
+                service_account_key = json.loads(service_account_key_json)
+            except Exception as e:
+                raise DeepEvalError(
+                    "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
+                ) from e
+
+            if not isinstance(service_account_key, dict):
+                raise DeepEvalError(
+                    "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
+                )
+
             if not self.project or not self.location:
-                raise ValueError(
+                raise DeepEvalError(
                     "When using Vertex AI API, both project and location are required. "
                     "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and "
                     "GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
@@ -336,12 +400,12 @@
             oauth2 = self._require_oauth2()
             credentials = (
                 oauth2.service_account.Credentials.from_service_account_info(
-                    self.service_account_key,
+                    service_account_key,
                     scopes=[
                         "https://www.googleapis.com/auth/cloud-platform",
                     ],
                 )
-                if self.service_account_key
+                if service_account_key
                 else None
            )
 
@@ -364,10 +428,5 @@
 
         return client
 
-    def supports_multimodal(self):
-        if self.name in valid_multimodal_models:
-            return True
-        return False
-
     def get_model_name(self):
         return f"{self.name} (Gemini)"
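The gemini_model.py changes follow the same pattern as deepseek_model.py (catalog-backed `model_data`, capability probes, `DeepEvalError`) and additionally normalize `service_account_key`: it may now be a dict or a JSON string, is stored as a `SecretStr`, and is only decoded with `json.loads` when a Vertex AI client is actually built. A hedged sketch of both constructor paths; the API key, project, location, and key contents are placeholders, and the `google-genai` package must be installed:

    from deepeval.models.llms.gemini_model import GeminiModel

    # Plain Gemini API path: only an API key is needed.
    api_model = GeminiModel(
        model="gemini-2.5-flash",
        api_key="<GOOGLE_API_KEY>",  # placeholder
    )

    # Vertex AI path: the service account key may be a dict or a JSON string;
    # either way it is wrapped in a SecretStr here and json.loads()-ed later
    # in load_model(), which also requires project and location.
    vertex_model = GeminiModel(
        model="gemini-2.5-flash",
        project="<GOOGLE_CLOUD_PROJECT>",   # placeholder
        location="us-central1",             # placeholder region
        service_account_key={"type": "service_account"},  # truncated placeholder
    )

    # generate() now returns Tuple[Union[str, BaseModel], float]
    text, cost = api_model.generate("Say hello")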