deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/gemini_model.py

@@ -1,17 +1,32 @@
 import json
-
+import requests
 from pydantic import BaseModel, SecretStr
-from google.genai import types, Client
-from typing import Optional, Dict
+from typing import TYPE_CHECKING, Optional, Dict, List, Union
 
+from deepeval.test_case import MLLMImage
 from deepeval.config.settings import get_settings
 from deepeval.models.utils import require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_dependency,
+)
 from deepeval.models.base_model import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
-from google.oauth2 import service_account
+
+valid_multimodal_models = [
+    "gemini-2.5-pro",
+    "gemini-2.5-flash",
+    "gemini-1.5-pro",
+    "gemini-1.5-flash",
+    # TODO: Add more models later
+]
+
+if TYPE_CHECKING:
+    from google.genai import Client
 
 default_gemini_model = "gemini-1.5-pro"
 
@@ -28,7 +43,7 @@ class GeminiModel(DeepEvalBaseLLM):
     To use Vertex AI API, set project and location attributes.
 
     Attributes:
-        model_name: Name of the Gemini model to use
+        model: Name of the Gemini model to use
         api_key: Google API key for authentication
         project: Google Cloud project ID
         location: Google Cloud location
@@ -39,7 +54,7 @@ class GeminiModel(DeepEvalBaseLLM):
 
         # Initialize the model
         model = GeminiModel(
-            model_name="gemini-1.5-pro-001",
+            model="gemini-1.5-pro-001",
            api_key="your-api-key"
        )
 
@@ -50,21 +65,19 @@ class GeminiModel(DeepEvalBaseLLM):
 
     def __init__(
         self,
-        model_name: Optional[str] = None,
+        model: Optional[str] = None,
         api_key: Optional[str] = None,
+        temperature: float = 0,
         project: Optional[str] = None,
         location: Optional[str] = None,
         service_account_key: Optional[Dict[str, str]] = None,
-        temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
 
         settings = get_settings()
 
-        model_name = (
-            model_name or settings.GEMINI_MODEL_NAME or default_gemini_model
-        )
+        model = model or settings.GEMINI_MODEL_NAME or default_gemini_model
 
         # Get API key from settings if not provided
         if api_key is not None:
@@ -98,27 +111,28 @@ class GeminiModel(DeepEvalBaseLLM):
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
 
+        self._module = self._require_module()
         # Configure default model generation settings
         self.model_safety_settings = [
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
         ]
 
-        super().__init__(model_name, **kwargs)
+        super().__init__(model)
 
     def should_use_vertexai(self) -> bool:
         """Checks if the model should use Vertex AI for generation.
@@ -137,6 +151,50 @@ class GeminiModel(DeepEvalBaseLLM):
         else:
             return False
 
+    @retry_gemini
+    def generate_prompt(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ) -> List[Union[str, MLLMImage]]:
+        """Converts DeepEval multimodal input into GenAI SDK compatible format.
+
+        Args:
+            multimodal_input: List of strings and MLLMImage objects
+
+        Returns:
+            List of strings and PIL Image objects ready for model input
+
+        Raises:
+            ValueError: If an invalid input type is provided
+        """
+        prompt = []
+        settings = get_settings()
+
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                prompt.append(ele)
+            elif isinstance(ele, MLLMImage):
+                if ele.local:
+                    with open(ele.url, "rb") as f:
+                        image_data = f.read()
+                else:
+                    response = requests.get(
+                        ele.url,
+                        timeout=(
+                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                        ),
+                    )
+                    response.raise_for_status()
+                    image_data = response.content
+
+                image_part = self._module.types.Part.from_bytes(
+                    data=image_data, mime_type="image/jpeg"
+                )
+                prompt.append(image_part)
+            else:
+                raise ValueError(f"Invalid input type: {type(ele)}")
+        return prompt
+
     ###############################################
     # Generate functions
     ###############################################
@@ -154,11 +212,16 @@ class GeminiModel(DeepEvalBaseLLM):
         """
         client = self.load_model()
 
+        if check_if_multimodal(prompt):
+
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema is not None:
             response = client.models.generate_content(
-                model=self.model_name,
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=schema,
                     safety_settings=self.model_safety_settings,
@@ -169,9 +232,9 @@ class GeminiModel(DeepEvalBaseLLM):
             return response.parsed, 0
         else:
             response = client.models.generate_content(
-                model=self.model_name,
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     safety_settings=self.model_safety_settings,
                     temperature=self.temperature,
                     **self.generation_kwargs,
@@ -194,11 +257,15 @@ class GeminiModel(DeepEvalBaseLLM):
         """
         client = self.load_model()
 
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema is not None:
             response = await client.aio.models.generate_content(
-                model=self.model_name,
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=schema,
                     safety_settings=self.model_safety_settings,
@@ -209,9 +276,9 @@ class GeminiModel(DeepEvalBaseLLM):
             return response.parsed, 0
         else:
             response = await client.aio.models.generate_content(
-                model=self.model_name,
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     safety_settings=self.model_safety_settings,
                     temperature=self.temperature,
                     **self.generation_kwargs,
@@ -223,11 +290,7 @@ class GeminiModel(DeepEvalBaseLLM):
     # Model #
     #########
 
-    def get_model_name(self) -> str:
-        """Returns the name of the Gemini model being used."""
-        return self.model_name
-
-    def load_model(self, *args, **kwargs):
+    def load_model(self):
         """Creates a client.
         With Gen AI SDK, model is set at inference time, so there is no
         model to load and initialize.
@@ -236,7 +299,21 @@ class GeminiModel(DeepEvalBaseLLM):
         Returns:
             A GenerativeModel instance configured for evaluation.
         """
-        return self._build_client(**kwargs)
+        return self._build_client()
+
+    def _require_oauth2(self):
+        return require_dependency(
+            "google.oauth2",
+            provider_label="GeminiModel",
+            install_hint="Install it with `pip install google-auth`.",
+        )
+
+    def _require_module(self):
+        return require_dependency(
+            "google.genai",
+            provider_label="GeminiModel",
+            install_hint="Install it with `pip install google-genai`.",
+        )
 
     def _client_kwargs(self, **override_kwargs) -> Dict:
         """Merge ctor kwargs with any overrides passed at load_model time."""
@@ -245,8 +322,8 @@ class GeminiModel(DeepEvalBaseLLM):
         client_kwargs.update(override_kwargs)
         return client_kwargs
 
-    def _build_client(self, **override_kwargs) -> Client:
-        client_kwargs = self._client_kwargs(**override_kwargs)
+    def _build_client(self) -> "Client":
+        client_kwargs = self._client_kwargs(**self.kwargs)
 
         if self.should_use_vertexai():
             if not self.project or not self.location:
@@ -256,8 +333,9 @@ class GeminiModel(DeepEvalBaseLLM):
                     "GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
                 )
 
+            oauth2 = self._require_oauth2()
             credentials = (
-                service_account.Credentials.from_service_account_info(
+                oauth2.service_account.Credentials.from_service_account_info(
                     self.service_account_key,
                     scopes=[
                         "https://www.googleapis.com/auth/cloud-platform",
@@ -267,7 +345,7 @@ class GeminiModel(DeepEvalBaseLLM):
                 else None
             )
 
-            client = Client(
+            client = self._module.Client(
                 vertexai=True,
                 project=self.project,
                 location=self.location,
@@ -282,6 +360,14 @@ class GeminiModel(DeepEvalBaseLLM):
                 param_hint="`api_key` to GeminiModel(...)",
             )
 
-        client = Client(api_key=api_key, **client_kwargs)
+        client = self._module.Client(api_key=api_key, **client_kwargs)
 
         return client
+
+    def supports_multimodal(self):
+        if self.name in valid_multimodal_models:
+            return True
+        return False
+
+    def get_model_name(self):
+        return f"{self.name} (Gemini)"
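
For callers, the visible GeminiModel changes are the `model_name` → `model` constructor rename and the new multimodal path gated by `valid_multimodal_models`. A minimal usage sketch under stated assumptions: `google-genai` is installed, the API key is valid, the tuple return of `generate` is inferred from the schema branch (`return response.parsed, 0`), and the list-shaped prompt is an assumption since the exact forms accepted by `check_if_multimodal` / `convert_to_multi_modal_array` are not shown in this diff:

    from deepeval.models.llms.gemini_model import GeminiModel
    from deepeval.test_case import MLLMImage

    # `model=` replaces the 3.7.4 `model_name=` keyword
    judge = GeminiModel(model="gemini-1.5-pro", api_key="your-api-key")

    text, cost = judge.generate("Summarize the retrieval context in one line.")

    # Images are routed through generate_prompt() only when the prompt is
    # detected as multimodal; MLLMImage's url/local fields come from the diff,
    # but this constructor call and the list form are assumptions.
    if judge.supports_multimodal():
        text, cost = judge.generate(
            ["Describe this image:", MLLMImage(url="https://example.com/img.jpg", local=False)]
        )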
deepeval/models/llms/grok_model.py

@@ -7,11 +7,12 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
-
 # consistent retry rules
 retry_grok = create_retry_decorator(PS.GROK)
 
@@ -61,11 +62,12 @@ class GrokModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+
         settings = get_settings()
 
-        model_name = model or settings.GROK_MODEL_NAME
+        model = model or settings.GROK_MODEL_NAME
 
-        if model_name not in model_pricing:
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available Grok models: {', '.join(model_pricing.keys())}"
             )
@@ -83,9 +85,10 @@ class GrokModel(DeepEvalBaseLLM):
         else:
             self.api_key = settings.GROK_API_KEY
 
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -95,6 +98,7 @@ class GrokModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         try:
             from xai_sdk.chat import user
         except ImportError:
@@ -103,13 +107,13 @@ class GrokModel(DeepEvalBaseLLM):
             )
         client = self.load_model(async_mode=False)
         chat = client.chat.create(
-            model=self.model_name,
+            model=self.name,
             temperature=self.temperature,
             **self.generation_kwargs,
         )
         chat.append(user(prompt))
 
-        if schema and self.model_name in structured_outputs_models:
+        if schema and self.name in structured_outputs_models:
             response, structured_output = chat.parse(schema)
             cost = self.calculate_cost(
                 response.usage.prompt_tokens,
@@ -133,6 +137,7 @@ class GrokModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         try:
             from xai_sdk.chat import user
         except ImportError:
@@ -141,13 +146,13 @@ class GrokModel(DeepEvalBaseLLM):
             )
         client = self.load_model(async_mode=True)
         chat = client.chat.create(
-            model=self.model_name,
+            model=self.name,
             temperature=self.temperature,
             **self.generation_kwargs,
         )
         chat.append(user(prompt))
 
-        if schema and self.model_name in structured_outputs_models:
+        if schema and self.name in structured_outputs_models:
             response, structured_output = await chat.parse(schema)
             cost = self.calculate_cost(
                 response.usage.prompt_tokens,
@@ -176,7 +181,7 @@ class GrokModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.model_name, model_pricing)
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -198,9 +203,6 @@ class GrokModel(DeepEvalBaseLLM):
                 "xai_sdk is required to use GrokModel. Please install it with: pip install xai-sdk"
             )
 
-    def get_model_name(self):
-        return f"{self.model_name}"
-
     def _client_kwargs(self) -> Dict:
         """
         If Tenacity is managing retries, disable gRPC channel retries to avoid double retry.
@@ -242,3 +244,6 @@ class GrokModel(DeepEvalBaseLLM):
                 kw.pop("channel_options", None)
                 return cls(**kw)
             raise
+
+    def get_model_name(self):
+        return f"{self.name} (Grok)"
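
The GrokModel changes are the same internal rename (`self.model_name` → `self.name`) plus a provider-suffixed `get_model_name()`. A sketch of the structured-output path visible above, assuming `xai-sdk` is installed and that the chosen model id appears in both `model_pricing` and `structured_outputs_models` (the id below is illustrative, not taken from this diff):

    from pydantic import BaseModel
    from deepeval.models.llms.grok_model import GrokModel

    class Verdict(BaseModel):
        answer: str

    # "grok-3" is an assumed entry in model_pricing; invalid ids raise ValueError
    judge = GrokModel(model="grok-3", api_key="your-xai-key")
    verdict, cost = judge.generate("Is 7 prime? Answer yes or no.", schema=Verdict)
    print(judge.get_model_name())  # now "grok-3 (Grok)" rather than the bare model id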
deepeval/models/llms/kimi_model.py

@@ -8,7 +8,9 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
@@ -74,16 +76,16 @@ model_pricing = {
 class KimiModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         settings = get_settings()
 
-        model_name = model or settings.MOONSHOT_MODEL_NAME
-        if model_name not in model_pricing:
+        model = model or settings.MOONSHOT_MODEL_NAME
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available Moonshot models: {', '.join(model_pricing.keys())}"
             )
@@ -103,9 +105,10 @@ class KimiModel(DeepEvalBaseLLM):
             self.api_key = settings.MOONSHOT_API_KEY
 
         self.base_url = "https://api.moonshot.cn/v1"
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -115,10 +118,11 @@ class KimiModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
-        if schema and self.model_name in json_mode_models:
+        if schema and self.name in json_mode_models:
             completion = client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -134,7 +138,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
 
         completion = client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )
@@ -153,10 +157,11 @@ class KimiModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
-        if schema and self.model_name in json_mode_models:
+        if schema and self.name in json_mode_models:
             completion = await client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -172,7 +177,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
 
         completion = await client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )
@@ -196,7 +201,7 @@ class KimiModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.model_name, model_pricing)
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -244,4 +249,4 @@ class KimiModel(DeepEvalBaseLLM):
             raise
 
     def get_model_name(self):
-        return f"{self.model_name}"
+        return f"{self.name} (KIMI)"
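
One migration note worth calling out: KimiModel's first positional parameter is now `model` rather than `api_key`, so a 3.7.4-style positional call will be interpreted as a model id and hit the `model_pricing` check with a ValueError. Keyword arguments sidestep this. A sketch, with the model id assumed (not shown in this diff) to be a key in `model_pricing`:

    from deepeval.models.llms.kimi_model import KimiModel

    # 3.7.4 positional order was (api_key, model); under 3.7.5 that call would
    # raise ValueError because the key string is not in model_pricing.
    judge = KimiModel(model="moonshot-v1-8k", api_key="sk-your-moonshot-key")
    print(judge.get_model_name())  # "moonshot-v1-8k (KIMI)"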