deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (224)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +139 -2
  5. deepeval/evaluate/evaluate.py +16 -11
  6. deepeval/evaluate/execute.py +13 -181
  7. deepeval/evaluate/utils.py +6 -26
  8. deepeval/integrations/pydantic_ai/agent.py +19 -2
  9. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  10. deepeval/key_handler.py +3 -0
  11. deepeval/metrics/__init__.py +14 -16
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
  13. deepeval/metrics/answer_relevancy/template.py +22 -3
  14. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  15. deepeval/metrics/arena_g_eval/template.py +17 -1
  16. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  17. deepeval/metrics/argument_correctness/template.py +19 -2
  18. deepeval/metrics/base_metric.py +13 -44
  19. deepeval/metrics/bias/bias.py +102 -108
  20. deepeval/metrics/bias/template.py +14 -2
  21. deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
  22. deepeval/metrics/contextual_precision/template.py +115 -66
  23. deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
  24. deepeval/metrics/contextual_recall/template.py +106 -55
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
  26. deepeval/metrics/contextual_relevancy/template.py +87 -58
  27. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  28. deepeval/metrics/conversation_completeness/template.py +23 -3
  29. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  30. deepeval/metrics/conversational_dag/nodes.py +66 -123
  31. deepeval/metrics/conversational_dag/templates.py +16 -0
  32. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  33. deepeval/metrics/dag/dag.py +10 -0
  34. deepeval/metrics/dag/nodes.py +63 -126
  35. deepeval/metrics/dag/templates.py +16 -2
  36. deepeval/metrics/exact_match/exact_match.py +9 -1
  37. deepeval/metrics/faithfulness/faithfulness.py +138 -149
  38. deepeval/metrics/faithfulness/schema.py +1 -1
  39. deepeval/metrics/faithfulness/template.py +200 -115
  40. deepeval/metrics/g_eval/g_eval.py +87 -78
  41. deepeval/metrics/g_eval/template.py +18 -1
  42. deepeval/metrics/g_eval/utils.py +7 -6
  43. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  44. deepeval/metrics/goal_accuracy/template.py +21 -3
  45. deepeval/metrics/hallucination/hallucination.py +60 -75
  46. deepeval/metrics/hallucination/template.py +13 -0
  47. deepeval/metrics/indicator.py +7 -10
  48. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  49. deepeval/metrics/json_correctness/template.py +10 -0
  50. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  51. deepeval/metrics/knowledge_retention/schema.py +9 -3
  52. deepeval/metrics/knowledge_retention/template.py +12 -0
  53. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  54. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  55. deepeval/metrics/mcp/template.py +52 -0
  56. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  57. deepeval/metrics/mcp_use_metric/template.py +12 -0
  58. deepeval/metrics/misuse/misuse.py +77 -97
  59. deepeval/metrics/misuse/template.py +15 -0
  60. deepeval/metrics/multimodal_metrics/__init__.py +0 -19
  61. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
  62. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
  63. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
  64. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
  65. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
  66. deepeval/metrics/non_advice/non_advice.py +79 -105
  67. deepeval/metrics/non_advice/template.py +12 -0
  68. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  69. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  70. deepeval/metrics/pii_leakage/template.py +14 -0
  71. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  72. deepeval/metrics/plan_adherence/template.py +11 -0
  73. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  74. deepeval/metrics/plan_quality/template.py +9 -0
  75. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  76. deepeval/metrics/prompt_alignment/template.py +12 -0
  77. deepeval/metrics/ragas.py +3 -3
  78. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  79. deepeval/metrics/role_adherence/template.py +14 -0
  80. deepeval/metrics/role_violation/role_violation.py +75 -108
  81. deepeval/metrics/role_violation/template.py +12 -0
  82. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  83. deepeval/metrics/step_efficiency/template.py +11 -0
  84. deepeval/metrics/summarization/summarization.py +115 -183
  85. deepeval/metrics/summarization/template.py +19 -0
  86. deepeval/metrics/task_completion/task_completion.py +67 -73
  87. deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
  88. deepeval/metrics/tool_use/tool_use.py +42 -66
  89. deepeval/metrics/topic_adherence/template.py +13 -0
  90. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  94. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
  96. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  97. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
  99. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  100. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
  102. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  103. deepeval/metrics/turn_faithfulness/template.py +218 -0
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +158 -122
  108. deepeval/models/__init__.py +0 -12
  109. deepeval/models/base_model.py +49 -33
  110. deepeval/models/embedding_models/__init__.py +7 -0
  111. deepeval/models/embedding_models/azure_embedding_model.py +79 -33
  112. deepeval/models/embedding_models/local_embedding_model.py +39 -20
  113. deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
  114. deepeval/models/embedding_models/openai_embedding_model.py +42 -22
  115. deepeval/models/llms/amazon_bedrock_model.py +226 -72
  116. deepeval/models/llms/anthropic_model.py +178 -63
  117. deepeval/models/llms/azure_model.py +218 -60
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +95 -40
  120. deepeval/models/llms/gemini_model.py +209 -64
  121. deepeval/models/llms/grok_model.py +139 -68
  122. deepeval/models/llms/kimi_model.py +140 -90
  123. deepeval/models/llms/litellm_model.py +131 -37
  124. deepeval/models/llms/local_model.py +125 -21
  125. deepeval/models/llms/ollama_model.py +147 -24
  126. deepeval/models/llms/openai_model.py +222 -269
  127. deepeval/models/llms/portkey_model.py +81 -22
  128. deepeval/models/llms/utils.py +8 -3
  129. deepeval/models/retry_policy.py +17 -14
  130. deepeval/models/utils.py +106 -5
  131. deepeval/optimizer/__init__.py +5 -0
  132. deepeval/optimizer/algorithms/__init__.py +6 -0
  133. deepeval/optimizer/algorithms/base.py +29 -0
  134. deepeval/optimizer/algorithms/configs.py +18 -0
  135. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  136. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  137. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  138. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  139. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  140. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  141. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  142. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  143. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  144. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  145. deepeval/{optimization → optimizer}/configs.py +5 -8
  146. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  147. deepeval/optimizer/prompt_optimizer.py +263 -0
  148. deepeval/optimizer/rewriter/__init__.py +5 -0
  149. deepeval/optimizer/rewriter/rewriter.py +124 -0
  150. deepeval/optimizer/rewriter/utils.py +214 -0
  151. deepeval/optimizer/scorer/__init__.py +5 -0
  152. deepeval/optimizer/scorer/base.py +86 -0
  153. deepeval/optimizer/scorer/scorer.py +316 -0
  154. deepeval/optimizer/scorer/utils.py +30 -0
  155. deepeval/optimizer/types.py +148 -0
  156. deepeval/{optimization → optimizer}/utils.py +47 -165
  157. deepeval/prompt/prompt.py +5 -9
  158. deepeval/simulator/conversation_simulator.py +43 -0
  159. deepeval/simulator/template.py +13 -0
  160. deepeval/test_case/__init__.py +1 -3
  161. deepeval/test_case/api.py +26 -45
  162. deepeval/test_case/arena_test_case.py +7 -2
  163. deepeval/test_case/conversational_test_case.py +68 -1
  164. deepeval/test_case/llm_test_case.py +206 -1
  165. deepeval/test_case/utils.py +4 -8
  166. deepeval/test_run/api.py +18 -14
  167. deepeval/test_run/test_run.py +3 -3
  168. deepeval/tracing/patchers.py +9 -4
  169. deepeval/tracing/tracing.py +2 -2
  170. deepeval/utils.py +65 -0
  171. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
  172. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
  173. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  174. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  175. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  176. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  177. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  178. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  179. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  180. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  181. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  182. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  183. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  184. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  185. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  186. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  187. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  188. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  189. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  190. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  191. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  192. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  193. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  194. deepeval/models/mlllms/__init__.py +0 -4
  195. deepeval/models/mlllms/azure_model.py +0 -343
  196. deepeval/models/mlllms/gemini_model.py +0 -313
  197. deepeval/models/mlllms/ollama_model.py +0 -175
  198. deepeval/models/mlllms/openai_model.py +0 -309
  199. deepeval/optimization/__init__.py +0 -13
  200. deepeval/optimization/adapters/__init__.py +0 -2
  201. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  202. deepeval/optimization/aggregates.py +0 -14
  203. deepeval/optimization/copro/configs.py +0 -31
  204. deepeval/optimization/gepa/__init__.py +0 -7
  205. deepeval/optimization/gepa/configs.py +0 -115
  206. deepeval/optimization/miprov2/configs.py +0 -134
  207. deepeval/optimization/miprov2/loop.py +0 -785
  208. deepeval/optimization/mutations/__init__.py +0 -0
  209. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  210. deepeval/optimization/policies/__init__.py +0 -16
  211. deepeval/optimization/policies/tie_breaker.py +0 -67
  212. deepeval/optimization/prompt_optimizer.py +0 -462
  213. deepeval/optimization/simba/__init__.py +0 -0
  214. deepeval/optimization/simba/configs.py +0 -33
  215. deepeval/optimization/types.py +0 -361
  216. deepeval/test_case/mllm_test_case.py +0 -170
  217. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  218. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  219. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  220. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  221. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  222. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  223. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  224. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
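Beyond per-file tweaks, the list above shows two structural changes: the prompt-optimization package moved from `deepeval.optimization` to `deepeval.optimizer`, with algorithms nested under `deepeval.optimizer.algorithms` (copro, gepa, miprov2, simba), and the `multimodal_metrics`/`mlllms` packages were removed in favor of new `turn_*` metrics, with `mllm_test_case.py` deleted while `llm_test_case.py` grows by 206 lines. A speculative migration sketch follows; the exported symbols are assumptions inferred from the file paths, not verified against the released 3.7.6 API:

```python
# Speculative sketch of the import migration implied by the renames above.
# The exact exported symbols are assumptions inferred from the file paths
# (optimizer/prompt_optimizer.py, optimizer/algorithms/{copro,gepa,miprov2,simba});
# they are NOT verified against the released 3.7.6 API.

# deepeval 3.7.4 (module removed in 3.7.6):
# from deepeval.optimization.prompt_optimizer import PromptOptimizer

# deepeval 3.7.6 (new layout):
from deepeval.optimizer.prompt_optimizer import PromptOptimizer  # assumed export
from deepeval.optimizer.algorithms import configs  # algorithms now live one level down
```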
deepeval/models/mlllms/gemini_model.py (deleted)
@@ -1,313 +0,0 @@
- import requests
- from typing import Optional, List, Union
- from pydantic import BaseModel, SecretStr
- from google.genai import types
- from google import genai
-
- from deepeval.config.settings import get_settings
- from deepeval.models.utils import require_secret_api_key
- from deepeval.models.retry_policy import (
-     create_retry_decorator,
- )
- from deepeval.models.base_model import DeepEvalBaseMLLM
- from deepeval.test_case import MLLMImage
- from deepeval.constants import ProviderSlug as PS
-
-
- default_multimodal_gemini_model = "gemini-1.5-pro"
- # consistent retry rules
- retry_gemini = create_retry_decorator(PS.GOOGLE)
-
-
- class MultimodalGeminiModel(DeepEvalBaseMLLM):
-     """Class that implements Google Gemini models for multimodal evaluation.
-
-     This class provides integration with Google's Gemini models through the Google GenAI SDK,
-     supporting both text and multimodal (text + image) inputs for evaluation tasks.
-     To use the Gemini API, set the api_key attribute only.
-     To use the Vertex AI API, set the project and location attributes.
-
-     Attributes:
-         model_name: Name of the Gemini model to use
-         api_key: Google API key for authentication
-         project: Google Cloud project ID
-         location: Google Cloud location
-
-     Example:
-         ```python
-         from deepeval.models import MultimodalGeminiModel
-
-         # Initialize the model
-         model = MultimodalGeminiModel(
-             model_name="gemini-pro-vision",
-             api_key="your-api-key"
-         )
-
-         # Generate text from text + image input
-         response = model.generate([
-             "Describe what you see in this image:",
-             MLLMImage(url="path/to/image.jpg", local=True)
-         ])
-         ```
-     """
-
-     def __init__(
-         self,
-         model_name: Optional[str] = None,
-         api_key: Optional[str] = None,
-         project: Optional[str] = None,
-         location: Optional[str] = None,
-         *args,
-         **kwargs,
-     ):
-         settings = get_settings()
-         model_name = (
-             model_name
-             or settings.GEMINI_MODEL_NAME
-             or default_multimodal_gemini_model
-         )
-
-         # Get API key from settings if not provided
-         if api_key is not None:
-             # keep it secret, keep it safe from serialization, logging and the like
-             self.api_key: SecretStr | None = SecretStr(api_key)
-         else:
-             self.api_key = settings.GOOGLE_API_KEY
-
-         self.project = project or settings.GOOGLE_CLOUD_PROJECT
-         self.location = (
-             location
-             or settings.GOOGLE_CLOUD_LOCATION is not None
-             and str(settings.GOOGLE_CLOUD_LOCATION)
-         )
-         self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
-
-         # Keep any extra kwargs for the underlying genai.Client
-         self.args = args
-         self.kwargs = kwargs
-
-         # Configure default model generation settings
-         self.model_safety_settings = [
-             types.SafetySetting(
-                 category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                 threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-             ),
-             types.SafetySetting(
-                 category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                 threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-             ),
-             types.SafetySetting(
-                 category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                 threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-             ),
-             types.SafetySetting(
-                 category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                 threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-             ),
-         ]
-         self.model_temperature = 0.0
-
-         super().__init__(model_name, *args, **kwargs)
-
-     def should_use_vertexai(self):
-         """Checks if the model should use Vertex AI for generation.
-
-         This is determined first by the value of the `GOOGLE_GENAI_USE_VERTEXAI`
-         environment variable. If not set, it checks for the presence of the
-         project and location.
-
-         Returns:
-             True if the model should use Vertex AI, False otherwise
-         """
-         if self.use_vertexai is not None:
-             return self.use_vertexai.lower() == "yes"
-
-         if self.project and self.location:
-             return True
-         else:
-             return False
-
-     # TODO: Refactor generate_prompt to minimize the work done on retry
-     @retry_gemini
-     def generate_prompt(
-         self, multimodal_input: List[Union[str, MLLMImage]] = []
-     ) -> List[Union[str, MLLMImage]]:
-         """Converts DeepEval multimodal input into a GenAI SDK compatible format.
-
-         Args:
-             multimodal_input: List of strings and MLLMImage objects
-
-         Returns:
-             List of strings and PIL Image objects ready for model input
-
-         Raises:
-             ValueError: If an invalid input type is provided
-         """
-         prompt = []
-         settings = get_settings()
-
-         for ele in multimodal_input:
-             if isinstance(ele, str):
-                 prompt.append(ele)
-             elif isinstance(ele, MLLMImage):
-                 if ele.local:
-                     with open(ele.url, "rb") as f:
-                         image_data = f.read()
-                 else:
-                     response = requests.get(
-                         ele.url,
-                         timeout=(
-                             settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                             settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                         ),
-                     )
-                     response.raise_for_status()
-                     image_data = response.content
-
-                 image_part = types.Part.from_bytes(
-                     data=image_data, mime_type="image/jpeg"
-                 )
-                 prompt.append(image_part)
-             else:
-                 raise ValueError(f"Invalid input type: {type(ele)}")
-         return prompt
-
-     @retry_gemini
-     def generate(
-         self,
-         multimodal_input: List[Union[str, MLLMImage]],
-         schema: Optional[BaseModel] = None,
-     ) -> str:
-         """Generates text from multimodal input.
-
-         Args:
-             multimodal_input: List of strings and MLLMImage objects
-             schema: Optional Pydantic model for structured output
-
-         Returns:
-             Generated text response
-         """
-         client = self.load_model()
-         prompt = self.generate_prompt(multimodal_input)
-
-         if schema is not None:
-             response = client.models.generate_content(
-                 model=self.model_name,
-                 contents=prompt,
-                 config=types.GenerateContentConfig(
-                     response_mime_type="application/json",
-                     response_schema=schema,
-                     safety_settings=self.model_safety_settings,
-                     temperature=self.model_temperature,
-                 ),
-             )
-             return response.parsed, 0
-         else:
-             response = client.models.generate_content(
-                 model=self.model_name,
-                 contents=prompt,
-                 config=types.GenerateContentConfig(
-                     safety_settings=self.model_safety_settings,
-                     temperature=self.model_temperature,
-                 ),
-             )
-             return response.text, 0
-
-     @retry_gemini
-     async def a_generate(
-         self,
-         multimodal_input: List[Union[str, MLLMImage]],
-         schema: Optional[BaseModel] = None,
-     ) -> str:
-         """Asynchronously generates text from multimodal input.
-
-         Args:
-             multimodal_input: List of strings and MLLMImage objects
-             schema: Optional Pydantic model for structured output
-
-         Returns:
-             Generated text response
-         """
-         client = self.load_model()
-         prompt = self.generate_prompt(multimodal_input)
-
-         if schema is not None:
-             response = await client.aio.models.generate_content(
-                 model=self.model_name,
-                 contents=prompt,
-                 config=types.GenerateContentConfig(
-                     response_mime_type="application/json",
-                     response_schema=schema,
-                     safety_settings=self.model_safety_settings,
-                     temperature=self.model_temperature,
-                 ),
-             )
-             return response.parsed, 0
-         else:
-             response = await client.aio.models.generate_content(
-                 model=self.model_name,
-                 contents=prompt,
-                 config=types.GenerateContentConfig(
-                     safety_settings=self.model_safety_settings,
-                     temperature=self.model_temperature,
-                 ),
-             )
-             return response.text, 0
-
-     #########
-     # Model #
-     #########
-
-     def get_model_name(self) -> str:
-         """Returns the name of the Gemini model being used."""
-         return self.model_name
-
-     def load_model(self, *args, **kwargs):
-         """Creates and returns a GenAI client.
-
-         With the Gen AI SDK, the model is set at inference time, so we only
-         construct the client here. Kept for compatibility with other MLLMs.
-         """
-         return self._build_client(**kwargs)
-
-     def _client_kwargs(self, **override_kwargs) -> dict:
-         """
-         Return kwargs forwarded to genai.Client.
-
-         Start from the ctor kwargs captured on `self.kwargs`, then apply any
-         overrides passed via load_model(...).
-         """
-         client_kwargs = dict(self.kwargs or {})
-         if override_kwargs:
-             client_kwargs.update(override_kwargs)
-         return client_kwargs
-
-     def _build_client(self, **override_kwargs):
-         """Build and return a genai.Client for either the Gemini API or Vertex AI."""
-         client_kwargs = self._client_kwargs(**override_kwargs)
-
-         if self.should_use_vertexai():
-             if not self.project or not self.location:
-                 raise ValueError(
-                     "When using the Vertex AI API, both project and location are required. "
-                     "Either provide them as arguments, set the GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
-                     "or set them in your DeepEval configuration."
-                 )
-
-             # Create client for Vertex AI
-             return genai.Client(
-                 vertexai=True,
-                 project=self.project,
-                 location=self.location,
-                 **client_kwargs,
-             )
-
-         api_key = require_secret_api_key(
-             self.api_key,
-             provider_label="Google Gemini",
-             env_var_name="GOOGLE_API_KEY",
-             param_hint="`api_key` to MultimodalGeminiModel(...)",
-         )
-
-         # Create client for Gemini API
-         return genai.Client(api_key=api_key, **client_kwargs)
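The deleted `MultimodalGeminiModel` above is a thin wrapper over the Google GenAI SDK. For readers tracking what the removal takes away, here is a minimal sketch of the same underlying call pattern, lifted from the deleted code; the API key, model name, and `image.jpg` path are placeholders:

```python
# Minimal sketch of the google-genai calls the deleted wrapper made.
# Placeholders: "your-api-key", "gemini-1.5-pro", "image.jpg".
from google import genai
from google.genai import types

client = genai.Client(api_key="your-api-key")
# Vertex AI alternative: genai.Client(vertexai=True, project=..., location=...)

# Read an image and wrap it as a content part, as generate_prompt() did
with open("image.jpg", "rb") as f:
    image_part = types.Part.from_bytes(data=f.read(), mime_type="image/jpeg")

response = client.models.generate_content(
    model="gemini-1.5-pro",
    contents=["Describe what you see in this image:", image_part],
    config=types.GenerateContentConfig(temperature=0.0),
)
print(response.text)
```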
deepeval/models/mlllms/ollama_model.py (deleted)
@@ -1,175 +0,0 @@
- from typing import Optional, Tuple, List, Union, Dict
- from ollama import Client, AsyncClient, ChatResponse
- from pydantic import BaseModel
- import requests
- import base64
- import io
-
- from deepeval.models.retry_policy import (
-     create_retry_decorator,
- )
- from deepeval.models import DeepEvalBaseMLLM
- from deepeval.test_case import MLLMImage
- from deepeval.config.settings import get_settings
- from deepeval.constants import ProviderSlug as PS
-
-
- retry_ollama = create_retry_decorator(PS.OLLAMA)
-
-
- class MultimodalOllamaModel(DeepEvalBaseMLLM):
-     def __init__(
-         self,
-         model: Optional[str] = None,
-         host: Optional[str] = None,
-         **kwargs,
-     ):
-         """
-         Multimodal Ollama model.
-
-         - `model`: Ollama model name (e.g. "llava").
-         - `host`: Ollama base URL (e.g. "http://localhost:11434").
-         - extra **kwargs are passed through to the underlying Client.
-         """
-         settings = get_settings()
-
-         # Resolve host/base URL
-         self.base_url = (
-             host
-             or settings.LOCAL_MODEL_BASE_URL
-             and str(settings.LOCAL_MODEL_BASE_URL)
-         )
-
-         # Resolve model name
-         model_name = model or settings.LOCAL_MODEL_NAME
-
-         # Client kwargs
-         self.kwargs = kwargs or {}
-
-         super().__init__(model_name)
-
-     @retry_ollama
-     def generate(
-         self,
-         multimodal_input: List[Union[str, MLLMImage]],
-         schema: Optional[BaseModel] = None,
-     ) -> Tuple[Union[str, Dict], float]:
-         chat_model = self.load_model()
-         messages = self.generate_messages(multimodal_input)
-         response: ChatResponse = chat_model.chat(
-             model=self.model_name,
-             messages=messages,
-             format=schema.model_json_schema() if schema else None,
-         )
-         return (
-             (
-                 schema.model_validate_json(response.message.content)
-                 if schema
-                 else response.message.content
-             ),
-             0,
-         )
-
-     @retry_ollama
-     async def a_generate(
-         self,
-         multimodal_input: List[Union[str, MLLMImage]],
-         schema: Optional[BaseModel] = None,
-     ) -> Tuple[str, float]:
-         chat_model = self.load_model(async_mode=True)
-         messages = self.generate_messages(multimodal_input)
-         response: ChatResponse = await chat_model.chat(
-             model=self.model_name,
-             messages=messages,
-             format=schema.model_json_schema() if schema else None,
-         )
-         return (
-             (
-                 schema.model_validate_json(response.message.content)
-                 if schema
-                 else response.message.content
-             ),
-             0,
-         )
-
-     def generate_messages(
-         self, multimodal_input: List[Union[str, MLLMImage]] = []
-     ):
-         messages = []
-         for ele in multimodal_input:
-             if isinstance(ele, str):
-                 messages.append(
-                     {
-                         "role": "user",
-                         "content": ele,
-                     }
-                 )
-             elif isinstance(ele, MLLMImage):
-                 img_b64 = self.convert_to_base64(ele.url, ele.local)
-                 if img_b64 is not None:
-                     messages.append(
-                         {
-                             "role": "user",
-                             "images": [img_b64],
-                         }
-                     )
-         return messages
-
-     ###############################################
-     # Utilities
-     ###############################################
-
-     def convert_to_base64(self, image_source: str, is_local: bool) -> str:
-         from PIL import Image
-
-         settings = get_settings()
-         try:
-             if not is_local:
-                 response = requests.get(
-                     image_source,
-                     stream=True,
-                     timeout=(
-                         settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                         settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                     ),
-                 )
-                 response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
-                 image = Image.open(io.BytesIO(response.content))
-             else:
-                 image = Image.open(image_source)
-
-             buffered = io.BytesIO()
-             image.save(buffered, format="JPEG")
-             img_str = base64.b64encode(buffered.getvalue()).decode()
-             return img_str
-
-         except (requests.exceptions.RequestException, OSError) as e:
-             # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
-             print(f"Image fetch/encode failed: {e}")
-             raise
-         except Exception as e:
-             print(f"Error converting image to base64: {e}")
-             return None
-
-     ###############################################
-     # Model
-     ###############################################
-
-     def load_model(self, async_mode: bool = False):
-         if not async_mode:
-             return self._build_client(Client)
-         return self._build_client(AsyncClient)
-
-     def _client_kwargs(self) -> Dict:
-         """
-         Return client-init kwargs.
-         Ollama's Python client doesn't have built-in retry config like OpenAI,
-         so we just pass these through untouched.
-         """
-         return dict(self.kwargs or {})
-
-     def _build_client(self, cls):
-         return cls(host=self.base_url, **self._client_kwargs())
-
-     def get_model_name(self):
-         return f"{self.model_name} (Ollama)"
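Similarly, the deleted `MultimodalOllamaModel` reduces to the Ollama chat API with base64-encoded images. A minimal sketch of that pattern, using only calls that appear in the deleted code; `llava`, the host URL, and `image.jpg` are placeholders:

```python
# Minimal sketch of the Ollama chat call the deleted wrapper made.
# Placeholders: "llava", the host URL, "image.jpg".
import base64
from ollama import Client

# Encode the image as base64, as convert_to_base64() did
with open("image.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

client = Client(host="http://localhost:11434")
response = client.chat(
    model="llava",
    messages=[
        {"role": "user", "content": "Describe what you see in this image:"},
        {"role": "user", "images": [img_b64]},
    ],
)
print(response.message.content)
```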