deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/mlllms/gemini_model.py
@@ -1,284 +0,0 @@
-from typing import Optional, List, Union
-import requests
-from pydantic import BaseModel
-from google.genai import types
-from google import genai
-
-from deepeval.models.retry_policy import (
-    create_retry_decorator,
-)
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
-from deepeval.models.base_model import DeepEvalBaseMLLM
-from deepeval.test_case import MLLMImage
-from deepeval.config.settings import get_settings
-from deepeval.constants import ProviderSlug as PS
-
-
-default_multimodal_gemini_model = "gemini-1.5-pro"
-# consistent retry rules
-retry_gemini = create_retry_decorator(PS.GOOGLE)
-
-
-class MultimodalGeminiModel(DeepEvalBaseMLLM):
-    """Class that implements Google Gemini models for multimodal evaluation.
-
-    This class provides integration with Google's Gemini models through the Google GenAI SDK,
-    supporting both text and multimodal (text + image) inputs for evaluation tasks.
-    To use Gemini API, set api_key attribute only.
-    To use Vertex AI API, set project and location attributes.
-
-    Attributes:
-        model_name: Name of the Gemini model to use
-        api_key: Google API key for authentication
-        project: Google Cloud project ID
-        location: Google Cloud location
-
-    Example:
-        ```python
-        from deepeval.models import MultimodalGeminiModel
-
-        # Initialize the model
-        model = MultimodalGeminiModel(
-            model_name="gemini-pro-vision",
-            api_key="your-api-key"
-        )
-
-        # Generate text from text + image input
-        response = model.generate([
-            "Describe what you see in this image:",
-            MLLMImage(url="path/to/image.jpg", local=True)
-        ])
-        ```
-    """
-
-    def __init__(
-        self,
-        model_name: Optional[str] = None,
-        api_key: Optional[str] = None,
-        project: Optional[str] = None,
-        location: Optional[str] = None,
-        *args,
-        **kwargs,
-    ):
-        model_name = (
-            model_name
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.GEMINI_MODEL_NAME)
-            or default_multimodal_gemini_model
-        )
-
-        # Get API key from key handler if not provided
-        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_API_KEY
-        )
-        self.project = project or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_PROJECT
-        )
-        self.location = location or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_LOCATION
-        )
-        self.use_vertexai = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_GENAI_USE_VERTEXAI
-        )
-
-        super().__init__(model_name, *args, **kwargs)
-        self.model = self.load_model(*args, **kwargs)
-
-    def should_use_vertexai(self):
-        """Checks if the model should use Vertex AI for generation.
-
-        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
-        environment variable. If not set, it checks for the presence of the
-        project and location.
-
-        Returns:
-            True if the model should use Vertex AI, False otherwise
-        """
-        if self.use_vertexai is not None:
-            return self.use_vertexai.lower() == "yes"
-
-        if self.project and self.location:
-            return True
-        else:
-            return False
-
-    def load_model(self, *args, **kwargs):
-        """Creates a client.
-        With Gen AI SDK, model is set at inference time, so there is no
-        model to load and initialize.
-        This method name is kept for compatibility with other LLMs.
-
-        Returns:
-            A GenerativeModel instance configured for evaluation.
-        """
-        if self.should_use_vertexai():
-            if not self.project or not self.location:
-                raise ValueError(
-                    "When using Vertex AI API, both project and location are required."
-                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
-                    "or set them in your DeepEval configuration."
-                )
-
-            # Create client for Vertex AI
-            self.client = genai.Client(
-                vertexai=True, project=self.project, location=self.location
-            )
-        else:
-            if not self.api_key:
-                raise ValueError(
-                    "Google API key is required. Either provide it directly, set GOOGLE_API_KEY environment variable, "
-                    "or set it in your DeepEval configuration."
-                )
-
-            # Create client for Gemini API
-            self.client = genai.Client(api_key=self.api_key)
-
-        # Configure default model generation settings
-        self.model_safety_settings = [
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-        ]
-        self.model_temperature = 0.0
-        return self.client.models
-
-    # TODO: Refactor genete prompt to minimize the work done on retry
-    @retry_gemini
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ) -> List[Union[str, MLLMImage]]:
-        """Converts DeepEval multimodal input into GenAI SDK compatible format.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-
-        Returns:
-            List of strings and PIL Image objects ready for model input
-
-        Raises:
-            ValueError: If an invalid input type is provided
-        """
-        prompt = []
-        settings = get_settings()
-
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append(ele)
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    with open(ele.url, "rb") as f:
-                        image_data = f.read()
-                else:
-                    response = requests.get(
-                        ele.url,
-                        timeout=(
-                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                        ),
-                    )
-                    response.raise_for_status()
-                    image_data = response.content
-
-                image_part = types.Part.from_bytes(
-                    data=image_data, mime_type="image/jpeg"
-                )
-                prompt.append(image_part)
-            else:
-                raise ValueError(f"Invalid input type: {type(ele)}")
-        return prompt
-
-    @retry_gemini
-    def generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> str:
-        """Generates text from multimodal input.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-            schema: Optional Pydantic model for structured output
-
-        Returns:
-            Generated text response
-        """
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema is not None:
-            response = self.client.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    response_mime_type="application/json",
-                    response_schema=schema,
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.parsed, 0
-        else:
-            response = self.client.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.text, 0
-
-    @retry_gemini
-    async def a_generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> str:
-        """Asynchronously generates text from multimodal input.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-            schema: Optional Pydantic model for structured output
-
-        Returns:
-            Generated text response
-        """
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema is not None:
-            response = await self.client.aio.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    response_mime_type="application/json",
-                    response_schema=schema,
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.parsed, 0
-        else:
-            response = await self.client.aio.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.text, 0
-
-    def get_model_name(self) -> str:
-        """Returns the name of the Gemini model being used."""
-        return self.model_name
deepeval/models/mlllms/ollama_model.py
@@ -1,144 +0,0 @@
-from typing import Optional, Tuple, List, Union, Dict
-from ollama import Client, AsyncClient, ChatResponse
-from pydantic import BaseModel
-import requests
-import base64
-import io
-
-from deepeval.models.retry_policy import (
-    create_retry_decorator,
-)
-from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
-from deepeval.models import DeepEvalBaseMLLM
-from deepeval.test_case import MLLMImage
-from deepeval.config.settings import get_settings
-from deepeval.constants import ProviderSlug as PS
-
-
-retry_ollama = create_retry_decorator(PS.OLLAMA)
-
-
-class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(self, **kwargs):
-        model_name = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_BASE_URL
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
-
-    @retry_ollama
-    def generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[Union[str, Dict], float]:
-        chat_model = self.load_model()
-        messages = self.generate_messages(multimodal_input)
-        response: ChatResponse = chat_model.chat(
-            model=self.model_name,
-            messages=messages,
-            format=schema.model_json_schema() if schema else None,
-        )
-        return (
-            (
-                schema.model_validate_json(response.message.content)
-                if schema
-                else response.message.content
-            ),
-            0,
-        )
-
-    @retry_ollama
-    async def a_generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[str, float]:
-        chat_model = self.load_model(async_mode=True)
-        messages = self.generate_messages(multimodal_input)
-        response: ChatResponse = await chat_model.chat(
-            model=self.model_name,
-            messages=messages,
-            format=schema.model_json_schema() if schema else None,
-        )
-        return (
-            (
-                schema.model_validate_json(response.message.content)
-                if schema
-                else response.message.content
-            ),
-            0,
-        )
-
-    def generate_messages(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ):
-        messages = []
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                messages.append(
-                    {
-                        "role": "user",
-                        "content": ele,
-                    }
-                )
-            elif isinstance(ele, MLLMImage):
-                img_b64 = self.convert_to_base64(ele.url, ele.local)
-                if img_b64 is not None:
-                    messages.append(
-                        {
-                            "role": "user",
-                            "images": [img_b64],
-                        }
-                    )
-        return messages
-
-    ###############################################
-    # Utilities
-    ###############################################
-
-    def convert_to_base64(self, image_source: str, is_local: bool) -> str:
-        from PIL import Image
-
-        settings = get_settings()
-        try:
-            if not is_local:
-                response = requests.get(
-                    image_source,
-                    stream=True,
-                    timeout=(
-                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                    ),
-                )
-                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
-                image = Image.open(io.BytesIO(response.content))
-            else:
-                image = Image.open(image_source)
-
-            buffered = io.BytesIO()
-            image.save(buffered, format="JPEG")
-            img_str = base64.b64encode(buffered.getvalue()).decode()
-            return img_str
-
-        except (requests.exceptions.RequestException, OSError) as e:
-            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
-            print(f"Image fetch/encode failed: {e}")
-            raise
-        except Exception as e:
-            print(f"Error converting image to base64: {e}")
-            return None
-
-    def load_model(self, async_mode: bool = False):
-        if not async_mode:
-            return self._build_client(Client)
-        return self._build_client(AsyncClient)
-
-    def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
-
-    def get_model_name(self):
-        return f"{self.model_name} (Ollama)"
deepeval/models/mlllms/openai_model.py
@@ -1,258 +0,0 @@
-from typing import Optional, Tuple, List, Union
-from openai import OpenAI, AsyncOpenAI
-from openai.types.chat import ParsedChatCompletion
-from pydantic import BaseModel
-from io import BytesIO
-import base64
-
-from deepeval.models.llms.openai_model import (
-    model_pricing,
-    structured_outputs_models,
-)
-from deepeval.models import DeepEvalBaseMLLM
-from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.test_case import MLLMImage
-from deepeval.models.utils import parse_model_name
-from deepeval.models.retry_policy import (
-    create_retry_decorator,
-    sdk_retries_for,
-)
-from deepeval.constants import ProviderSlug as PS
-
-
-retry_openai = create_retry_decorator(PS.OPENAI)
-
-valid_multimodal_gpt_models = [
-    "gpt-4o",
-    "gpt-4o-2024-05-13",
-    "gpt-4o-2024-08-06",
-    "gpt-4o-2024-11-20",
-    "gpt-4o-mini",
-    "gpt-4o-mini-2024-07-18",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-4.1-nano",
-    "o1",
-    "o1-preview",
-    "o1-2024-12-17",
-    "o1-preview-2024-09-12",
-    "gpt-4.5-preview-2025-02-27",
-    "o4-mini",
-]
-
-default_multimodal_gpt_model = "gpt-4.1"
-
-unsupported_log_probs_multimodal_gpt_models = [
-    "o1",
-    "o1-preview",
-    "o1-2024-12-17",
-    "o1-preview-2024-09-12",
-    "gpt-4.5-preview-2025-02-27",
-    "o4-mini",
-]
-
-
-class MultimodalOpenAIModel(DeepEvalBaseMLLM):
-    def __init__(
-        self,
-        model: Optional[str] = None,
-        _openai_api_key: Optional[str] = None,
-        *args,
-        **kwargs,
-    ):
-        model_name = None
-        if isinstance(model, str):
-            model_name = parse_model_name(model)
-            if model_name not in valid_multimodal_gpt_models:
-                raise ValueError(
-                    f"Invalid model. Available Multimodal GPT models: {', '.join(model for model in valid_multimodal_gpt_models)}"
-                )
-        elif model is None:
-            model_name = default_multimodal_gpt_model
-
-        self._openai_api_key = _openai_api_key
-        self.args = args
-        self.kwargs = kwargs
-
-        super().__init__(model_name, *args, **kwargs)
-
-    ###############################################
-    # Generate functions
-    ###############################################
-
-    @retry_openai
-    def generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[str, float]:
-        client = OpenAI(api_key=self._openai_api_key)
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema:
-            if self.model_name in structured_outputs_models:
-                messages = [{"role": "user", "content": prompt}]
-                response = client.beta.chat.completions.parse(
-                    model=self.model_name,
-                    messages=messages,
-                    response_format=schema,
-                )
-                input_tokens = response.usage.prompt_tokens
-                output_tokens = response.usage.completion_tokens
-                total_cost = self.calculate_cost(input_tokens, output_tokens)
-                generated_text = response.choices[0].message.parsed
-                return generated_text, total_cost
-
-        completion = client.chat.completions.create(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
-        )
-        output = completion.choices[0].message.content
-        cost = self.calculate_cost(
-            completion.usage.prompt_tokens, completion.usage.completion_tokens
-        )
-        if schema:
-            json_output = trim_and_load_json(output)
-            return schema.model_validate(json_output), cost
-        else:
-            return output, cost
-
-    @retry_openai
-    async def a_generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[str, float]:
-        client = AsyncOpenAI(api_key=self._openai_api_key)
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema:
-            if self.model_name in structured_outputs_models:
-                messages = [{"role": "user", "content": prompt}]
-                response = await client.beta.chat.completions.parse(
-                    model=self.model_name,
-                    messages=messages,
-                    response_format=schema,
-                )
-                input_tokens = response.usage.prompt_tokens
-                output_tokens = response.usage.completion_tokens
-                total_cost = self.calculate_cost(input_tokens, output_tokens)
-                generated_text = response.choices[0].message.parsed
-                return generated_text, total_cost
-
-        completion = await client.chat.completions.create(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
-        )
-        output = completion.choices[0].message.content
-        cost = self.calculate_cost(
-            completion.usage.prompt_tokens, completion.usage.completion_tokens
-        )
-        if schema:
-            json_output = trim_and_load_json(output)
-            return schema.model_validate(json_output), cost
-        else:
-            return output, cost
-
-    ###############################################
-    # Other generate functions
-    ###############################################
-
-    @retry_openai
-    def generate_raw_response(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        top_logprobs: int = 5,
-    ) -> Tuple[ParsedChatCompletion, float]:
-        client = self._client()
-        prompt = self.generate_prompt(multimodal_input)
-        messages = [{"role": "user", "content": prompt}]
-        completion = client.chat.completions.create(
-            model=self.model_name,
-            messages=messages,
-            logprobs=True,
-            top_logprobs=top_logprobs,
-        )
-        # Cost calculation
-        input_tokens = completion.usage.prompt_tokens
-        output_tokens = completion.usage.completion_tokens
-        cost = self.calculate_cost(input_tokens, output_tokens)
-        return completion, cost
-
-    @retry_openai
-    async def a_generate_raw_response(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        top_logprobs: int = 5,
-    ) -> Tuple[ParsedChatCompletion, float]:
-        client = self._client(async_mode=True)
-        prompt = self.generate_prompt(multimodal_input)
-        messages = [{"role": "user", "content": prompt}]
-        completion = await client.chat.completions.create(
-            model=self.model_name,
-            messages=messages,
-            logprobs=True,
-            top_logprobs=top_logprobs,
-        )
-        # Cost calculation
-        input_tokens = completion.usage.prompt_tokens
-        output_tokens = completion.usage.completion_tokens
-        cost = self.calculate_cost(input_tokens, output_tokens)
-        return completion, cost
-
-    ###############################################
-    # Utilities
-    ###############################################
-
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ):
-        prompt = []
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append({"type": "text", "text": ele})
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    import PIL.Image
-
-                    image = PIL.Image.open(ele.url)
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
-                        },
-                    }
-                else:
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {"url": ele.url},
-                    }
-                prompt.append(visual_dict)
-        return prompt
-
-    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(
-            self.model_name, model_pricing["gpt-4.1"]
-        )  # Default to 'gpt-4.1' if model not found
-        input_cost = input_tokens * pricing["input"]
-        output_cost = output_tokens * pricing["output"]
-        return input_cost + output_cost
-
-    def encode_pil_image(self, pil_image):
-        image_buffer = BytesIO()
-        if pil_image.mode in ("RGBA", "LA", "P"):
-            pil_image = pil_image.convert("RGB")
-        pil_image.save(image_buffer, format="JPEG")
-        image_bytes = image_buffer.getvalue()
-        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-        return base64_encoded_image
-
-    def _client(self, async_mode: bool = False):
-        kw = {"api_key": self._openai_api_key}
-        if not sdk_retries_for(PS.OPENAI):
-            kw["max_retries"] = 0
-        Client = AsyncOpenAI if async_mode else OpenAI
-        return Client(**kw)
-
-    def get_model_name(self):
-        return self.model_name
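The three deleted modules above are the standalone multimodal wrappers (`MultimodalGeminiModel`, `MultimodalOllamaModel`, `MultimodalOpenAIModel`) that shipped under `deepeval/models/mlllms/` in 3.7.3. For reference, a minimal usage sketch against 3.7.3, based only on the signatures visible in the deleted code: the `Answer` schema and image path are illustrative, and the `from deepeval.models import ...` export path is assumed from the removed Gemini docstring example.

```python
from pydantic import BaseModel

from deepeval.models import MultimodalOpenAIModel  # 3.7.3 export path (assumed)
from deepeval.test_case import MLLMImage  # as used by the 3.7.3 wrappers


class Answer(BaseModel):
    # Illustrative structured-output schema; any pydantic model works here.
    description: str


model = MultimodalOpenAIModel(model="gpt-4o")

# generate() returns a (response, cost) tuple per the deleted signature.
answer, cost = model.generate(
    [
        "Describe what you see in this image:",
        MLLMImage(url="path/to/image.jpg", local=True),
    ],
    schema=Answer,
)
```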