deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
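Beyond the removed multimodal model classes and metrics, the largest structural change in the listing is the rename of the `deepeval/optimization` package to `deepeval/optimizer`, with the `copro`, `gepa`, `miprov2`, and `simba` loops gathered under `optimizer/algorithms/`. A minimal import shim for code that must run against both layouts; `PromptOptimizer` is assumed from the `prompt_optimizer.py` filename, since the diff does not show the module's actual exports:

```python
# Hypothetical shim for the optimization -> optimizer package move shown
# in the file list. Only the module paths are confirmed by this diff;
# the PromptOptimizer class name is an assumption based on the filename.
try:
    from deepeval.optimizer.prompt_optimizer import PromptOptimizer  # >= 3.7.6 layout
except ImportError:
    from deepeval.optimization.prompt_optimizer import PromptOptimizer  # <= 3.7.4 layout
```

The two hunks below show the full contents of the removed `deepeval/models/mlllms/gemini_model.py` and `deepeval/models/mlllms/ollama_model.py`.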
deepeval/models/mlllms/gemini_model.py (removed)
@@ -1,313 +0,0 @@
-import requests
-from typing import Optional, List, Union
-from pydantic import BaseModel, SecretStr
-from google.genai import types
-from google import genai
-
-from deepeval.config.settings import get_settings
-from deepeval.models.utils import require_secret_api_key
-from deepeval.models.retry_policy import (
-    create_retry_decorator,
-)
-from deepeval.models.base_model import DeepEvalBaseMLLM
-from deepeval.test_case import MLLMImage
-from deepeval.constants import ProviderSlug as PS
-
-
-default_multimodal_gemini_model = "gemini-1.5-pro"
-# consistent retry rules
-retry_gemini = create_retry_decorator(PS.GOOGLE)
-
-
-class MultimodalGeminiModel(DeepEvalBaseMLLM):
-    """Class that implements Google Gemini models for multimodal evaluation.
-
-    This class provides integration with Google's Gemini models through the Google GenAI SDK,
-    supporting both text and multimodal (text + image) inputs for evaluation tasks.
-    To use Gemini API, set api_key attribute only.
-    To use Vertex AI API, set project and location attributes.
-
-    Attributes:
-        model_name: Name of the Gemini model to use
-        api_key: Google API key for authentication
-        project: Google Cloud project ID
-        location: Google Cloud location
-
-    Example:
-        ```python
-        from deepeval.models import MultimodalGeminiModel
-
-        # Initialize the model
-        model = MultimodalGeminiModel(
-            model_name="gemini-pro-vision",
-            api_key="your-api-key"
-        )
-
-        # Generate text from text + image input
-        response = model.generate([
-            "Describe what you see in this image:",
-            MLLMImage(url="path/to/image.jpg", local=True)
-        ])
-        ```
-    """
-
-    def __init__(
-        self,
-        model_name: Optional[str] = None,
-        api_key: Optional[str] = None,
-        project: Optional[str] = None,
-        location: Optional[str] = None,
-        *args,
-        **kwargs,
-    ):
-        settings = get_settings()
-        model_name = (
-            model_name
-            or settings.GEMINI_MODEL_NAME
-            or default_multimodal_gemini_model
-        )
-
-        # Get API key from settings if not provided
-        if api_key is not None:
-            # keep it secret, keep it safe from serializings, logging and aolike
-            self.api_key: SecretStr | None = SecretStr(api_key)
-        else:
-            self.api_key = settings.GOOGLE_API_KEY
-
-        self.project = project or settings.GOOGLE_CLOUD_PROJECT
-        self.location = (
-            location
-            or settings.GOOGLE_CLOUD_LOCATION is not None
-            and str(settings.GOOGLE_CLOUD_LOCATION)
-        )
-        self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
-
-        # Keep any extra kwargs for the underlying genai.Client
-        self.args = args
-        self.kwargs = kwargs
-
-        # Configure default model generation settings
-        self.model_safety_settings = [
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
-            ),
-        ]
-        self.model_temperature = 0.0
-
-        super().__init__(model_name, *args, **kwargs)
-
-    def should_use_vertexai(self):
-        """Checks if the model should use Vertex AI for generation.
-
-        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
-        environment variable. If not set, it checks for the presence of the
-        project and location.
-
-        Returns:
-            True if the model should use Vertex AI, False otherwise
-        """
-        if self.use_vertexai is not None:
-            return self.use_vertexai.lower() == "yes"
-
-        if self.project and self.location:
-            return True
-        else:
-            return False
-
-    # TODO: Refactor generate prompt to minimize the work done on retry
-    @retry_gemini
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ) -> List[Union[str, MLLMImage]]:
-        """Converts DeepEval multimodal input into GenAI SDK compatible format.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-
-        Returns:
-            List of strings and PIL Image objects ready for model input
-
-        Raises:
-            ValueError: If an invalid input type is provided
-        """
-        prompt = []
-        settings = get_settings()
-
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append(ele)
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    with open(ele.url, "rb") as f:
-                        image_data = f.read()
-                else:
-                    response = requests.get(
-                        ele.url,
-                        timeout=(
-                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                        ),
-                    )
-                    response.raise_for_status()
-                    image_data = response.content
-
-                image_part = types.Part.from_bytes(
-                    data=image_data, mime_type="image/jpeg"
-                )
-                prompt.append(image_part)
-            else:
-                raise ValueError(f"Invalid input type: {type(ele)}")
-        return prompt
-
-    @retry_gemini
-    def generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> str:
-        """Generates text from multimodal input.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-            schema: Optional Pydantic model for structured output
-
-        Returns:
-            Generated text response
-        """
-        client = self.load_model()
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema is not None:
-            response = client.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    response_mime_type="application/json",
-                    response_schema=schema,
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.parsed, 0
-        else:
-            response = client.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.text, 0
-
-    @retry_gemini
-    async def a_generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> str:
-        """Asynchronously generates text from multimodal input.
-
-        Args:
-            multimodal_input: List of strings and MLLMImage objects
-            schema: Optional Pydantic model for structured output
-
-        Returns:
-            Generated text response
-        """
-        client = self.load_model()
-        prompt = self.generate_prompt(multimodal_input)
-
-        if schema is not None:
-            response = await client.aio.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    response_mime_type="application/json",
-                    response_schema=schema,
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.parsed, 0
-        else:
-            response = await client.aio.models.generate_content(
-                model=self.model_name,
-                contents=prompt,
-                config=types.GenerateContentConfig(
-                    safety_settings=self.model_safety_settings,
-                    temperature=self.model_temperature,
-                ),
-            )
-            return response.text, 0
-
-    #########
-    # Model #
-    #########
-
-    def get_model_name(self) -> str:
-        """Returns the name of the Gemini model being used."""
-        return self.model_name
-
-    def load_model(self, *args, **kwargs):
-        """Creates and returns a GenAI client.
-
-        With the Gen AI SDK, the model is set at inference time, so we only
-        construct the client here. Kept for compatibility with other MLLMs.
-        """
-        return self._build_client(**kwargs)
-
-    def _client_kwargs(self, **override_kwargs) -> dict:
-        """
-        Return kwargs forwarded to genai.Client.
-
-        Start from the ctor kwargs captured on `self.kwargs`, then apply any
-        overrides passed via load_model(...).
-        """
-        client_kwargs = dict(self.kwargs or {})
-        if override_kwargs:
-            client_kwargs.update(override_kwargs)
-        return client_kwargs
-
-    def _build_client(self, **override_kwargs):
-        """Build and return a genai.Client for either Gemini API or Vertex AI."""
-        client_kwargs = self._client_kwargs(**override_kwargs)
-
-        if self.should_use_vertexai():
-            if not self.project or not self.location:
-                raise ValueError(
-                    "When using Vertex AI API, both project and location are required."
-                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
-                    "or set them in your DeepEval configuration."
-                )
-
-            # Create client for Vertex AI
-            return genai.Client(
-                vertexai=True,
-                project=self.project,
-                location=self.location,
-                **client_kwargs,
-            )
-
-        api_key = require_secret_api_key(
-            self.api_key,
-            provider_label="Google Gemini",
-            env_var_name="GOOGLE_API_KEY",
-            param_hint="`api_key` to MultimodalGeminiModel(...)",
-        )
-
-        # Create client for Gemini API
-        return genai.Client(api_key=api_key, **client_kwargs)
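One detail worth flagging in the removed `__init__` above: because `and` binds tighter than `or`, the `self.location` expression resolves to `False` (not `None`) when neither an explicit `location` nor the `GOOGLE_CLOUD_LOCATION` setting is present. That is harmless for the truthiness check in `should_use_vertexai`, but surprising if the attribute is inspected directly. A standalone snippet (not deepeval code) illustrating the evaluation:

```python
# Standalone illustration of the operator precedence in the removed
# MultimodalGeminiModel.__init__; `and` binds tighter than `or`.
def resolve_location(location, setting):
    return (
        location
        or setting is not None
        and str(setting)
    )

assert resolve_location("us-central1", None) == "us-central1"   # explicit arg wins
assert resolve_location(None, "europe-west4") == "europe-west4" # setting is used
assert resolve_location(None, None) is False                    # False, not None
```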
deepeval/models/mlllms/ollama_model.py (removed)
@@ -1,175 +0,0 @@
-from typing import Optional, Tuple, List, Union, Dict
-from ollama import Client, AsyncClient, ChatResponse
-from pydantic import BaseModel
-import requests
-import base64
-import io
-
-from deepeval.models.retry_policy import (
-    create_retry_decorator,
-)
-from deepeval.models import DeepEvalBaseMLLM
-from deepeval.test_case import MLLMImage
-from deepeval.config.settings import get_settings
-from deepeval.constants import ProviderSlug as PS
-
-
-retry_ollama = create_retry_decorator(PS.OLLAMA)
-
-
-class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(
-        self,
-        model: Optional[str] = None,
-        host: Optional[str] = None,
-        **kwargs,
-    ):
-        """
-        Multimodal Ollama model.
-
-        - `model`: Ollama model name (e.g. "llava").
-        - `host`: Ollama base URL (e.g. "http://localhost:11434").
-        - extra **kwargs are passed through to the underlying Client.
-        """
-        settings = get_settings()
-
-        # Resolve host/base URL
-        self.base_url = (
-            host
-            or settings.LOCAL_MODEL_BASE_URL
-            and str(settings.LOCAL_MODEL_BASE_URL)
-        )
-
-        # Resolve model name
-        model_name = model or settings.LOCAL_MODEL_NAME
-
-        # Client kwargs
-        self.kwargs = kwargs or {}
-
-        super().__init__(model_name)
-
-    @retry_ollama
-    def generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[Union[str, Dict], float]:
-        chat_model = self.load_model()
-        messages = self.generate_messages(multimodal_input)
-        response: ChatResponse = chat_model.chat(
-            model=self.model_name,
-            messages=messages,
-            format=schema.model_json_schema() if schema else None,
-        )
-        return (
-            (
-                schema.model_validate_json(response.message.content)
-                if schema
-                else response.message.content
-            ),
-            0,
-        )
-
-    @retry_ollama
-    async def a_generate(
-        self,
-        multimodal_input: List[Union[str, MLLMImage]],
-        schema: Optional[BaseModel] = None,
-    ) -> Tuple[str, float]:
-        chat_model = self.load_model(async_mode=True)
-        messages = self.generate_messages(multimodal_input)
-        response: ChatResponse = await chat_model.chat(
-            model=self.model_name,
-            messages=messages,
-            format=schema.model_json_schema() if schema else None,
-        )
-        return (
-            (
-                schema.model_validate_json(response.message.content)
-                if schema
-                else response.message.content
-            ),
-            0,
-        )
-
-    def generate_messages(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
-    ):
-        messages = []
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                messages.append(
-                    {
-                        "role": "user",
-                        "content": ele,
-                    }
-                )
-            elif isinstance(ele, MLLMImage):
-                img_b64 = self.convert_to_base64(ele.url, ele.local)
-                if img_b64 is not None:
-                    messages.append(
-                        {
-                            "role": "user",
-                            "images": [img_b64],
-                        }
-                    )
-        return messages
-
-    ###############################################
-    # Utilities
-    ###############################################
-
-    def convert_to_base64(self, image_source: str, is_local: bool) -> str:
-        from PIL import Image
-
-        settings = get_settings()
-        try:
-            if not is_local:
-                response = requests.get(
-                    image_source,
-                    stream=True,
-                    timeout=(
-                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
-                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
-                    ),
-                )
-                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
-                image = Image.open(io.BytesIO(response.content))
-            else:
-                image = Image.open(image_source)
-
-            buffered = io.BytesIO()
-            image.save(buffered, format="JPEG")
-            img_str = base64.b64encode(buffered.getvalue()).decode()
-            return img_str
-
-        except (requests.exceptions.RequestException, OSError) as e:
-            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
-            print(f"Image fetch/encode failed: {e}")
-            raise
-        except Exception as e:
-            print(f"Error converting image to base64: {e}")
-            return None
-
-    ###############################################
-    # Model
-    ###############################################
-
-    def load_model(self, async_mode: bool = False):
-        if not async_mode:
-            return self._build_client(Client)
-        return self._build_client(AsyncClient)
-
-    def _client_kwargs(self) -> Dict:
-        """
-        Return client-init kwargs.
-        Ollama's Python client doesn't have built-in retry config like OpenAI,
-        so we just pass these through untouched.
-        """
-        return dict(self.kwargs or {})
-
-    def _build_client(self, cls):
-        return cls(host=self.base_url, **self._client_kwargs())
-
-    def get_model_name(self):
-        return f"{self.model_name} (Ollama)"