deepeval 3.7.5__py3-none-any.whl → 3.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +2022 -759
- deepeval/cli/utils.py +208 -36
- deepeval/config/dotenv_handler.py +19 -0
- deepeval/config/settings.py +675 -245
- deepeval/config/utils.py +9 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +106 -21
- deepeval/evaluate/evaluate.py +0 -3
- deepeval/evaluate/execute.py +162 -315
- deepeval/evaluate/utils.py +6 -30
- deepeval/key_handler.py +124 -51
- deepeval/metrics/__init__.py +0 -4
- deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
- deepeval/metrics/answer_relevancy/template.py +102 -179
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +19 -41
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
- deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +14 -0
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +82 -136
- deepeval/metrics/g_eval/g_eval.py +93 -79
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +11 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +72 -43
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +93 -75
- deepeval/metrics/mcp/schema.py +4 -0
- deepeval/metrics/mcp/template.py +59 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +78 -86
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
- deepeval/metrics/tool_use/schema.py +4 -0
- deepeval/metrics/tool_use/template.py +16 -2
- deepeval/metrics/tool_use/tool_use.py +72 -94
- deepeval/metrics/topic_adherence/schema.py +4 -0
- deepeval/metrics/topic_adherence/template.py +21 -1
- deepeval/metrics/topic_adherence/topic_adherence.py +68 -81
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +3 -3
- deepeval/metrics/turn_contextual_precision/template.py +9 -2
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +154 -154
- deepeval/metrics/turn_contextual_recall/schema.py +3 -3
- deepeval/metrics/turn_contextual_recall/template.py +8 -1
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +148 -143
- deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
- deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +154 -157
- deepeval/metrics/turn_faithfulness/schema.py +1 -1
- deepeval/metrics/turn_faithfulness/template.py +8 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +180 -203
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +161 -91
- deepeval/models/__init__.py +2 -0
- deepeval/models/base_model.py +44 -6
- deepeval/models/embedding_models/azure_embedding_model.py +34 -12
- deepeval/models/embedding_models/local_embedding_model.py +22 -7
- deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
- deepeval/models/embedding_models/openai_embedding_model.py +3 -2
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/amazon_bedrock_model.py +229 -73
- deepeval/models/llms/anthropic_model.py +143 -48
- deepeval/models/llms/azure_model.py +169 -95
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +82 -35
- deepeval/models/llms/gemini_model.py +126 -67
- deepeval/models/llms/grok_model.py +128 -65
- deepeval/models/llms/kimi_model.py +129 -87
- deepeval/models/llms/litellm_model.py +94 -18
- deepeval/models/llms/local_model.py +115 -16
- deepeval/models/llms/ollama_model.py +97 -76
- deepeval/models/llms/openai_model.py +169 -311
- deepeval/models/llms/portkey_model.py +58 -16
- deepeval/models/llms/utils.py +5 -2
- deepeval/models/retry_policy.py +10 -5
- deepeval/models/utils.py +56 -4
- deepeval/simulator/conversation_simulator.py +49 -2
- deepeval/simulator/template.py +16 -1
- deepeval/synthesizer/synthesizer.py +19 -17
- deepeval/test_case/api.py +24 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +55 -6
- deepeval/test_case/llm_test_case.py +60 -6
- deepeval/test_run/api.py +3 -0
- deepeval/test_run/test_run.py +6 -1
- deepeval/utils.py +26 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/METADATA +3 -3
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/RECORD +145 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/WHEEL +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/entry_points.txt +0 -0
|
@@ -2,9 +2,11 @@ from typing import Optional, Tuple, Union, Dict
|
|
|
2
2
|
from openai import OpenAI, AsyncOpenAI
|
|
3
3
|
from pydantic import BaseModel, SecretStr
|
|
4
4
|
|
|
5
|
+
from deepeval.errors import DeepEvalError
|
|
5
6
|
from deepeval.config.settings import get_settings
|
|
6
7
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
7
8
|
from deepeval.models.utils import (
|
|
9
|
+
require_costs,
|
|
8
10
|
require_secret_api_key,
|
|
9
11
|
)
|
|
10
12
|
from deepeval.models import DeepEvalBaseLLM
|
|
@@ -13,57 +15,87 @@ from deepeval.models.retry_policy import (
|
|
|
13
15
|
sdk_retries_for,
|
|
14
16
|
)
|
|
15
17
|
from deepeval.constants import ProviderSlug as PS
|
|
18
|
+
from deepeval.models.llms.constants import DEEPSEEK_MODELS_DATA
|
|
19
|
+
from deepeval.utils import require_param
|
|
16
20
|
|
|
17
21
|
|
|
18
22
|
# consistent retry rules
|
|
19
23
|
retry_deepseek = create_retry_decorator(PS.DEEPSEEK)
|
|
20
24
|
|
|
21
|
-
model_pricing = {
|
|
22
|
-
"deepseek-chat": {
|
|
23
|
-
"input": 0.27 / 1e6,
|
|
24
|
-
"output": 1.10 / 1e6,
|
|
25
|
-
},
|
|
26
|
-
"deepseek-reasoner": {
|
|
27
|
-
"input": 0.55 / 1e6,
|
|
28
|
-
"output": 2.19 / 1e6,
|
|
29
|
-
},
|
|
30
|
-
}
|
|
31
|
-
|
|
32
25
|
|
|
33
26
|
class DeepSeekModel(DeepEvalBaseLLM):
|
|
34
27
|
def __init__(
|
|
35
28
|
self,
|
|
36
29
|
model: Optional[str] = None,
|
|
37
30
|
api_key: Optional[str] = None,
|
|
38
|
-
temperature: float =
|
|
31
|
+
temperature: Optional[float] = None,
|
|
32
|
+
cost_per_input_token: Optional[float] = None,
|
|
33
|
+
cost_per_output_token: Optional[float] = None,
|
|
39
34
|
generation_kwargs: Optional[Dict] = None,
|
|
40
35
|
**kwargs,
|
|
41
36
|
):
|
|
42
37
|
settings = get_settings()
|
|
43
38
|
|
|
44
39
|
model = model or settings.DEEPSEEK_MODEL_NAME
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
if temperature_from_key is None:
|
|
51
|
-
self.temperature = temperature
|
|
40
|
+
|
|
41
|
+
if temperature is not None:
|
|
42
|
+
temperature = float(temperature)
|
|
43
|
+
elif settings.TEMPERATURE is not None:
|
|
44
|
+
temperature = settings.TEMPERATURE
|
|
52
45
|
else:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
46
|
+
temperature = 0.0
|
|
47
|
+
|
|
48
|
+
cost_per_input_token = (
|
|
49
|
+
cost_per_input_token
|
|
50
|
+
if cost_per_input_token is not None
|
|
51
|
+
else settings.DEEPSEEK_COST_PER_INPUT_TOKEN
|
|
52
|
+
)
|
|
53
|
+
cost_per_output_token = (
|
|
54
|
+
cost_per_output_token
|
|
55
|
+
if cost_per_output_token is not None
|
|
56
|
+
else settings.DEEPSEEK_COST_PER_OUTPUT_TOKEN
|
|
57
|
+
)
|
|
56
58
|
|
|
57
59
|
if api_key is not None:
|
|
58
60
|
# keep it secret, keep it safe from serializings, logging and alike
|
|
59
|
-
self.api_key: SecretStr
|
|
61
|
+
self.api_key: Optional[SecretStr] = SecretStr(api_key)
|
|
60
62
|
else:
|
|
61
63
|
self.api_key = settings.DEEPSEEK_API_KEY
|
|
62
64
|
|
|
63
65
|
self.base_url = "https://api.deepseek.com"
|
|
66
|
+
|
|
67
|
+
# validation
|
|
68
|
+
model = require_param(
|
|
69
|
+
model,
|
|
70
|
+
provider_label="DeepSeekModel",
|
|
71
|
+
env_var_name="DEEPSEEK_MODEL_NAME",
|
|
72
|
+
param_hint="model",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
if temperature < 0:
|
|
76
|
+
raise DeepEvalError("Temperature must be >= 0.")
|
|
77
|
+
|
|
78
|
+
self.model_data = DEEPSEEK_MODELS_DATA.get(model)
|
|
79
|
+
self.temperature = temperature
|
|
80
|
+
|
|
81
|
+
cost_per_input_token, cost_per_output_token = require_costs(
|
|
82
|
+
self.model_data,
|
|
83
|
+
model,
|
|
84
|
+
"DEEPSEEK_COST_PER_INPUT_TOKEN",
|
|
85
|
+
"DEEPSEEK_COST_PER_OUTPUT_TOKEN",
|
|
86
|
+
cost_per_input_token,
|
|
87
|
+
cost_per_output_token,
|
|
88
|
+
)
|
|
89
|
+
self.model_data.input_price = cost_per_input_token
|
|
90
|
+
self.model_data.output_price = cost_per_output_token
|
|
91
|
+
|
|
64
92
|
# Keep sanitized kwargs for client call to strip legacy keys
|
|
65
93
|
self.kwargs = kwargs
|
|
66
|
-
self.
|
|
94
|
+
self.kwargs.pop("temperature", None)
|
|
95
|
+
|
|
96
|
+
self.generation_kwargs = dict(generation_kwargs or {})
|
|
97
|
+
self.generation_kwargs.pop("temperature", None)
|
|
98
|
+
|
|
67
99
|
super().__init__(model)
|
|
68
100
|
|
|
69
101
|
###############################################
|
|
@@ -73,7 +105,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
73
105
|
@retry_deepseek
|
|
74
106
|
def generate(
|
|
75
107
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
76
|
-
) -> Tuple[Union[str,
|
|
108
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
77
109
|
|
|
78
110
|
client = self.load_model(async_mode=False)
|
|
79
111
|
if schema:
|
|
@@ -108,7 +140,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
108
140
|
@retry_deepseek
|
|
109
141
|
async def a_generate(
|
|
110
142
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
111
|
-
) -> Tuple[Union[str,
|
|
143
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
112
144
|
|
|
113
145
|
client = self.load_model(async_mode=True)
|
|
114
146
|
if schema:
|
|
@@ -144,15 +176,30 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
144
176
|
# Utilities
|
|
145
177
|
###############################################
|
|
146
178
|
|
|
147
|
-
def calculate_cost(
|
|
148
|
-
self
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
179
|
+
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
|
|
180
|
+
if self.model_data.input_price and self.model_data.output_price:
|
|
181
|
+
input_cost = input_tokens * self.model_data.input_price
|
|
182
|
+
output_cost = output_tokens * self.model_data.output_price
|
|
183
|
+
return input_cost + output_cost
|
|
184
|
+
|
|
185
|
+
###############################################
|
|
186
|
+
# Capabilities
|
|
187
|
+
###############################################
|
|
188
|
+
|
|
189
|
+
def supports_log_probs(self) -> Union[bool, None]:
|
|
190
|
+
return self.model_data.supports_log_probs
|
|
191
|
+
|
|
192
|
+
def supports_temperature(self) -> Union[bool, None]:
|
|
193
|
+
return self.model_data.supports_temperature
|
|
194
|
+
|
|
195
|
+
def supports_multimodal(self) -> Union[bool, None]:
|
|
196
|
+
return self.model_data.supports_multimodal
|
|
197
|
+
|
|
198
|
+
def supports_structured_outputs(self) -> Union[bool, None]:
|
|
199
|
+
return self.model_data.supports_structured_outputs
|
|
200
|
+
|
|
201
|
+
def supports_json_mode(self) -> Union[bool, None]:
|
|
202
|
+
return self.model_data.supports_json
|
|
156
203
|
|
|
157
204
|
###############################################
|
|
158
205
|
# Model
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import
|
|
2
|
+
import base64
|
|
3
3
|
from pydantic import BaseModel, SecretStr
|
|
4
|
-
from typing import TYPE_CHECKING, Optional, Dict, List, Union
|
|
4
|
+
from typing import TYPE_CHECKING, Optional, Dict, List, Union, Tuple
|
|
5
5
|
|
|
6
|
+
from deepeval.errors import DeepEvalError
|
|
6
7
|
from deepeval.test_case import MLLMImage
|
|
7
8
|
from deepeval.config.settings import get_settings
|
|
8
9
|
from deepeval.models.utils import require_secret_api_key
|
|
@@ -16,14 +17,7 @@ from deepeval.utils import (
|
|
|
16
17
|
)
|
|
17
18
|
from deepeval.models.base_model import DeepEvalBaseLLM
|
|
18
19
|
from deepeval.constants import ProviderSlug as PS
|
|
19
|
-
|
|
20
|
-
valid_multimodal_models = [
|
|
21
|
-
"gemini-2.5-pro",
|
|
22
|
-
"gemini-2.5-flash",
|
|
23
|
-
"gemini-1.5-pro",
|
|
24
|
-
"gemini-1.5-flash",
|
|
25
|
-
# TODO: Add more models later
|
|
26
|
-
]
|
|
20
|
+
from deepeval.models.llms.constants import GEMINI_MODELS_DATA
|
|
27
21
|
|
|
28
22
|
if TYPE_CHECKING:
|
|
29
23
|
from google.genai import Client
|
|
@@ -67,10 +61,10 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
67
61
|
self,
|
|
68
62
|
model: Optional[str] = None,
|
|
69
63
|
api_key: Optional[str] = None,
|
|
70
|
-
temperature: float =
|
|
64
|
+
temperature: Optional[float] = None,
|
|
71
65
|
project: Optional[str] = None,
|
|
72
66
|
location: Optional[str] = None,
|
|
73
|
-
service_account_key: Optional[Dict[str, str]] = None,
|
|
67
|
+
service_account_key: Optional[Union[str, Dict[str, str]]] = None,
|
|
74
68
|
generation_kwargs: Optional[Dict] = None,
|
|
75
69
|
**kwargs,
|
|
76
70
|
):
|
|
@@ -78,38 +72,53 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
78
72
|
settings = get_settings()
|
|
79
73
|
|
|
80
74
|
model = model or settings.GEMINI_MODEL_NAME or default_gemini_model
|
|
75
|
+
self.model_data = GEMINI_MODELS_DATA.get(model)
|
|
81
76
|
|
|
82
77
|
# Get API key from settings if not provided
|
|
83
78
|
if api_key is not None:
|
|
84
79
|
# keep it secret, keep it safe from serializings, logging and aolike
|
|
85
|
-
self.api_key: SecretStr
|
|
80
|
+
self.api_key: Optional[SecretStr] = SecretStr(api_key)
|
|
86
81
|
else:
|
|
87
82
|
self.api_key = settings.GOOGLE_API_KEY
|
|
88
83
|
|
|
84
|
+
if temperature is not None:
|
|
85
|
+
temperature = float(temperature)
|
|
86
|
+
elif settings.TEMPERATURE is not None:
|
|
87
|
+
temperature = settings.TEMPERATURE
|
|
88
|
+
else:
|
|
89
|
+
temperature = 0.0
|
|
90
|
+
|
|
89
91
|
self.project = project or settings.GOOGLE_CLOUD_PROJECT
|
|
90
|
-
|
|
91
|
-
location
|
|
92
|
-
or settings.GOOGLE_CLOUD_LOCATION is not None
|
|
93
|
-
and str(settings.GOOGLE_CLOUD_LOCATION)
|
|
92
|
+
location = (
|
|
93
|
+
location if location is not None else settings.GOOGLE_CLOUD_LOCATION
|
|
94
94
|
)
|
|
95
|
+
self.location = str(location).strip() if location is not None else None
|
|
95
96
|
self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
|
|
96
97
|
|
|
97
|
-
|
|
98
|
-
|
|
98
|
+
self.service_account_key: Optional[SecretStr] = None
|
|
99
|
+
if service_account_key is None:
|
|
100
|
+
self.service_account_key = settings.GOOGLE_SERVICE_ACCOUNT_KEY
|
|
101
|
+
elif isinstance(service_account_key, dict):
|
|
102
|
+
self.service_account_key = SecretStr(
|
|
103
|
+
json.dumps(service_account_key)
|
|
104
|
+
)
|
|
99
105
|
else:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
self.service_account_key = json.loads(service_account_key_data)
|
|
106
|
+
str_value = str(service_account_key).strip()
|
|
107
|
+
self.service_account_key = (
|
|
108
|
+
SecretStr(str_value) if str_value else None
|
|
109
|
+
)
|
|
105
110
|
|
|
106
111
|
if temperature < 0:
|
|
107
|
-
raise
|
|
112
|
+
raise DeepEvalError("Temperature must be >= 0.")
|
|
113
|
+
|
|
108
114
|
self.temperature = temperature
|
|
109
115
|
|
|
110
116
|
# Raw kwargs destined for the underlying Client
|
|
111
117
|
self.kwargs = kwargs
|
|
112
|
-
self.
|
|
118
|
+
self.kwargs.pop("temperature", None)
|
|
119
|
+
|
|
120
|
+
self.generation_kwargs = dict(generation_kwargs or {})
|
|
121
|
+
self.generation_kwargs.pop("temperature", None)
|
|
113
122
|
|
|
114
123
|
self._module = self._require_module()
|
|
115
124
|
# Configure default model generation settings
|
|
@@ -145,40 +154,34 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
145
154
|
True if the model should use Vertex AI, False otherwise
|
|
146
155
|
"""
|
|
147
156
|
if self.use_vertexai is not None:
|
|
148
|
-
return self.use_vertexai
|
|
157
|
+
return self.use_vertexai
|
|
149
158
|
if self.project and self.location:
|
|
150
159
|
return True
|
|
151
160
|
else:
|
|
152
161
|
return False
|
|
153
162
|
|
|
154
163
|
@retry_gemini
|
|
155
|
-
def
|
|
156
|
-
self, multimodal_input: List[Union[str, MLLMImage]] =
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
164
|
+
def generate_content(
|
|
165
|
+
self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
|
|
166
|
+
):
|
|
167
|
+
multimodal_input = (
|
|
168
|
+
multimodal_input if multimodal_input is not None else []
|
|
169
|
+
)
|
|
170
|
+
content = []
|
|
162
171
|
|
|
163
|
-
|
|
164
|
-
|
|
172
|
+
for element in multimodal_input:
|
|
173
|
+
if isinstance(element, str):
|
|
174
|
+
content.append(element)
|
|
175
|
+
elif isinstance(element, MLLMImage):
|
|
176
|
+
# Gemini doesn't support direct external URLs
|
|
177
|
+
# Must convert all images to bytes
|
|
178
|
+
if element.url and not element.local:
|
|
179
|
+
import requests
|
|
165
180
|
|
|
166
|
-
|
|
167
|
-
ValueError: If an invalid input type is provided
|
|
168
|
-
"""
|
|
169
|
-
prompt = []
|
|
170
|
-
settings = get_settings()
|
|
181
|
+
settings = get_settings()
|
|
171
182
|
|
|
172
|
-
for ele in multimodal_input:
|
|
173
|
-
if isinstance(ele, str):
|
|
174
|
-
prompt.append(ele)
|
|
175
|
-
elif isinstance(ele, MLLMImage):
|
|
176
|
-
if ele.local:
|
|
177
|
-
with open(ele.url, "rb") as f:
|
|
178
|
-
image_data = f.read()
|
|
179
|
-
else:
|
|
180
183
|
response = requests.get(
|
|
181
|
-
|
|
184
|
+
element.url,
|
|
182
185
|
timeout=(
|
|
183
186
|
settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
|
|
184
187
|
settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
|
|
@@ -186,21 +189,38 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
186
189
|
)
|
|
187
190
|
response.raise_for_status()
|
|
188
191
|
image_data = response.content
|
|
192
|
+
mime_type = response.headers.get(
|
|
193
|
+
"content-type", element.mimeType or "image/jpeg"
|
|
194
|
+
)
|
|
195
|
+
else:
|
|
196
|
+
element.ensure_images_loaded()
|
|
197
|
+
try:
|
|
198
|
+
image_data = base64.b64decode(element.dataBase64)
|
|
199
|
+
except Exception:
|
|
200
|
+
raise ValueError(
|
|
201
|
+
f"Invalid base64 data in MLLMImage: {element._id}"
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
mime_type = element.mimeType or "image/jpeg"
|
|
189
205
|
|
|
206
|
+
# Create Part from bytes
|
|
190
207
|
image_part = self._module.types.Part.from_bytes(
|
|
191
|
-
data=image_data, mime_type=
|
|
208
|
+
data=image_data, mime_type=mime_type
|
|
192
209
|
)
|
|
193
|
-
|
|
210
|
+
content.append(image_part)
|
|
194
211
|
else:
|
|
195
|
-
raise
|
|
196
|
-
|
|
212
|
+
raise DeepEvalError(f"Invalid input type: {type(element)}")
|
|
213
|
+
|
|
214
|
+
return content
|
|
197
215
|
|
|
198
216
|
###############################################
|
|
199
217
|
# Generate functions
|
|
200
218
|
###############################################
|
|
201
219
|
|
|
202
220
|
@retry_gemini
|
|
203
|
-
def generate(
|
|
221
|
+
def generate(
|
|
222
|
+
self, prompt: str, schema: Optional[BaseModel] = None
|
|
223
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
204
224
|
"""Generates text from a prompt.
|
|
205
225
|
|
|
206
226
|
Args:
|
|
@@ -213,9 +233,8 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
213
233
|
client = self.load_model()
|
|
214
234
|
|
|
215
235
|
if check_if_multimodal(prompt):
|
|
216
|
-
|
|
217
236
|
prompt = convert_to_multi_modal_array(prompt)
|
|
218
|
-
prompt = self.
|
|
237
|
+
prompt = self.generate_content(prompt)
|
|
219
238
|
|
|
220
239
|
if schema is not None:
|
|
221
240
|
response = client.models.generate_content(
|
|
@@ -245,7 +264,7 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
245
264
|
@retry_gemini
|
|
246
265
|
async def a_generate(
|
|
247
266
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
248
|
-
) -> str:
|
|
267
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
249
268
|
"""Asynchronously generates text from a prompt.
|
|
250
269
|
|
|
251
270
|
Args:
|
|
@@ -259,7 +278,7 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
259
278
|
|
|
260
279
|
if check_if_multimodal(prompt):
|
|
261
280
|
prompt = convert_to_multi_modal_array(prompt)
|
|
262
|
-
prompt = self.
|
|
281
|
+
prompt = self.generate_content(prompt)
|
|
263
282
|
|
|
264
283
|
if schema is not None:
|
|
265
284
|
response = await client.aio.models.generate_content(
|
|
@@ -286,6 +305,32 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
286
305
|
)
|
|
287
306
|
return response.text, 0
|
|
288
307
|
|
|
308
|
+
#########################
|
|
309
|
+
# Capabilities #
|
|
310
|
+
#########################
|
|
311
|
+
|
|
312
|
+
def supports_log_probs(self) -> Union[bool, None]:
|
|
313
|
+
return self.model_data.supports_log_probs
|
|
314
|
+
|
|
315
|
+
def supports_temperature(self) -> Union[bool, None]:
|
|
316
|
+
return self.model_data.supports_temperature
|
|
317
|
+
|
|
318
|
+
def supports_multimodal(self) -> Union[bool, None]:
|
|
319
|
+
return self.model_data.supports_multimodal
|
|
320
|
+
|
|
321
|
+
def supports_structured_outputs(self) -> Union[bool, None]:
|
|
322
|
+
"""
|
|
323
|
+
OpenAI models that natively enforce typed structured outputs.
|
|
324
|
+
Used by generate(...) when a schema is provided.
|
|
325
|
+
"""
|
|
326
|
+
return self.model_data.supports_structured_outputs
|
|
327
|
+
|
|
328
|
+
def supports_json_mode(self) -> Union[bool, None]:
|
|
329
|
+
"""
|
|
330
|
+
OpenAI models that enforce JSON mode
|
|
331
|
+
"""
|
|
332
|
+
return self.model_data.supports_json
|
|
333
|
+
|
|
289
334
|
#########
|
|
290
335
|
# Model #
|
|
291
336
|
#########
|
|
@@ -326,8 +371,27 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
326
371
|
client_kwargs = self._client_kwargs(**self.kwargs)
|
|
327
372
|
|
|
328
373
|
if self.should_use_vertexai():
|
|
374
|
+
service_account_key_json = require_secret_api_key(
|
|
375
|
+
self.service_account_key,
|
|
376
|
+
provider_label="Google Gemini",
|
|
377
|
+
env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
|
|
378
|
+
param_hint="`service_account_key` to GeminiModel(...)",
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
try:
|
|
382
|
+
service_account_key = json.loads(service_account_key_json)
|
|
383
|
+
except Exception as e:
|
|
384
|
+
raise DeepEvalError(
|
|
385
|
+
"GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
|
|
386
|
+
) from e
|
|
387
|
+
|
|
388
|
+
if not isinstance(service_account_key, dict):
|
|
389
|
+
raise DeepEvalError(
|
|
390
|
+
"GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
|
|
391
|
+
)
|
|
392
|
+
|
|
329
393
|
if not self.project or not self.location:
|
|
330
|
-
raise
|
|
394
|
+
raise DeepEvalError(
|
|
331
395
|
"When using Vertex AI API, both project and location are required. "
|
|
332
396
|
"Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and "
|
|
333
397
|
"GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
|
|
@@ -336,12 +400,12 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
336
400
|
oauth2 = self._require_oauth2()
|
|
337
401
|
credentials = (
|
|
338
402
|
oauth2.service_account.Credentials.from_service_account_info(
|
|
339
|
-
|
|
403
|
+
service_account_key,
|
|
340
404
|
scopes=[
|
|
341
405
|
"https://www.googleapis.com/auth/cloud-platform",
|
|
342
406
|
],
|
|
343
407
|
)
|
|
344
|
-
if
|
|
408
|
+
if service_account_key
|
|
345
409
|
else None
|
|
346
410
|
)
|
|
347
411
|
|
|
@@ -364,10 +428,5 @@ class GeminiModel(DeepEvalBaseLLM):
|
|
|
364
428
|
|
|
365
429
|
return client
|
|
366
430
|
|
|
367
|
-
def supports_multimodal(self):
|
|
368
|
-
if self.name in valid_multimodal_models:
|
|
369
|
-
return True
|
|
370
|
-
return False
|
|
371
|
-
|
|
372
431
|
def get_model_name(self):
|
|
373
432
|
return f"{self.name} (Gemini)"
|