deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0

Detailed diffs for two of the changed files follow. Deletion lines whose content the diff viewer elided or truncated are kept as rendered.

deepeval/models/llms/deepseek_model.py:

```diff
@@ -2,66 +2,101 @@ from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel, SecretStr
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import
+from deepeval.models.utils import (
+    require_costs,
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.constants import ProviderSlug as PS
+from deepeval.models.llms.constants import DEEPSEEK_MODELS_DATA
+from deepeval.utils import require_param
 
 
 # consistent retry rules
 retry_deepseek = create_retry_decorator(PS.DEEPSEEK)
 
-model_pricing = {
-    "deepseek-chat": {
-        "input": 0.27 / 1e6,
-        "output": 1.10 / 1e6,
-    },
-    "deepseek-reasoner": {
-        "input": 0.55 / 1e6,
-        "output": 2.19 / 1e6,
-    },
-}
-
 
 class DeepSeekModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
-
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         settings = get_settings()
 
-
-
-
-
-
-
-        if temperature_from_key is None:
-            self.temperature = temperature
+        model = model or settings.DEEPSEEK_MODEL_NAME
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
         else:
-
-
-
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.DEEPSEEK_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.DEEPSEEK_COST_PER_OUTPUT_TOKEN
+        )
 
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.DEEPSEEK_API_KEY
 
         self.base_url = "https://api.deepseek.com"
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="DeepSeekModel",
+            env_var_name="DEEPSEEK_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if temperature < 0:
+            raise DeepEvalError("Temperature must be >= 0.")
+
+        self.model_data = DEEPSEEK_MODELS_DATA.get(model)
+        self.temperature = temperature
+
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "DEEPSEEK_COST_PER_INPUT_TOKEN",
+            "DEEPSEEK_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
-        self.
-
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -70,11 +105,12 @@ class DeepSeekModel(DeepEvalBaseLLM):
     @retry_deepseek
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str,
+    ) -> Tuple[Union[str, BaseModel], float]:
+
         client = self.load_model(async_mode=False)
         if schema:
             completion = client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -90,7 +126,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -104,11 +140,12 @@ class DeepSeekModel(DeepEvalBaseLLM):
     @retry_deepseek
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str,
+    ) -> Tuple[Union[str, BaseModel], float]:
+
         client = self.load_model(async_mode=True)
         if schema:
             completion = await client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -124,7 +161,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = await client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -144,11 +181,29 @@ class DeepSeekModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-
-
-        output_cost = output_tokens * pricing["output"]
+        input_cost = input_tokens * self.model_data.input_price
+        output_cost = output_tokens * self.model_data.output_price
         return input_cost + output_cost
 
+    ###############################################
+    # Capabilities
+    ###############################################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
+
     ###############################################
     # Model
     ###############################################
@@ -158,9 +213,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def get_model_name(self):
-        return f"{self.model_name}"
-
     def _client_kwargs(self) -> Dict:
         kwargs = dict(self.kwargs or {})
         # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
@@ -190,3 +242,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
                 kw.pop("max_retries", None)
                 return cls(**kw)
             raise
+
+    def get_model_name(self):
+        return f"{self.name} (Deepseek)"
```
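
Taken together, the DeepSeekModel changes replace the hard-coded `model_pricing` table with per-model metadata in `DEEPSEEK_MODELS_DATA`, add explicit `cost_per_input_token` / `cost_per_output_token` overrides, and have `generate()` / `a_generate()` return an `(output, cost)` tuple. A minimal usage sketch, assuming deepeval 3.7.6 as diffed above; the key and prompt are placeholders, and the prices simply echo the removed `model_pricing` values:

```python
# A minimal sketch based only on the constructor signature and fallbacks
# visible in this diff; all concrete values are illustrative.
from deepeval.models.llms.deepseek_model import DeepSeekModel

model = DeepSeekModel(
    model="deepseek-chat",            # falls back to settings.DEEPSEEK_MODEL_NAME
    api_key="sk-...",                 # falls back to settings.DEEPSEEK_API_KEY
    temperature=0.0,                  # None -> settings.TEMPERATURE -> 0.0
    cost_per_input_token=0.27 / 1e6,  # else settings.DEEPSEEK_COST_PER_INPUT_TOKEN
    cost_per_output_token=1.10 / 1e6, # else settings.DEEPSEEK_COST_PER_OUTPUT_TOKEN
)

# generate() now returns (str | BaseModel, cost) rather than a bare string.
output, cost = model.generate("Summarize retrieval-augmented generation in one sentence.")
print(output, cost)
```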
deepeval/models/llms/gemini_model.py:

```diff
@@ -1,17 +1,26 @@
 import json
-
+import base64
 from pydantic import BaseModel, SecretStr
-from
-from typing import Optional, Dict
+from typing import TYPE_CHECKING, Optional, Dict, List, Union, Tuple
 
+from deepeval.errors import DeepEvalError
+from deepeval.test_case import MLLMImage
 from deepeval.config.settings import get_settings
 from deepeval.models.utils import require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_dependency,
+)
 from deepeval.models.base_model import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
-from
+from deepeval.models.llms.constants import GEMINI_MODELS_DATA
+
+if TYPE_CHECKING:
+    from google.genai import Client
 
 default_gemini_model = "gemini-1.5-pro"
 
@@ -28,7 +37,7 @@ class GeminiModel(DeepEvalBaseLLM):
     To use Vertex AI API, set project and location attributes.
 
     Attributes:
-
+        model: Name of the Gemini model to use
         api_key: Google API key for authentication
         project: Google Cloud project ID
         location: Google Cloud location
@@ -39,7 +48,7 @@ class GeminiModel(DeepEvalBaseLLM):
 
     # Initialize the model
     model = GeminiModel(
-
+        model="gemini-1.5-pro-001",
         api_key="your-api-key"
     )
 
@@ -50,75 +59,89 @@ class GeminiModel(DeepEvalBaseLLM):
 
     def __init__(
         self,
-
+        model: Optional[str] = None,
         api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
-        service_account_key: Optional[Dict[str, str]] = None,
-        temperature: float = 0,
+        service_account_key: Optional[Union[str, Dict[str, str]]] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
 
         settings = get_settings()
 
-
-
-        )
+        model = model or settings.GEMINI_MODEL_NAME or default_gemini_model
+        self.model_data = GEMINI_MODELS_DATA.get(model)
 
         # Get API key from settings if not provided
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and aolike
-            self.api_key: SecretStr
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.GOOGLE_API_KEY
 
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
         self.project = project or settings.GOOGLE_CLOUD_PROJECT
-
-        location
-            or settings.GOOGLE_CLOUD_LOCATION is not None
-            and str(settings.GOOGLE_CLOUD_LOCATION)
+        location = (
+            location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
+        self.location = str(location).strip() if location is not None else None
         self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
 
-
-
+        self.service_account_key: Optional[SecretStr] = None
+        if service_account_key is None:
+            self.service_account_key = settings.GOOGLE_SERVICE_ACCOUNT_KEY
+        elif isinstance(service_account_key, dict):
+            self.service_account_key = SecretStr(
+                json.dumps(service_account_key)
+            )
         else:
-
-
-
-
-            self.service_account_key = json.loads(service_account_key_data)
+            str_value = str(service_account_key).strip()
+            self.service_account_key = (
+                SecretStr(str_value) if str_value else None
+            )
 
         if temperature < 0:
-            raise
+            raise DeepEvalError("Temperature must be >= 0.")
+
         self.temperature = temperature
 
         # Raw kwargs destined for the underlying Client
         self.kwargs = kwargs
-        self.
+        self.kwargs.pop("temperature", None)
 
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
+        self._module = self._require_module()
         # Configure default model generation settings
         self.model_safety_settings = [
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
            ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
-            types.SafetySetting(
-                category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            self._module.types.SafetySetting(
+                category=self._module.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                threshold=self._module.types.HarmBlockThreshold.BLOCK_NONE,
             ),
         ]
 
-        super().__init__(
+        super().__init__(model)
 
     def should_use_vertexai(self) -> bool:
         """Checks if the model should use Vertex AI for generation.
@@ -131,18 +154,73 @@ class GeminiModel(DeepEvalBaseLLM):
         True if the model should use Vertex AI, False otherwise
         """
         if self.use_vertexai is not None:
-            return self.use_vertexai
+            return self.use_vertexai
         if self.project and self.location:
             return True
         else:
             return False
 
+    @retry_gemini
+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
+    ):
+        multimodal_input = (
+            multimodal_input if multimodal_input is not None else []
+        )
+        content = []
+
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append(element)
+            elif isinstance(element, MLLMImage):
+                # Gemini doesn't support direct external URLs
+                # Must convert all images to bytes
+                if element.url and not element.local:
+                    import requests
+
+                    settings = get_settings()
+
+                    response = requests.get(
+                        element.url,
+                        timeout=(
+                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                        ),
+                    )
+                    response.raise_for_status()
+                    image_data = response.content
+                    mime_type = response.headers.get(
+                        "content-type", element.mimeType or "image/jpeg"
+                    )
+                else:
+                    element.ensure_images_loaded()
+                    try:
+                        image_data = base64.b64decode(element.dataBase64)
+                    except Exception:
+                        raise ValueError(
+                            f"Invalid base64 data in MLLMImage: {element._id}"
+                        )
+
+                    mime_type = element.mimeType or "image/jpeg"
+
+                # Create Part from bytes
+                image_part = self._module.types.Part.from_bytes(
+                    data=image_data, mime_type=mime_type
+                )
+                content.append(image_part)
+            else:
+                raise DeepEvalError(f"Invalid input type: {type(element)}")
+
+        return content
+
     ###############################################
     # Generate functions
     ###############################################
 
     @retry_gemini
-    def generate(
+    def generate(
+        self, prompt: str, schema: Optional[BaseModel] = None
+    ) -> Tuple[Union[str, BaseModel], float]:
         """Generates text from a prompt.
 
         Args:
@@ -154,11 +232,15 @@ class GeminiModel(DeepEvalBaseLLM):
         """
         client = self.load_model()
 
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_content(prompt)
+
         if schema is not None:
             response = client.models.generate_content(
-                model=self.
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=schema,
                     safety_settings=self.model_safety_settings,
@@ -169,9 +251,9 @@ class GeminiModel(DeepEvalBaseLLM):
             return response.parsed, 0
         else:
             response = client.models.generate_content(
-                model=self.
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     safety_settings=self.model_safety_settings,
                     temperature=self.temperature,
                     **self.generation_kwargs,
@@ -182,7 +264,7 @@ class GeminiModel(DeepEvalBaseLLM):
     @retry_gemini
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> str:
+    ) -> Tuple[Union[str, BaseModel], float]:
         """Asynchronously generates text from a prompt.
 
         Args:
@@ -194,11 +276,15 @@ class GeminiModel(DeepEvalBaseLLM):
         """
         client = self.load_model()
 
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_content(prompt)
+
         if schema is not None:
             response = await client.aio.models.generate_content(
-                model=self.
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=schema,
                     safety_settings=self.model_safety_settings,
@@ -209,9 +295,9 @@ class GeminiModel(DeepEvalBaseLLM):
             return response.parsed, 0
         else:
             response = await client.aio.models.generate_content(
-                model=self.
+                model=self.name,
                 contents=prompt,
-                config=types.GenerateContentConfig(
+                config=self._module.types.GenerateContentConfig(
                     safety_settings=self.model_safety_settings,
                     temperature=self.temperature,
                     **self.generation_kwargs,
@@ -219,15 +305,37 @@ class GeminiModel(DeepEvalBaseLLM):
             )
             return response.text, 0
 
+    #########################
+    # Capabilities #
+    #########################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        """
+        OpenAI models that natively enforce typed structured outputs.
+        Used by generate(...) when a schema is provided.
+        """
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        """
+        OpenAI models that enforce JSON mode
+        """
+        return self.model_data.supports_json
+
     #########
     # Model #
     #########
 
-    def
-        """Returns the name of the Gemini model being used."""
-        return self.model_name
-
-    def load_model(self, *args, **kwargs):
+    def load_model(self):
         """Creates a client.
         With Gen AI SDK, model is set at inference time, so there is no
         model to load and initialize.
@@ -236,7 +344,21 @@ class GeminiModel(DeepEvalBaseLLM):
         Returns:
             A GenerativeModel instance configured for evaluation.
         """
-        return self._build_client(
+        return self._build_client()
+
+    def _require_oauth2(self):
+        return require_dependency(
+            "google.oauth2",
+            provider_label="GeminiModel",
+            install_hint="Install it with `pip install google-auth`.",
+        )
+
+    def _require_module(self):
+        return require_dependency(
+            "google.genai",
+            provider_label="GeminiModel",
+            install_hint="Install it with `pip install google-genai`.",
+        )
 
     def _client_kwargs(self, **override_kwargs) -> Dict:
         """Merge ctor kwargs with any overrides passed at load_model time."""
@@ -245,29 +367,49 @@ class GeminiModel(DeepEvalBaseLLM):
         client_kwargs.update(override_kwargs)
         return client_kwargs
 
-    def _build_client(self
-        client_kwargs = self._client_kwargs(**
+    def _build_client(self) -> "Client":
+        client_kwargs = self._client_kwargs(**self.kwargs)
 
         if self.should_use_vertexai():
+            service_account_key_json = require_secret_api_key(
+                self.service_account_key,
+                provider_label="Google Gemini",
+                env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
+                param_hint="`service_account_key` to GeminiModel(...)",
+            )
+
+            try:
+                service_account_key = json.loads(service_account_key_json)
+            except Exception as e:
+                raise DeepEvalError(
+                    "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
+                ) from e
+
+            if not isinstance(service_account_key, dict):
+                raise DeepEvalError(
+                    "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
+                )
+
             if not self.project or not self.location:
-                raise
+                raise DeepEvalError(
                     "When using Vertex AI API, both project and location are required. "
                     "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and "
                     "GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
                 )
 
+            oauth2 = self._require_oauth2()
             credentials = (
-                service_account.Credentials.from_service_account_info(
-
+                oauth2.service_account.Credentials.from_service_account_info(
+                    service_account_key,
                     scopes=[
                         "https://www.googleapis.com/auth/cloud-platform",
                     ],
                )
-                if
+                if service_account_key
                 else None
             )
 
-            client = Client(
+            client = self._module.Client(
                 vertexai=True,
                 project=self.project,
                 location=self.location,
@@ -282,6 +424,9 @@ class GeminiModel(DeepEvalBaseLLM):
                 param_hint="`api_key` to GeminiModel(...)",
             )
 
-            client = Client(api_key=api_key, **client_kwargs)
+            client = self._module.Client(api_key=api_key, **client_kwargs)
 
         return client
+
+    def get_model_name(self):
+        return f"{self.name} (Gemini)"
```