deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
```diff
--- a/deepeval/models/llms/azure_model.py
+++ b/deepeval/models/llms/azure_model.py
@@ -1,10 +1,12 @@
+import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from typing import Optional, Tuple, Union, Dict, List
+from pydantic import BaseModel, SecretStr
+from io import BytesIO

+from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.openai_model import (
     structured_outputs_models,
     json_mode_models,
@@ -14,53 +16,87 @@ from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-
-from deepeval.
-from deepeval.models.utils import
+from deepeval.test_case import MLLMImage
+from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.models.llms.utils import (
+    trim_and_load_json,
+)
+from deepeval.models.utils import (
+    parse_model_name,
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.constants import ProviderSlug as PS

+valid_multimodal_models = [
+    "gpt-4o",
+    "gpt-4o-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-5",
+]

 retry_azure = create_retry_decorator(PS.AZURE)

+_ALIAS_MAP = {
+    "api_key": ["azure_openai_api_key"],
+    "base_url": ["azure_endpoint"],
+}
+

 class AzureOpenAIModel(DeepEvalBaseLLM):
     def __init__(
         self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        temperature: float = 0,
         deployment_name: Optional[str] = None,
-        model_name: Optional[str] = None,
-        azure_openai_api_key: Optional[str] = None,
         openai_api_version: Optional[str] = None,
-        azure_endpoint: Optional[str] = None,
-        temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-
-
-        self.deployment_name = deployment_name or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_DEPLOYMENT_NAME
-        )
-        self.azure_openai_api_key = (
-            azure_openai_api_key
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.AZURE_OPENAI_API_KEY)
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AzureOpenAIModel",
+            kwargs,
+            _ALIAS_MAP,
         )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
+        settings = get_settings()
+
+        # fetch Azure deployment parameters
+        model = model or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.AZURE_OPENAI_API_KEY
+
         self.openai_api_version = (
-            openai_api_version
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+            openai_api_version or settings.OPENAI_API_VERSION
         )
-        self.
-
+        self.base_url = (
+            base_url
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
         )
+
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature

-        #
-        self.kwargs =
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(parse_model_name(
+        super().__init__(parse_model_name(model))

     ###############################################
     # Other generate functions
@@ -71,13 +107,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
         client = self.load_model(async_mode=False)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema:
-            if self.
+            if self.name in structured_outputs_models:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": prompt}],
                     response_format=schema,
                     temperature=self.temperature,
                 )
@@ -89,7 +128,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.name in json_mode_models:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
@@ -130,13 +169,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=True)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            prompt = self.generate_prompt(prompt)
+
         if schema:
-            if self.
+            if self.name in structured_outputs_models:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": prompt}],
                     response_format=schema,
                     temperature=self.temperature,
                 )
@@ -148,7 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.name in json_mode_models:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
@@ -198,6 +240,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=False)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            prompt = self.generate_prompt(prompt)
         completion = client.chat.completions.create(
             model=self.deployment_name,
             messages=[{"role": "user", "content": prompt}],
@@ -221,6 +266,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=True)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            prompt = self.generate_prompt(prompt)
         completion = await client.chat.completions.create(
             model=self.deployment_name,
             messages=[{"role": "user", "content": prompt}],
@@ -236,12 +284,49 @@ class AzureOpenAIModel(DeepEvalBaseLLM):

         return completion, cost

+    def generate_prompt(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        """Convert multimodal input into the proper message format for Azure OpenAI."""
+        prompt = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                prompt.append({"type": "text", "text": ele})
+            elif isinstance(ele, MLLMImage):
+                if ele.local:
+                    import PIL.Image
+
+                    image = PIL.Image.open(ele.url)
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
+                        },
+                    }
+                else:
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {"url": ele.url},
+                    }
+                prompt.append(visual_dict)
+        return prompt
+
+    def encode_pil_image(self, pil_image):
+        """Encode a PIL image to base64 string."""
+        image_buffer = BytesIO()
+        if pil_image.mode in ("RGBA", "LA", "P"):
+            pil_image = pil_image.convert("RGB")
+        pil_image.save(image_buffer, format="JPEG")
+        image_bytes = image_buffer.getvalue()
+        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
+        return base64_encoded_image
+
     ###############################################
     # Utilities
     ###############################################

     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(self.
+        pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -250,9 +335,6 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Model
     ###############################################

-    def get_model_name(self):
-        return f"Azure OpenAI ({self.model_name})"
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(AzureOpenAI)
@@ -270,10 +352,17 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`api_key` to AzureOpenAIModel(...)",
+        )
+
         kw = dict(
-            api_key=
+            api_key=api_key,
             api_version=self.openai_api_version,
-
+            base_url=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
         )
@@ -285,3 +374,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
             kw.pop("max_retries", None)
             return cls(**kw)
         raise
+
+    def supports_multimodal(self):
+        if self.name in valid_multimodal_models:
+            return True
+        return False
+
+    def get_model_name(self):
+        return f"{self.name} (Azure)"
```
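For orientation, a minimal construction sketch reflecting the renamed `AzureOpenAIModel` keywords shown above (`model`, `api_key`, `base_url` replacing `model_name`, `azure_openai_api_key`, `azure_endpoint`); all values below are placeholders, not values taken from the package:

```python
# Sketch only: parameter names come from the diff above; every value is a placeholder.
from deepeval.models.llms.azure_model import AzureOpenAIModel

model = AzureOpenAIModel(
    model="gpt-4.1",                                  # resolved via parse_model_name(...)
    api_key="<azure-openai-api-key>",                 # wrapped in a pydantic SecretStr internally
    base_url="https://<resource>.openai.azure.com/",  # falls back to settings.AZURE_OPENAI_ENDPOINT
    deployment_name="<azure-deployment-name>",
    openai_api_version="<api-version>",
)

# Legacy keywords are still remapped through _ALIAS_MAP, e.g.
# azure_openai_api_key=... -> api_key=..., azure_endpoint=... -> base_url=...
```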
```diff
--- a/deepeval/models/llms/deepseek_model.py
+++ b/deepeval/models/llms/deepseek_model.py
@@ -1,9 +1,12 @@
 from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

-from deepeval.
+from deepeval.config.settings import get_settings
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -30,35 +33,38 @@ model_pricing = {
 class DeepSeekModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-
-        if
+        settings = get_settings()
+
+        model = model or settings.DEEPSEEK_MODEL_NAME
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
             )
-        temperature_from_key =
-            ModelKeyValues.TEMPERATURE
-        )
+        temperature_from_key = settings.TEMPERATURE
         if temperature_from_key is None:
             self.temperature = temperature
         else:
             self.temperature = float(temperature_from_key)
         if self.temperature < 0:
             raise ValueError("Temperature must be >= 0.")
-
-
-
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.DEEPSEEK_API_KEY
+
         self.base_url = "https://api.deepseek.com"
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(
+        super().__init__(model)

     ###############################################
     # Other generate functions
@@ -68,10 +74,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         if schema:
             completion = client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -87,7 +94,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -102,10 +109,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         if schema:
             completion = await client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -121,7 +129,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost
         else:
             completion = await client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 **self.generation_kwargs,
             )
@@ -141,7 +149,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -155,9 +163,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)

-    def get_model_name(self):
-        return f"{self.model_name}"
-
     def _client_kwargs(self) -> Dict:
         kwargs = dict(self.kwargs or {})
         # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
@@ -167,8 +172,15 @@ class DeepSeekModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="DeepSeek",
+            env_var_name="DEEPSEEK_API_KEY",
+            param_hint="`api_key` to DeepSeekModel(...)",
+        )
+
         kw = dict(
-            api_key=
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
@@ -180,3 +192,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
             kw.pop("max_retries", None)
             return cls(**kw)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (Deepseek)"
```
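A corresponding sketch for the updated `DeepSeekModel` constructor; the `model`/`api_key` ordering, the SecretStr handling, and the fallback to settings come from the diff above, while the values are placeholders:

```python
# Sketch only: names from the diff above; values are placeholders.
from deepeval.models.llms.deepseek_model import DeepSeekModel

model = DeepSeekModel(
    model="<deepseek-model-name>",    # must be a key in model_pricing, otherwise ValueError
    api_key="<deepseek-api-key>",     # stored as SecretStr; falls back to settings.DEEPSEEK_API_KEY
    temperature=0,
)
print(model.get_model_name())         # now reports "<model> (Deepseek)" instead of the bare name
```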