deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +70 -26
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +52 -28
- deepeval/models/embedding_models/local_embedding_model.py +18 -14
- deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
- deepeval/models/embedding_models/openai_embedding_model.py +40 -21
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +44 -23
- deepeval/models/llms/azure_model.py +121 -36
- deepeval/models/llms/deepseek_model.py +18 -13
- deepeval/models/llms/gemini_model.py +129 -43
- deepeval/models/llms/grok_model.py +18 -13
- deepeval/models/llms/kimi_model.py +18 -13
- deepeval/models/llms/litellm_model.py +42 -22
- deepeval/models/llms/local_model.py +12 -7
- deepeval/models/llms/ollama_model.py +114 -12
- deepeval/models/llms/openai_model.py +137 -41
- deepeval/models/llms/portkey_model.py +24 -7
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +46 -1
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import base64
|
|
1
2
|
from openai.types.chat.chat_completion import ChatCompletion
|
|
2
3
|
from openai import AzureOpenAI, AsyncAzureOpenAI
|
|
3
|
-
from typing import Optional, Tuple, Union, Dict
|
|
4
|
+
from typing import Optional, Tuple, Union, Dict, List
|
|
4
5
|
from pydantic import BaseModel, SecretStr
|
|
6
|
+
from io import BytesIO
|
|
5
7
|
|
|
6
8
|
from deepeval.config.settings import get_settings
|
|
7
9
|
from deepeval.models import DeepEvalBaseLLM
|
|
@@ -14,46 +16,75 @@ from deepeval.models.retry_policy import (
|
|
|
14
16
|
create_retry_decorator,
|
|
15
17
|
sdk_retries_for,
|
|
16
18
|
)
|
|
17
|
-
|
|
18
|
-
from deepeval.
|
|
19
|
-
from deepeval.models.utils import
|
|
19
|
+
from deepeval.test_case import MLLMImage
|
|
20
|
+
from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
|
|
21
|
+
from deepeval.models.llms.utils import (
|
|
22
|
+
trim_and_load_json,
|
|
23
|
+
)
|
|
24
|
+
from deepeval.models.utils import (
|
|
25
|
+
parse_model_name,
|
|
26
|
+
require_secret_api_key,
|
|
27
|
+
normalize_kwargs_and_extract_aliases,
|
|
28
|
+
)
|
|
20
29
|
from deepeval.constants import ProviderSlug as PS
|
|
21
30
|
|
|
31
|
+
# Model names for which AzureOpenAIModel.supports_multimodal() returns True.
valid_multimodal_models = list(
    (
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-5",
    )
)
|
|
22
38
|
|
|
23
39
|
retry_azure = create_retry_decorator(PS.AZURE)
|
|
24
40
|
|
|
41
|
+
_ALIAS_MAP = {
|
|
42
|
+
"api_key": ["azure_openai_api_key"],
|
|
43
|
+
"base_url": ["azure_endpoint"],
|
|
44
|
+
}
|
|
45
|
+
|
|
25
46
|
|
|
26
47
|
class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
27
48
|
def __init__(
|
|
28
49
|
self,
|
|
50
|
+
model: Optional[str] = None,
|
|
51
|
+
api_key: Optional[str] = None,
|
|
52
|
+
base_url: Optional[str] = None,
|
|
53
|
+
temperature: float = 0,
|
|
29
54
|
deployment_name: Optional[str] = None,
|
|
30
|
-
model_name: Optional[str] = None,
|
|
31
|
-
azure_openai_api_key: Optional[str] = None,
|
|
32
55
|
openai_api_version: Optional[str] = None,
|
|
33
|
-
azure_endpoint: Optional[str] = None,
|
|
34
|
-
temperature: float = 0,
|
|
35
56
|
generation_kwargs: Optional[Dict] = None,
|
|
36
57
|
**kwargs,
|
|
37
58
|
):
|
|
59
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
60
|
+
"AzureOpenAIModel",
|
|
61
|
+
kwargs,
|
|
62
|
+
_ALIAS_MAP,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# re-map deprecated keywords to renamed positional args
|
|
66
|
+
if api_key is None and "api_key" in alias_values:
|
|
67
|
+
api_key = alias_values["api_key"]
|
|
68
|
+
if base_url is None and "base_url" in alias_values:
|
|
69
|
+
base_url = alias_values["base_url"]
|
|
70
|
+
|
|
38
71
|
settings = get_settings()
|
|
39
72
|
|
|
40
73
|
# fetch Azure deployment parameters
|
|
41
|
-
|
|
74
|
+
model = model or settings.AZURE_MODEL_NAME
|
|
42
75
|
self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
|
|
43
76
|
|
|
44
|
-
if
|
|
77
|
+
if api_key is not None:
|
|
45
78
|
# keep it secret, keep it safe from serialization, logging and the like
|
|
46
|
-
self.
|
|
47
|
-
azure_openai_api_key
|
|
48
|
-
)
|
|
79
|
+
self.api_key: SecretStr | None = SecretStr(api_key)
|
|
49
80
|
else:
|
|
50
|
-
self.
|
|
81
|
+
self.api_key = settings.AZURE_OPENAI_API_KEY
|
|
51
82
|
|
|
52
83
|
self.openai_api_version = (
|
|
53
84
|
openai_api_version or settings.OPENAI_API_VERSION
|
|
54
85
|
)
|
|
55
|
-
self.
|
|
56
|
-
|
|
86
|
+
self.base_url = (
|
|
87
|
+
base_url
|
|
57
88
|
or settings.AZURE_OPENAI_ENDPOINT
|
|
58
89
|
and str(settings.AZURE_OPENAI_ENDPOINT)
|
|
59
90
|
)
|
|
@@ -62,10 +93,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
62
93
|
raise ValueError("Temperature must be >= 0.")
|
|
63
94
|
self.temperature = temperature
|
|
64
95
|
|
|
65
|
-
#
|
|
66
|
-
self.kwargs =
|
|
96
|
+
# Keep sanitized kwargs for client call to strip legacy keys
|
|
97
|
+
self.kwargs = normalized_kwargs
|
|
67
98
|
self.generation_kwargs = generation_kwargs or {}
|
|
68
|
-
super().__init__(parse_model_name(
|
|
99
|
+
super().__init__(parse_model_name(model))
|
|
69
100
|
|
|
70
101
|
###############################################
|
|
71
102
|
# Other generate functions
|
|
@@ -76,13 +107,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
76
107
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
77
108
|
) -> Tuple[Union[str, Dict], float]:
|
|
78
109
|
client = self.load_model(async_mode=False)
|
|
110
|
+
|
|
111
|
+
if check_if_multimodal(prompt):
|
|
112
|
+
prompt = convert_to_multi_modal_array(prompt)
|
|
113
|
+
prompt = self.generate_prompt(prompt)
|
|
114
|
+
|
|
79
115
|
if schema:
|
|
80
|
-
if self.
|
|
116
|
+
if self.name in structured_outputs_models:
|
|
81
117
|
completion = client.beta.chat.completions.parse(
|
|
82
118
|
model=self.deployment_name,
|
|
83
|
-
messages=[
|
|
84
|
-
{"role": "user", "content": prompt},
|
|
85
|
-
],
|
|
119
|
+
messages=[{"role": "user", "content": prompt}],
|
|
86
120
|
response_format=schema,
|
|
87
121
|
temperature=self.temperature,
|
|
88
122
|
)
|
|
@@ -94,7 +128,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
94
128
|
completion.usage.completion_tokens,
|
|
95
129
|
)
|
|
96
130
|
return structured_output, cost
|
|
97
|
-
if self.
|
|
131
|
+
if self.name in json_mode_models:
|
|
98
132
|
completion = client.beta.chat.completions.parse(
|
|
99
133
|
model=self.deployment_name,
|
|
100
134
|
messages=[
|
|
@@ -135,13 +169,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
135
169
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
136
170
|
) -> Tuple[Union[str, BaseModel], float]:
|
|
137
171
|
client = self.load_model(async_mode=True)
|
|
172
|
+
|
|
173
|
+
if check_if_multimodal(prompt):
|
|
174
|
+
prompt = convert_to_multi_modal_array(prompt)
|
|
175
|
+
prompt = self.generate_prompt(prompt)
|
|
176
|
+
|
|
138
177
|
if schema:
|
|
139
|
-
if self.
|
|
178
|
+
if self.name in structured_outputs_models:
|
|
140
179
|
completion = await client.beta.chat.completions.parse(
|
|
141
180
|
model=self.deployment_name,
|
|
142
|
-
messages=[
|
|
143
|
-
{"role": "user", "content": prompt},
|
|
144
|
-
],
|
|
181
|
+
messages=[{"role": "user", "content": prompt}],
|
|
145
182
|
response_format=schema,
|
|
146
183
|
temperature=self.temperature,
|
|
147
184
|
)
|
|
@@ -153,7 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
153
190
|
completion.usage.completion_tokens,
|
|
154
191
|
)
|
|
155
192
|
return structured_output, cost
|
|
156
|
-
if self.
|
|
193
|
+
if self.name in json_mode_models:
|
|
157
194
|
completion = await client.beta.chat.completions.parse(
|
|
158
195
|
model=self.deployment_name,
|
|
159
196
|
messages=[
|
|
@@ -203,6 +240,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
203
240
|
) -> Tuple[ChatCompletion, float]:
|
|
204
241
|
# Generate completion
|
|
205
242
|
client = self.load_model(async_mode=False)
|
|
243
|
+
if check_if_multimodal(prompt):
|
|
244
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
245
|
+
prompt = self.generate_prompt(prompt)
|
|
206
246
|
completion = client.chat.completions.create(
|
|
207
247
|
model=self.deployment_name,
|
|
208
248
|
messages=[{"role": "user", "content": prompt}],
|
|
@@ -226,6 +266,9 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
226
266
|
) -> Tuple[ChatCompletion, float]:
|
|
227
267
|
# Generate completion
|
|
228
268
|
client = self.load_model(async_mode=True)
|
|
269
|
+
if check_if_multimodal(prompt):
|
|
270
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
271
|
+
prompt = self.generate_prompt(prompt)
|
|
229
272
|
completion = await client.chat.completions.create(
|
|
230
273
|
model=self.deployment_name,
|
|
231
274
|
messages=[{"role": "user", "content": prompt}],
|
|
@@ -241,12 +284,49 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
241
284
|
|
|
242
285
|
return completion, cost
|
|
243
286
|
|
|
287
|
+
def generate_prompt(
    self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
):
    """Convert multimodal input into the proper message format for Azure OpenAI.

    Args:
        multimodal_input: Ordered mix of plain strings and ``MLLMImage``
            objects. ``None`` (the default) is treated as an empty sequence.

    Returns:
        A list of content-part dicts in input order:
        ``{"type": "text", "text": ...}`` for each string and
        ``{"type": "image_url", "image_url": {"url": ...}}`` for each image.
        Elements that are neither ``str`` nor ``MLLMImage`` are skipped.
    """
    # ``None`` replaces the former ``[]`` default so that no list object is
    # shared between calls.
    if multimodal_input is None:
        return []

    prompt = []
    for ele in multimodal_input:
        if isinstance(ele, str):
            prompt.append({"type": "text", "text": ele})
        elif isinstance(ele, MLLMImage):
            if ele.local:
                # Imported lazily so Pillow is only needed for local images.
                import PIL.Image

                # Local images are inlined as a base64 data URL. The context
                # manager closes the file handle that Image.open keeps open.
                with PIL.Image.open(ele.url) as image:
                    encoded_image = self.encode_pil_image(image)
                visual_dict = {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    },
                }
            else:
                # Remote images are passed through by URL.
                visual_dict = {
                    "type": "image_url",
                    "image_url": {"url": ele.url},
                }
            prompt.append(visual_dict)
    return prompt
|
|
313
|
+
|
|
314
|
+
def encode_pil_image(self, pil_image):
    """Encode a PIL image as a base64 string of its JPEG bytes.

    Args:
        pil_image: Image object exposing ``mode``, ``convert(mode)`` and
            ``save(fp, format=...)`` (a ``PIL.Image.Image``).

    Returns:
        The JPEG-encoded image as a UTF-8 base64 string, without any
        ``data:`` URL prefix.
    """
    # Modes Pillow's JPEG writer can save directly. Every other mode (alpha,
    # palette, integer/float, ...) is flattened to RGB first; previously only
    # RGBA/LA/P were converted and the rest made save() raise OSError.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if pil_image.mode not in jpeg_writable_modes:
        pil_image = pil_image.convert("RGB")

    image_buffer = BytesIO()
    pil_image.save(image_buffer, format="JPEG")
    return base64.b64encode(image_buffer.getvalue()).decode("utf-8")
|
|
323
|
+
|
|
244
324
|
###############################################
|
|
245
325
|
# Utilities
|
|
246
326
|
###############################################
|
|
247
327
|
|
|
248
328
|
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
|
|
249
|
-
pricing = model_pricing.get(self.
|
|
329
|
+
pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
|
|
250
330
|
input_cost = input_tokens * pricing["input"]
|
|
251
331
|
output_cost = output_tokens * pricing["output"]
|
|
252
332
|
return input_cost + output_cost
|
|
@@ -255,9 +335,6 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
255
335
|
# Model
|
|
256
336
|
###############################################
|
|
257
337
|
|
|
258
|
-
def get_model_name(self):
|
|
259
|
-
return f"Azure OpenAI ({self.model_name})"
|
|
260
|
-
|
|
261
338
|
def load_model(self, async_mode: bool = False):
|
|
262
339
|
if not async_mode:
|
|
263
340
|
return self._build_client(AzureOpenAI)
|
|
@@ -276,16 +353,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
276
353
|
|
|
277
354
|
def _build_client(self, cls):
|
|
278
355
|
api_key = require_secret_api_key(
|
|
279
|
-
self.
|
|
356
|
+
self.api_key,
|
|
280
357
|
provider_label="AzureOpenAI",
|
|
281
358
|
env_var_name="AZURE_OPENAI_API_KEY",
|
|
282
|
-
param_hint="`
|
|
359
|
+
param_hint="`api_key` to AzureOpenAIModel(...)",
|
|
283
360
|
)
|
|
284
361
|
|
|
285
362
|
kw = dict(
|
|
286
363
|
api_key=api_key,
|
|
287
364
|
api_version=self.openai_api_version,
|
|
288
|
-
|
|
365
|
+
base_url=self.base_url,
|
|
289
366
|
azure_deployment=self.deployment_name,
|
|
290
367
|
**self._client_kwargs(),
|
|
291
368
|
)
|
|
@@ -297,3 +374,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
297
374
|
kw.pop("max_retries", None)
|
|
298
375
|
return cls(**kw)
|
|
299
376
|
raise
|
|
377
|
+
|
|
378
|
+
def supports_multimodal(self):
    """Return True when ``self.name`` is listed in ``valid_multimodal_models``."""
    return self.name in valid_multimodal_models
|
|
382
|
+
|
|
383
|
+
def get_model_name(self):
    """Return the display label: the model name followed by `` (Azure)``."""
    display_name = f"{self.name} (Azure)"
    return display_name
|
|
@@ -4,7 +4,9 @@ from pydantic import BaseModel, SecretStr
|
|
|
4
4
|
|
|
5
5
|
from deepeval.config.settings import get_settings
|
|
6
6
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
7
|
-
from deepeval.models.utils import
|
|
7
|
+
from deepeval.models.utils import (
|
|
8
|
+
require_secret_api_key,
|
|
9
|
+
)
|
|
8
10
|
from deepeval.models import DeepEvalBaseLLM
|
|
9
11
|
from deepeval.models.retry_policy import (
|
|
10
12
|
create_retry_decorator,
|
|
@@ -31,16 +33,16 @@ model_pricing = {
|
|
|
31
33
|
class DeepSeekModel(DeepEvalBaseLLM):
|
|
32
34
|
def __init__(
|
|
33
35
|
self,
|
|
34
|
-
api_key: Optional[str] = None,
|
|
35
36
|
model: Optional[str] = None,
|
|
37
|
+
api_key: Optional[str] = None,
|
|
36
38
|
temperature: float = 0,
|
|
37
39
|
generation_kwargs: Optional[Dict] = None,
|
|
38
40
|
**kwargs,
|
|
39
41
|
):
|
|
40
42
|
settings = get_settings()
|
|
41
43
|
|
|
42
|
-
|
|
43
|
-
if
|
|
44
|
+
model = model or settings.DEEPSEEK_MODEL_NAME
|
|
45
|
+
if model not in model_pricing:
|
|
44
46
|
raise ValueError(
|
|
45
47
|
f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
|
|
46
48
|
)
|
|
@@ -59,9 +61,10 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
59
61
|
self.api_key = settings.DEEPSEEK_API_KEY
|
|
60
62
|
|
|
61
63
|
self.base_url = "https://api.deepseek.com"
|
|
64
|
+
# Keep sanitized kwargs for client call to strip legacy keys
|
|
62
65
|
self.kwargs = kwargs
|
|
63
66
|
self.generation_kwargs = generation_kwargs or {}
|
|
64
|
-
super().__init__(
|
|
67
|
+
super().__init__(model)
|
|
65
68
|
|
|
66
69
|
###############################################
|
|
67
70
|
# Other generate functions
|
|
@@ -71,10 +74,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
71
74
|
def generate(
|
|
72
75
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
73
76
|
) -> Tuple[Union[str, Dict], float]:
|
|
77
|
+
|
|
74
78
|
client = self.load_model(async_mode=False)
|
|
75
79
|
if schema:
|
|
76
80
|
completion = client.chat.completions.create(
|
|
77
|
-
model=self.
|
|
81
|
+
model=self.name,
|
|
78
82
|
messages=[{"role": "user", "content": prompt}],
|
|
79
83
|
response_format={"type": "json_object"},
|
|
80
84
|
temperature=self.temperature,
|
|
@@ -90,7 +94,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
90
94
|
return schema.model_validate(json_output), cost
|
|
91
95
|
else:
|
|
92
96
|
completion = client.chat.completions.create(
|
|
93
|
-
model=self.
|
|
97
|
+
model=self.name,
|
|
94
98
|
messages=[{"role": "user", "content": prompt}],
|
|
95
99
|
**self.generation_kwargs,
|
|
96
100
|
)
|
|
@@ -105,10 +109,11 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
105
109
|
async def a_generate(
|
|
106
110
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
107
111
|
) -> Tuple[Union[str, Dict], float]:
|
|
112
|
+
|
|
108
113
|
client = self.load_model(async_mode=True)
|
|
109
114
|
if schema:
|
|
110
115
|
completion = await client.chat.completions.create(
|
|
111
|
-
model=self.
|
|
116
|
+
model=self.name,
|
|
112
117
|
messages=[{"role": "user", "content": prompt}],
|
|
113
118
|
response_format={"type": "json_object"},
|
|
114
119
|
temperature=self.temperature,
|
|
@@ -124,7 +129,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
124
129
|
return schema.model_validate(json_output), cost
|
|
125
130
|
else:
|
|
126
131
|
completion = await client.chat.completions.create(
|
|
127
|
-
model=self.
|
|
132
|
+
model=self.name,
|
|
128
133
|
messages=[{"role": "user", "content": prompt}],
|
|
129
134
|
**self.generation_kwargs,
|
|
130
135
|
)
|
|
@@ -144,7 +149,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
144
149
|
input_tokens: int,
|
|
145
150
|
output_tokens: int,
|
|
146
151
|
) -> float:
|
|
147
|
-
pricing = model_pricing.get(self.
|
|
152
|
+
pricing = model_pricing.get(self.name, model_pricing)
|
|
148
153
|
input_cost = input_tokens * pricing["input"]
|
|
149
154
|
output_cost = output_tokens * pricing["output"]
|
|
150
155
|
return input_cost + output_cost
|
|
@@ -158,9 +163,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
158
163
|
return self._build_client(OpenAI)
|
|
159
164
|
return self._build_client(AsyncOpenAI)
|
|
160
165
|
|
|
161
|
-
def get_model_name(self):
|
|
162
|
-
return f"{self.model_name}"
|
|
163
|
-
|
|
164
166
|
def _client_kwargs(self) -> Dict:
|
|
165
167
|
kwargs = dict(self.kwargs or {})
|
|
166
168
|
# if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
|
|
@@ -190,3 +192,6 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
190
192
|
kw.pop("max_retries", None)
|
|
191
193
|
return cls(**kw)
|
|
192
194
|
raise
|
|
195
|
+
|
|
196
|
+
def get_model_name(self):
    """Return the display label: the model name followed by `` (Deepseek)``."""
    display_name = f"{self.name} (Deepseek)"
    return display_name
|