deepeval 3.7.5__py3-none-any.whl → 3.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +2022 -759
- deepeval/cli/utils.py +208 -36
- deepeval/config/dotenv_handler.py +19 -0
- deepeval/config/settings.py +675 -245
- deepeval/config/utils.py +9 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +106 -21
- deepeval/evaluate/evaluate.py +0 -3
- deepeval/evaluate/execute.py +162 -315
- deepeval/evaluate/utils.py +6 -30
- deepeval/key_handler.py +124 -51
- deepeval/metrics/__init__.py +0 -4
- deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
- deepeval/metrics/answer_relevancy/template.py +102 -179
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +19 -41
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
- deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +14 -0
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +82 -136
- deepeval/metrics/g_eval/g_eval.py +93 -79
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +11 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +72 -43
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +93 -75
- deepeval/metrics/mcp/schema.py +4 -0
- deepeval/metrics/mcp/template.py +59 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +78 -86
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
- deepeval/metrics/tool_use/schema.py +4 -0
- deepeval/metrics/tool_use/template.py +16 -2
- deepeval/metrics/tool_use/tool_use.py +72 -94
- deepeval/metrics/topic_adherence/schema.py +4 -0
- deepeval/metrics/topic_adherence/template.py +21 -1
- deepeval/metrics/topic_adherence/topic_adherence.py +68 -81
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +3 -3
- deepeval/metrics/turn_contextual_precision/template.py +9 -2
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +154 -154
- deepeval/metrics/turn_contextual_recall/schema.py +3 -3
- deepeval/metrics/turn_contextual_recall/template.py +8 -1
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +148 -143
- deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
- deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +154 -157
- deepeval/metrics/turn_faithfulness/schema.py +1 -1
- deepeval/metrics/turn_faithfulness/template.py +8 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +180 -203
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +161 -91
- deepeval/models/__init__.py +2 -0
- deepeval/models/base_model.py +44 -6
- deepeval/models/embedding_models/azure_embedding_model.py +34 -12
- deepeval/models/embedding_models/local_embedding_model.py +22 -7
- deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
- deepeval/models/embedding_models/openai_embedding_model.py +3 -2
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/amazon_bedrock_model.py +229 -73
- deepeval/models/llms/anthropic_model.py +143 -48
- deepeval/models/llms/azure_model.py +169 -95
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +82 -35
- deepeval/models/llms/gemini_model.py +126 -67
- deepeval/models/llms/grok_model.py +128 -65
- deepeval/models/llms/kimi_model.py +129 -87
- deepeval/models/llms/litellm_model.py +94 -18
- deepeval/models/llms/local_model.py +115 -16
- deepeval/models/llms/ollama_model.py +97 -76
- deepeval/models/llms/openai_model.py +169 -311
- deepeval/models/llms/portkey_model.py +58 -16
- deepeval/models/llms/utils.py +5 -2
- deepeval/models/retry_policy.py +10 -5
- deepeval/models/utils.py +56 -4
- deepeval/simulator/conversation_simulator.py +49 -2
- deepeval/simulator/template.py +16 -1
- deepeval/synthesizer/synthesizer.py +19 -17
- deepeval/test_case/api.py +24 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +55 -6
- deepeval/test_case/llm_test_case.py +60 -6
- deepeval/test_run/api.py +3 -0
- deepeval/test_run/test_run.py +6 -1
- deepeval/utils.py +26 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/METADATA +3 -3
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/RECORD +145 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/WHEEL +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/azure_model.py

@@ -1,41 +1,33 @@
-import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel, SecretStr
-from io import BytesIO
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.
-    structured_outputs_models,
-    json_mode_models,
-    model_pricing,
-)
+from deepeval.models.llms.constants import OPENAI_MODELS_DATA
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.test_case import MLLMImage
-from deepeval.utils import
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_param,
+)
 from deepeval.models.llms.utils import (
     trim_and_load_json,
 )
 from deepeval.models.utils import (
     parse_model_name,
     require_secret_api_key,
+    require_costs,
     normalize_kwargs_and_extract_aliases,
 )
 from deepeval.constants import ProviderSlug as PS
 
-valid_multimodal_models = [
-    "gpt-4o",
-    "gpt-4o-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-5",
-]
-
 retry_azure = create_retry_decorator(PS.AZURE)
 
 _ALIAS_MAP = {
@@ -50,52 +42,117 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float =
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         deployment_name: Optional[str] = None,
-
+        api_version: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
             "AzureOpenAIModel",
             kwargs,
             _ALIAS_MAP,
         )
 
-        # re-map
+        # re-map deprecated keywords to re-named positional args
         if api_key is None and "api_key" in alias_values:
             api_key = alias_values["api_key"]
         if base_url is None and "base_url" in alias_values:
             base_url = alias_values["base_url"]
 
-        settings = get_settings()
-
         # fetch Azure deployment parameters
         model = model or settings.AZURE_MODEL_NAME
-
+        deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
-
-
+        api_version = api_version or settings.OPENAI_API_VERSION
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.AZURE_OPENAI_ENDPOINT is not None:
+            base_url = str(settings.AZURE_OPENAI_ENDPOINT).rstrip("/")
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
+        )
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_MODEL_NAME",
+            param_hint="model",
+        )
+
+        self.deployment_name = require_param(
+            deployment_name,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_DEPLOYMENT_NAME",
+            param_hint="deployment_name",
+        )
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_OPENAI_ENDPOINT",
+            param_hint="base_url",
         )
-
-
-
-
+
+        self.api_version = require_param(
+            api_version,
+            provider_label="AzureOpenAIModel",
+            env_var_name="OPENAI_API_VERSION",
+            param_hint="api_version",
         )
 
+        self.model_data = OPENAI_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "OPENAI_COST_PER_INPUT_TOKEN",
+            "OPENAI_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
         if temperature < 0:
-            raise
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
 
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = normalized_kwargs
-        self.
+        self.kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
         super().__init__(parse_model_name(model))
 
         ###############################################
@@ -105,20 +162,23 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str,
+    ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=False)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.
+            if self.model_data.supports_structured_outputs:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content":
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -128,14 +188,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.model_data.supports_json:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content":
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 json_output = trim_and_load_json(
                     completion.choices[0].message.content
@@ -149,7 +210,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
             completion = client.chat.completions.create(
                 model=self.deployment_name,
                 messages=[
-                    {"role": "user", "content":
+                    {"role": "user", "content": content},
                 ],
                 temperature=self.temperature,
                 **self.generation_kwargs,
@@ -171,16 +232,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.
+            if self.model_data.supports_structured_outputs:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content":
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -190,11 +254,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.model_data.supports_json:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content":
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
@@ -212,7 +276,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
             completion = await client.chat.completions.create(
                 model=self.deployment_name,
                 messages=[
-                    {"role": "user", "content":
+                    {"role": "user", "content": content},
                 ],
                 temperature=self.temperature,
                 **self.generation_kwargs,
@@ -242,10 +306,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=False)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content":
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -268,10 +334,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = await client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content":
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -284,52 +352,63 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    def
-        self, multimodal_input: List[Union[str, MLLMImage]] =
+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
     ):
-
-
-        for
-            if isinstance(
-
-            elif isinstance(
-                if
-
-
-
-
-
-
-                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
-                        },
-                    }
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
                 else:
-
-
-                    "
-
-
-
-
-
-
-
-
-        pil_image = pil_image.convert("RGB")
-        pil_image.save(image_buffer, format="JPEG")
-        image_bytes = image_buffer.getvalue()
-        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-        return base64_encoded_image
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
 
     ###############################################
     # Utilities
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-
-
-
-
+        if self.model_data.input_price and self.model_data.output_price:
+            input_cost = input_tokens * self.model_data.input_price
+            output_cost = output_tokens * self.model_data.output_price
+            return input_cost + output_cost
+
+    ###############################################
+    # Capabilities
+    ###############################################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
 
     ###############################################
     # Model
@@ -361,8 +440,8 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         kw = dict(
             api_key=api_key,
-            api_version=self.
-
+            api_version=self.api_version,
+            azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
         )
@@ -375,10 +454,5 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
             return cls(**kw)
         raise
 
-    def supports_multimodal(self):
-        if self.name in valid_multimodal_models:
-            return True
-        return False
-
     def get_model_name(self):
         return f"{self.name} (Azure)"