deepeval 3.7.5__py3-none-any.whl → 3.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/main.py +2022 -759
  3. deepeval/cli/utils.py +208 -36
  4. deepeval/config/dotenv_handler.py +19 -0
  5. deepeval/config/settings.py +675 -245
  6. deepeval/config/utils.py +9 -1
  7. deepeval/dataset/api.py +23 -1
  8. deepeval/dataset/golden.py +106 -21
  9. deepeval/evaluate/evaluate.py +0 -3
  10. deepeval/evaluate/execute.py +162 -315
  11. deepeval/evaluate/utils.py +6 -30
  12. deepeval/key_handler.py +124 -51
  13. deepeval/metrics/__init__.py +0 -4
  14. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  15. deepeval/metrics/answer_relevancy/template.py +102 -179
  16. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  17. deepeval/metrics/arena_g_eval/template.py +17 -1
  18. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  19. deepeval/metrics/argument_correctness/template.py +19 -2
  20. deepeval/metrics/base_metric.py +19 -41
  21. deepeval/metrics/bias/bias.py +102 -108
  22. deepeval/metrics/bias/template.py +14 -2
  23. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  24. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  26. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  27. deepeval/metrics/conversation_completeness/template.py +23 -3
  28. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  29. deepeval/metrics/conversational_dag/nodes.py +66 -123
  30. deepeval/metrics/conversational_dag/templates.py +16 -0
  31. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  32. deepeval/metrics/dag/dag.py +10 -0
  33. deepeval/metrics/dag/nodes.py +63 -126
  34. deepeval/metrics/dag/templates.py +14 -0
  35. deepeval/metrics/exact_match/exact_match.py +9 -1
  36. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  37. deepeval/metrics/g_eval/g_eval.py +93 -79
  38. deepeval/metrics/g_eval/template.py +18 -1
  39. deepeval/metrics/g_eval/utils.py +7 -6
  40. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  41. deepeval/metrics/goal_accuracy/template.py +21 -3
  42. deepeval/metrics/hallucination/hallucination.py +60 -75
  43. deepeval/metrics/hallucination/template.py +13 -0
  44. deepeval/metrics/indicator.py +11 -10
  45. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  46. deepeval/metrics/json_correctness/template.py +10 -0
  47. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  48. deepeval/metrics/knowledge_retention/schema.py +9 -3
  49. deepeval/metrics/knowledge_retention/template.py +12 -0
  50. deepeval/metrics/mcp/mcp_task_completion.py +72 -43
  51. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +93 -75
  52. deepeval/metrics/mcp/schema.py +4 -0
  53. deepeval/metrics/mcp/template.py +59 -0
  54. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  55. deepeval/metrics/mcp_use_metric/template.py +12 -0
  56. deepeval/metrics/misuse/misuse.py +77 -97
  57. deepeval/metrics/misuse/template.py +15 -0
  58. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  59. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  60. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  61. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  62. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  63. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  64. deepeval/metrics/non_advice/non_advice.py +79 -105
  65. deepeval/metrics/non_advice/template.py +12 -0
  66. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  67. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  68. deepeval/metrics/pii_leakage/template.py +14 -0
  69. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  70. deepeval/metrics/plan_adherence/template.py +11 -0
  71. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  72. deepeval/metrics/plan_quality/template.py +9 -0
  73. deepeval/metrics/prompt_alignment/prompt_alignment.py +78 -86
  74. deepeval/metrics/prompt_alignment/template.py +12 -0
  75. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  76. deepeval/metrics/role_adherence/template.py +14 -0
  77. deepeval/metrics/role_violation/role_violation.py +75 -108
  78. deepeval/metrics/role_violation/template.py +12 -0
  79. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  80. deepeval/metrics/step_efficiency/template.py +11 -0
  81. deepeval/metrics/summarization/summarization.py +115 -183
  82. deepeval/metrics/summarization/template.py +19 -0
  83. deepeval/metrics/task_completion/task_completion.py +67 -73
  84. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  85. deepeval/metrics/tool_use/schema.py +4 -0
  86. deepeval/metrics/tool_use/template.py +16 -2
  87. deepeval/metrics/tool_use/tool_use.py +72 -94
  88. deepeval/metrics/topic_adherence/schema.py +4 -0
  89. deepeval/metrics/topic_adherence/template.py +21 -1
  90. deepeval/metrics/topic_adherence/topic_adherence.py +68 -81
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  94. deepeval/metrics/turn_contextual_precision/template.py +9 -2
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +154 -154
  96. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  97. deepeval/metrics/turn_contextual_recall/template.py +8 -1
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +148 -143
  99. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  100. deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +154 -157
  102. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  103. deepeval/metrics/turn_faithfulness/template.py +8 -1
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +180 -203
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +161 -91
  108. deepeval/models/__init__.py +2 -0
  109. deepeval/models/base_model.py +44 -6
  110. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  111. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  112. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  113. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  114. deepeval/models/llms/__init__.py +2 -0
  115. deepeval/models/llms/amazon_bedrock_model.py +229 -73
  116. deepeval/models/llms/anthropic_model.py +143 -48
  117. deepeval/models/llms/azure_model.py +169 -95
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +82 -35
  120. deepeval/models/llms/gemini_model.py +126 -67
  121. deepeval/models/llms/grok_model.py +128 -65
  122. deepeval/models/llms/kimi_model.py +129 -87
  123. deepeval/models/llms/litellm_model.py +94 -18
  124. deepeval/models/llms/local_model.py +115 -16
  125. deepeval/models/llms/ollama_model.py +97 -76
  126. deepeval/models/llms/openai_model.py +169 -311
  127. deepeval/models/llms/portkey_model.py +58 -16
  128. deepeval/models/llms/utils.py +5 -2
  129. deepeval/models/retry_policy.py +10 -5
  130. deepeval/models/utils.py +56 -4
  131. deepeval/simulator/conversation_simulator.py +49 -2
  132. deepeval/simulator/template.py +16 -1
  133. deepeval/synthesizer/synthesizer.py +19 -17
  134. deepeval/test_case/api.py +24 -45
  135. deepeval/test_case/arena_test_case.py +7 -2
  136. deepeval/test_case/conversational_test_case.py +55 -6
  137. deepeval/test_case/llm_test_case.py +60 -6
  138. deepeval/test_run/api.py +3 -0
  139. deepeval/test_run/test_run.py +6 -1
  140. deepeval/utils.py +26 -0
  141. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/METADATA +3 -3
  142. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/RECORD +145 -148
  143. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  144. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  145. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  146. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  147. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  148. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/LICENSE.md +0 -0
  149. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/WHEEL +0 -0
  150. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/entry_points.txt +0 -0
@@ -1,41 +1,33 @@
-import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel, SecretStr
-from io import BytesIO
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.openai_model import (
-    structured_outputs_models,
-    json_mode_models,
-    model_pricing,
-)
+from deepeval.models.llms.constants import OPENAI_MODELS_DATA
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.test_case import MLLMImage
-from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_param,
+)
 from deepeval.models.llms.utils import (
     trim_and_load_json,
 )
 from deepeval.models.utils import (
     parse_model_name,
     require_secret_api_key,
+    require_costs,
     normalize_kwargs_and_extract_aliases,
 )
 from deepeval.constants import ProviderSlug as PS
 
-valid_multimodal_models = [
-    "gpt-4o",
-    "gpt-4o-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-5",
-]
-
 retry_azure = create_retry_decorator(PS.AZURE)
 
 _ALIAS_MAP = {
@@ -50,52 +42,117 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         deployment_name: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
+        api_version: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
             "AzureOpenAIModel",
             kwargs,
             _ALIAS_MAP,
         )
 
-        # re-map depricated keywords to re-named positional args
+        # re-map deprecated keywords to re-named positional args
         if api_key is None and "api_key" in alias_values:
             api_key = alias_values["api_key"]
         if base_url is None and "base_url" in alias_values:
             base_url = alias_values["base_url"]
 
-        settings = get_settings()
-
         # fetch Azure deployment parameters
         model = model or settings.AZURE_MODEL_NAME
-        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr | None = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
-        self.openai_api_version = (
-            openai_api_version or settings.OPENAI_API_VERSION
+        api_version = api_version or settings.OPENAI_API_VERSION
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.AZURE_OPENAI_ENDPOINT is not None:
+            base_url = str(settings.AZURE_OPENAI_ENDPOINT).rstrip("/")
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
+        )
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_MODEL_NAME",
+            param_hint="model",
+        )
+
+        self.deployment_name = require_param(
+            deployment_name,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_DEPLOYMENT_NAME",
+            param_hint="deployment_name",
+        )
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_OPENAI_ENDPOINT",
+            param_hint="base_url",
         )
-        self.base_url = (
-            base_url
-            or settings.AZURE_OPENAI_ENDPOINT
-            and str(settings.AZURE_OPENAI_ENDPOINT)
+
+        self.api_version = require_param(
+            api_version,
+            provider_label="AzureOpenAIModel",
+            env_var_name="OPENAI_API_VERSION",
+            param_hint="api_version",
        )
 
+        self.model_data = OPENAI_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "OPENAI_COST_PER_INPUT_TOKEN",
+            "OPENAI_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
 
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = normalized_kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
         super().__init__(parse_model_name(model))
 
     ###############################################
@@ -105,20 +162,23 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=False)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.name in structured_outputs_models:
+            if self.model_data.supports_structured_outputs:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content": prompt}],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -128,14 +188,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.name in json_mode_models:
+            if self.model_data.supports_json:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content": prompt},
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 json_output = trim_and_load_json(
                     completion.choices[0].message.content
@@ -149,7 +210,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content": prompt},
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -171,16 +232,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.name in structured_outputs_models:
+            if self.model_data.supports_structured_outputs:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content": prompt}],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -190,11 +254,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.name in json_mode_models:
+            if self.model_data.supports_json:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content": prompt},
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
@@ -212,7 +276,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = await client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content": prompt},
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -242,10 +306,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=False)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-            prompt = self.generate_prompt(prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -268,10 +334,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-            prompt = self.generate_prompt(prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = await client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -284,52 +352,63 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
     ):
-        """Convert multimodal input into the proper message format for Azure OpenAI."""
-        prompt = []
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append({"type": "text", "text": ele})
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    import PIL.Image
-
-                    image = PIL.Image.open(ele.url)
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
-                        },
-                    }
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
                 else:
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {"url": ele.url},
-                    }
-                prompt.append(visual_dict)
-        return prompt
-
-    def encode_pil_image(self, pil_image):
-        """Encode a PIL image to base64 string."""
-        image_buffer = BytesIO()
-        if pil_image.mode in ("RGBA", "LA", "P"):
-            pil_image = pil_image.convert("RGB")
-        pil_image.save(image_buffer, format="JPEG")
-        image_bytes = image_buffer.getvalue()
-        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-        return base64_encoded_image
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
 
     ###############################################
     # Utilities
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
-        input_cost = input_tokens * pricing["input"]
-        output_cost = output_tokens * pricing["output"]
-        return input_cost + output_cost
+        if self.model_data.input_price and self.model_data.output_price:
+            input_cost = input_tokens * self.model_data.input_price
+            output_cost = output_tokens * self.model_data.output_price
+            return input_cost + output_cost
+
+    ###############################################
+    # Capabilities
+    ###############################################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
 
     ###############################################
     # Model
@@ -361,8 +440,8 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         kw = dict(
             api_key=api_key,
-            api_version=self.openai_api_version,
-            base_url=self.base_url,
+            api_version=self.api_version,
+            azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
         )
@@ -375,10 +454,5 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                 return cls(**kw)
             raise
 
-    def supports_multimodal(self):
-        if self.name in valid_multimodal_models:
-            return True
-        return False
-
     def get_model_name(self):
         return f"{self.name} (Azure)"
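The constructor changes above rename openai_api_version to api_version, make temperature optional with settings fallbacks, route pricing through the new cost_per_input_token/cost_per_output_token parameters, and validate every required value through require_param. A minimal usage sketch, assuming AzureOpenAIModel is importable from deepeval.models and using illustrative values throughout:

    # Hypothetical values; parameter names follow the 3.7.7 diff above.
    from deepeval.models import AzureOpenAIModel

    model = AzureOpenAIModel(
        model="gpt-4.1",                         # falls back to AZURE_MODEL_NAME
        deployment_name="my-gpt-41-deployment",  # falls back to AZURE_DEPLOYMENT_NAME
        base_url="https://my-resource.openai.azure.com/",  # trailing "/" is stripped
        api_version="2024-10-21",                # renamed from openai_api_version
        cost_per_input_token=2e-06,              # falls back to OPENAI_COST_PER_INPUT_TOKEN
        cost_per_output_token=8e-06,             # falls back to OPENAI_COST_PER_OUTPUT_TOKEN
    )

If any required value is still missing after the settings fallback, require_param presumably fails fast with a pointer to the matching environment variable, whereas the old `base_url or ... and str(...)` chain could silently leave base_url as None.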
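generate now returns Tuple[Union[str, BaseModel], float], and schema handling branches on the self.model_data flags looked up in OPENAI_MODELS_DATA rather than the removed hard-coded model lists. Continuing the sketch above, with a made-up Verdict schema:

    from pydantic import BaseModel

    class Verdict(BaseModel):
        verdict: str
        reason: str

    # With a schema, generate() uses native structured outputs when the model
    # metadata allows it, then JSON mode, then a plain completion; the second
    # return value is the cost computed from the per-token prices.
    output, cost = model.generate(
        "Is the response grounded in the context? Explain.", schema=Verdict
    )

The new supports_* accessors expose the same metadata to callers (e.g. model.supports_json_mode()), replacing the module-level valid_multimodal_models list that backed supports_multimodal in 3.7.5.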
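generate_content (formerly generate_prompt) also drops the PIL/base64 re-encoding path: remote MLLMImage entries pass through as plain URLs, while everything else is inlined as a data URI built from the image's own mimeType/dataBase64 fields after ensure_images_loaded(). A rough illustration, assuming MLLMImage accepts url and local as keyword arguments, as its attribute access in the diff suggests:

    from deepeval.test_case import MLLMImage

    # Hypothetical construction; only the url/local/mimeType/dataBase64
    # attributes are visible in the diff itself.
    parts = model.generate_content(
        [
            "Describe this screenshot:",
            MLLMImage(url="https://example.com/screenshot.png", local=False),
        ]
    )
    # parts == [
    #     {"type": "text", "text": "Describe this screenshot:"},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/screenshot.png"}},
    # ]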