deepeval 3.7.5-py3-none-any.whl → 3.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +106 -21
  5. deepeval/evaluate/evaluate.py +0 -3
  6. deepeval/evaluate/execute.py +10 -222
  7. deepeval/evaluate/utils.py +6 -30
  8. deepeval/key_handler.py +3 -0
  9. deepeval/metrics/__init__.py +0 -4
  10. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  11. deepeval/metrics/answer_relevancy/template.py +102 -179
  12. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  13. deepeval/metrics/arena_g_eval/template.py +17 -1
  14. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  15. deepeval/metrics/argument_correctness/template.py +19 -2
  16. deepeval/metrics/base_metric.py +13 -41
  17. deepeval/metrics/bias/bias.py +102 -108
  18. deepeval/metrics/bias/template.py +14 -2
  19. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  20. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  22. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  23. deepeval/metrics/conversation_completeness/template.py +23 -3
  24. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  25. deepeval/metrics/conversational_dag/nodes.py +66 -123
  26. deepeval/metrics/conversational_dag/templates.py +16 -0
  27. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  28. deepeval/metrics/dag/dag.py +10 -0
  29. deepeval/metrics/dag/nodes.py +63 -126
  30. deepeval/metrics/dag/templates.py +14 -0
  31. deepeval/metrics/exact_match/exact_match.py +9 -1
  32. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  33. deepeval/metrics/g_eval/g_eval.py +87 -78
  34. deepeval/metrics/g_eval/template.py +18 -1
  35. deepeval/metrics/g_eval/utils.py +7 -6
  36. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  37. deepeval/metrics/goal_accuracy/template.py +21 -3
  38. deepeval/metrics/hallucination/hallucination.py +60 -75
  39. deepeval/metrics/hallucination/template.py +13 -0
  40. deepeval/metrics/indicator.py +3 -6
  41. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  42. deepeval/metrics/json_correctness/template.py +10 -0
  43. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  44. deepeval/metrics/knowledge_retention/schema.py +9 -3
  45. deepeval/metrics/knowledge_retention/template.py +12 -0
  46. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  47. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  48. deepeval/metrics/mcp/template.py +52 -0
  49. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  50. deepeval/metrics/mcp_use_metric/template.py +12 -0
  51. deepeval/metrics/misuse/misuse.py +77 -97
  52. deepeval/metrics/misuse/template.py +15 -0
  53. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  54. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  55. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  56. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  57. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  58. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  59. deepeval/metrics/non_advice/non_advice.py +79 -105
  60. deepeval/metrics/non_advice/template.py +12 -0
  61. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  62. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  63. deepeval/metrics/pii_leakage/template.py +14 -0
  64. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  65. deepeval/metrics/plan_adherence/template.py +11 -0
  66. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  67. deepeval/metrics/plan_quality/template.py +9 -0
  68. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  69. deepeval/metrics/prompt_alignment/template.py +12 -0
  70. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  71. deepeval/metrics/role_adherence/template.py +14 -0
  72. deepeval/metrics/role_violation/role_violation.py +75 -108
  73. deepeval/metrics/role_violation/template.py +12 -0
  74. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  75. deepeval/metrics/step_efficiency/template.py +11 -0
  76. deepeval/metrics/summarization/summarization.py +115 -183
  77. deepeval/metrics/summarization/template.py +19 -0
  78. deepeval/metrics/task_completion/task_completion.py +67 -73
  79. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  80. deepeval/metrics/tool_use/tool_use.py +42 -66
  81. deepeval/metrics/topic_adherence/template.py +13 -0
  82. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  83. deepeval/metrics/toxicity/template.py +13 -0
  84. deepeval/metrics/toxicity/toxicity.py +80 -99
  85. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  86. deepeval/metrics/turn_contextual_precision/template.py +1 -1
  87. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
  88. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  89. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
  90. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  91. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
  92. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  93. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
  94. deepeval/metrics/turn_relevancy/template.py +14 -0
  95. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  96. deepeval/metrics/utils.py +145 -90
  97. deepeval/models/base_model.py +44 -6
  98. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  99. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  100. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  101. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  102. deepeval/models/llms/amazon_bedrock_model.py +226 -71
  103. deepeval/models/llms/anthropic_model.py +141 -47
  104. deepeval/models/llms/azure_model.py +167 -94
  105. deepeval/models/llms/constants.py +2032 -0
  106. deepeval/models/llms/deepseek_model.py +79 -29
  107. deepeval/models/llms/gemini_model.py +126 -67
  108. deepeval/models/llms/grok_model.py +125 -59
  109. deepeval/models/llms/kimi_model.py +126 -81
  110. deepeval/models/llms/litellm_model.py +92 -18
  111. deepeval/models/llms/local_model.py +114 -15
  112. deepeval/models/llms/ollama_model.py +97 -76
  113. deepeval/models/llms/openai_model.py +167 -310
  114. deepeval/models/llms/portkey_model.py +58 -16
  115. deepeval/models/llms/utils.py +5 -2
  116. deepeval/models/utils.py +60 -4
  117. deepeval/simulator/conversation_simulator.py +43 -0
  118. deepeval/simulator/template.py +13 -0
  119. deepeval/test_case/api.py +24 -45
  120. deepeval/test_case/arena_test_case.py +7 -2
  121. deepeval/test_case/conversational_test_case.py +55 -6
  122. deepeval/test_case/llm_test_case.py +60 -6
  123. deepeval/test_run/api.py +3 -0
  124. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
  125. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
  126. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  127. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  128. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  129. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  130. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  131. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  132. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  133. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/azure_model.py +167 -94
@@ -1,41 +1,33 @@
-import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel, SecretStr
-from io import BytesIO
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.openai_model import (
-    structured_outputs_models,
-    json_mode_models,
-    model_pricing,
-)
+from deepeval.models.llms.constants import OPENAI_MODELS_DATA
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.test_case import MLLMImage
-from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_param,
+)
 from deepeval.models.llms.utils import (
     trim_and_load_json,
 )
 from deepeval.models.utils import (
     parse_model_name,
     require_secret_api_key,
+    require_costs,
     normalize_kwargs_and_extract_aliases,
 )
 from deepeval.constants import ProviderSlug as PS
 
-valid_multimodal_models = [
-    "gpt-4o",
-    "gpt-4o-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-5",
-]
-
 retry_azure = create_retry_decorator(PS.AZURE)
 
 _ALIAS_MAP = {
@@ -50,52 +42,117 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         deployment_name: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
+        api_version: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
             "AzureOpenAIModel",
             kwargs,
             _ALIAS_MAP,
         )
 
-        # re-map depricated keywords to re-named positional args
+        # re-map deprecated keywords to re-named positional args
         if api_key is None and "api_key" in alias_values:
             api_key = alias_values["api_key"]
         if base_url is None and "base_url" in alias_values:
             base_url = alias_values["base_url"]
 
-        settings = get_settings()
-
         # fetch Azure deployment parameters
         model = model or settings.AZURE_MODEL_NAME
-        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
         if api_key is not None:
            # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr | None = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
-        self.openai_api_version = (
-            openai_api_version or settings.OPENAI_API_VERSION
+        api_version = api_version or settings.OPENAI_API_VERSION
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.AZURE_OPENAI_ENDPOINT is not None:
+            base_url = str(settings.AZURE_OPENAI_ENDPOINT).rstrip("/")
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
+        )
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_MODEL_NAME",
+            param_hint="model",
+        )
+
+        self.deployment_name = require_param(
+            deployment_name,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_DEPLOYMENT_NAME",
+            param_hint="deployment_name",
+        )
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_OPENAI_ENDPOINT",
+            param_hint="base_url",
         )
-        self.base_url = (
-            base_url
-            or settings.AZURE_OPENAI_ENDPOINT
-            and str(settings.AZURE_OPENAI_ENDPOINT)
+
+        self.api_version = require_param(
+            api_version,
+            provider_label="AzureOpenAIModel",
+            env_var_name="OPENAI_API_VERSION",
+            param_hint="api_version",
         )
 
+        self.model_data = OPENAI_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "OPENAI_COST_PER_INPUT_TOKEN",
+            "OPENAI_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
 
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = normalized_kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
         super().__init__(parse_model_name(model))
 
    ###############################################
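The constructor above now resolves every required Azure parameter through require_param and lets callers pin per-token costs explicitly. A minimal construction sketch based on the new signature; the model, endpoint, deployment, version, key, and cost values are placeholders, and each can instead come from settings (AZURE_MODEL_NAME, AZURE_DEPLOYMENT_NAME, AZURE_OPENAI_ENDPOINT, OPENAI_API_VERSION, AZURE_OPENAI_API_KEY, OPENAI_COST_PER_INPUT_TOKEN, OPENAI_COST_PER_OUTPUT_TOKEN):

    from deepeval.models.llms.azure_model import AzureOpenAIModel

    azure_model = AzureOpenAIModel(
        model="gpt-4.1",                      # looked up in OPENAI_MODELS_DATA
        deployment_name="my-gpt-41-deployment",
        base_url="https://my-resource.openai.azure.com",  # trailing "/" is stripped
        api_version="2024-10-21",
        api_key="<azure-openai-api-key>",
        cost_per_input_token=2e-06,           # optional; falls back to settings or model data
        cost_per_output_token=8e-06,
        temperature=0,
    )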
@@ -105,20 +162,23 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=False)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.name in structured_outputs_models:
+            if self.model_data.supports_structured_outputs:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content": prompt}],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -128,14 +188,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.name in json_mode_models:
+            if self.model_data.supports_json:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content": prompt},
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 json_output = trim_and_load_json(
                     completion.choices[0].message.content
@@ -149,7 +210,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content": prompt},
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
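As the branches above show, generate() prefers the structured-outputs path when a schema is passed, falls back to JSON mode, and otherwise issues the plain completion shown here, now forwarding generation_kwargs on every branch. A usage sketch; the Verdict schema and prompt are made up for illustration:

    from pydantic import BaseModel

    class Verdict(BaseModel):  # hypothetical schema, not part of deepeval
        verdict: str
        reason: str

    output, cost = azure_model.generate(
        "Does the summary stay faithful to the source text?",
        schema=Verdict,
    )
    # If the resolved model_data supports structured outputs, `output` is a
    # Verdict instance; on the JSON-mode fallback the response is parsed with
    # trim_and_load_json. `cost` is computed by calculate_cost from token usage.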
@@ -171,16 +232,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
 
         if check_if_multimodal(prompt):
-            prompt = convert_to_multi_modal_array(prompt)
-            prompt = self.generate_prompt(prompt)
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
 
         if schema:
-            if self.name in structured_outputs_models:
+            if self.model_data.supports_structured_outputs:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[{"role": "user", "content": prompt}],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -190,11 +254,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.name in json_mode_models:
+            if self.model_data.supports_json:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content": prompt},
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
@@ -212,7 +276,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = await client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content": prompt},
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -242,10 +306,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=False)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-            prompt = self.generate_prompt(prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -268,10 +334,12 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         client = self.load_model(async_mode=True)
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
-            prompt = self.generate_prompt(prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = await client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -284,53 +352,63 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    def generate_prompt(
-        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
     ):
-        """Convert multimodal input into the proper message format for Azure OpenAI."""
-        prompt = []
-        for ele in multimodal_input:
-            if isinstance(ele, str):
-                prompt.append({"type": "text", "text": ele})
-            elif isinstance(ele, MLLMImage):
-                if ele.local:
-                    import PIL.Image
-
-                    image = PIL.Image.open(ele.url)
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
-                        },
-                    }
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
                 else:
-                    visual_dict = {
-                        "type": "image_url",
-                        "image_url": {"url": ele.url},
-                    }
-                prompt.append(visual_dict)
-        return prompt
-
-    def encode_pil_image(self, pil_image):
-        """Encode a PIL image to base64 string."""
-        image_buffer = BytesIO()
-        if pil_image.mode in ("RGBA", "LA", "P"):
-            pil_image = pil_image.convert("RGB")
-        pil_image.save(image_buffer, format="JPEG")
-        image_bytes = image_buffer.getvalue()
-        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-        return base64_encoded_image
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
 
     ###############################################
     # Utilities
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(self.name, model_pricing["gpt-4.1"])
-        input_cost = input_tokens * pricing["input"]
-        output_cost = output_tokens * pricing["output"]
+        input_cost = input_tokens * self.model_data.input_price
+        output_cost = output_tokens * self.model_data.output_price
         return input_cost + output_cost
 
+    ###############################################
+    # Capabilities
+    ###############################################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
+
     ###############################################
     # Model
     ###############################################
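generate_content() now emits the chat-completions content-parts format directly, and calculate_cost() reads prices from the resolved model_data rather than a hard-coded pricing table. A sketch of both, reusing the placeholder prices from the construction example above; the MLLMImage arguments are illustrative:

    from deepeval.test_case import MLLMImage

    content = azure_model.generate_content(
        ["Describe this image.", MLLMImage(url="https://example.com/cat.png", local=False)]
    )
    # -> [{"type": "text", "text": "Describe this image."},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}]
    # Local or not-yet-fetched images go through ensure_images_loaded() and are
    # sent as base64 data: URIs instead.

    cost = azure_model.calculate_cost(input_tokens=1_000, output_tokens=500)
    # 1_000 * 2e-06 + 500 * 8e-06 = 0.006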
@@ -361,8 +439,8 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         kw = dict(
             api_key=api_key,
-            api_version=self.openai_api_version,
-            base_url=self.base_url,
+            api_version=self.api_version,
+            azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
         )
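The client kwargs above now hand the resource endpoint to the SDK as azure_endpoint instead of base_url, letting the openai client derive the full deployment path itself. Roughly what that dict expands to, with the same placeholder values as before:

    from openai import AzureOpenAI

    client = AzureOpenAI(
        api_key="<azure-openai-api-key>",
        api_version="2024-10-21",
        azure_endpoint="https://my-resource.openai.azure.com",
        azure_deployment="my-gpt-41-deployment",
    )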
@@ -375,10 +453,5 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                 return cls(**kw)
             raise
 
-    def supports_multimodal(self):
-        if self.name in valid_multimodal_models:
-            return True
-        return False
-
     def get_model_name(self):
         return f"{self.name} (Azure)"