deepeval 3.7.2__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. deepeval/_version.py +1 -1
  2. deepeval/benchmarks/human_eval/human_eval.py +2 -1
  3. deepeval/cli/test.py +1 -1
  4. deepeval/config/settings.py +102 -13
  5. deepeval/dataset/dataset.py +35 -11
  6. deepeval/dataset/utils.py +2 -0
  7. deepeval/evaluate/configs.py +1 -1
  8. deepeval/evaluate/execute.py +4 -1
  9. deepeval/metrics/answer_relevancy/template.py +4 -4
  10. deepeval/metrics/argument_correctness/template.py +2 -2
  11. deepeval/metrics/bias/template.py +3 -3
  12. deepeval/metrics/contextual_precision/template.py +6 -6
  13. deepeval/metrics/contextual_recall/template.py +2 -2
  14. deepeval/metrics/contextual_relevancy/template.py +3 -3
  15. deepeval/metrics/conversation_completeness/template.py +2 -2
  16. deepeval/metrics/conversational_dag/templates.py +4 -4
  17. deepeval/metrics/conversational_g_eval/template.py +4 -3
  18. deepeval/metrics/dag/templates.py +4 -4
  19. deepeval/metrics/faithfulness/template.py +4 -4
  20. deepeval/metrics/hallucination/template.py +4 -4
  21. deepeval/metrics/misuse/template.py +2 -2
  22. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  23. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  24. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  25. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  26. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  27. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  28. deepeval/metrics/non_advice/template.py +2 -2
  29. deepeval/metrics/pii_leakage/template.py +2 -2
  30. deepeval/metrics/prompt_alignment/template.py +4 -4
  31. deepeval/metrics/role_violation/template.py +2 -2
  32. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  33. deepeval/metrics/toxicity/template.py +4 -4
  34. deepeval/metrics/turn_relevancy/template.py +2 -2
  35. deepeval/metrics/utils.py +3 -0
  36. deepeval/models/__init__.py +2 -0
  37. deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  38. deepeval/models/embedding_models/local_embedding_model.py +23 -10
  39. deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  40. deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  41. deepeval/models/llms/anthropic_model.py +17 -5
  42. deepeval/models/llms/azure_model.py +30 -18
  43. deepeval/models/llms/deepseek_model.py +22 -12
  44. deepeval/models/llms/gemini_model.py +120 -87
  45. deepeval/models/llms/grok_model.py +23 -16
  46. deepeval/models/llms/kimi_model.py +23 -12
  47. deepeval/models/llms/litellm_model.py +63 -25
  48. deepeval/models/llms/local_model.py +26 -18
  49. deepeval/models/llms/ollama_model.py +17 -7
  50. deepeval/models/llms/openai_model.py +22 -17
  51. deepeval/models/llms/portkey_model.py +132 -0
  52. deepeval/models/mlllms/__init__.py +1 -0
  53. deepeval/models/mlllms/azure_model.py +343 -0
  54. deepeval/models/mlllms/gemini_model.py +102 -73
  55. deepeval/models/mlllms/ollama_model.py +40 -9
  56. deepeval/models/mlllms/openai_model.py +65 -14
  57. deepeval/models/utils.py +48 -3
  58. deepeval/optimization/__init__.py +13 -0
  59. deepeval/optimization/adapters/__init__.py +2 -0
  60. deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  61. deepeval/optimization/aggregates.py +14 -0
  62. deepeval/optimization/configs.py +34 -0
  63. deepeval/optimization/copro/configs.py +31 -0
  64. deepeval/optimization/copro/loop.py +837 -0
  65. deepeval/optimization/gepa/__init__.py +7 -0
  66. deepeval/optimization/gepa/configs.py +115 -0
  67. deepeval/optimization/gepa/loop.py +677 -0
  68. deepeval/optimization/miprov2/configs.py +134 -0
  69. deepeval/optimization/miprov2/loop.py +785 -0
  70. deepeval/optimization/mutations/__init__.py +0 -0
  71. deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  72. deepeval/optimization/policies/__init__.py +16 -0
  73. deepeval/optimization/policies/selection.py +166 -0
  74. deepeval/optimization/policies/tie_breaker.py +67 -0
  75. deepeval/optimization/prompt_optimizer.py +462 -0
  76. deepeval/optimization/simba/__init__.py +0 -0
  77. deepeval/optimization/simba/configs.py +33 -0
  78. deepeval/optimization/simba/loop.py +983 -0
  79. deepeval/optimization/simba/types.py +15 -0
  80. deepeval/optimization/types.py +361 -0
  81. deepeval/optimization/utils.py +598 -0
  82. deepeval/prompt/prompt.py +10 -5
  83. deepeval/test_run/cache.py +2 -0
  84. deepeval/test_run/test_run.py +6 -1
  85. deepeval/tracing/context.py +3 -0
  86. deepeval/tracing/tracing.py +22 -11
  87. deepeval/utils.py +24 -0
  88. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
  89. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/RECORD +92 -66
  90. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +1 -1
  91. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
  92. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
deepeval/models/llms/ollama_model.py
@@ -2,12 +2,11 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
 
+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS
 
 
@@ -23,17 +22,20 @@ class OllamaModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
+        settings = get_settings()
+        model_name = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.LOCAL_MODEL_BASE_URL)
+            or (
+                settings.LOCAL_MODEL_BASE_URL
+                and str(settings.LOCAL_MODEL_BASE_URL)
+            )
             or "http://localhost:11434"
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Raw kwargs destined for the underlying Ollama client
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         super().__init__(model_name)
@@ -97,8 +99,16 @@ class OllamaModel(DeepEvalBaseLLM):
             return self._build_client(Client)
         return self._build_client(AsyncClient)
 
+    def _client_kwargs(self) -> Dict:
+        """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        kw = dict(
+            host=self.base_url,
+            **self._client_kwargs(),
+        )
+        return cls(**kw)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
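Note: OllamaModel now resolves its defaults from deepeval.config.settings instead of the legacy KEY_FILE_HANDLER. A minimal usage sketch, assuming the settings layer picks up LOCAL_MODEL_NAME and LOCAL_MODEL_BASE_URL from the environment before deepeval loads (the model tag below is illustrative):

    import os

    # Hypothetical environment setup; these are the settings fields the
    # constructor falls back to when no arguments are passed.
    os.environ["LOCAL_MODEL_NAME"] = "llama3.2"
    os.environ["LOCAL_MODEL_BASE_URL"] = "http://localhost:11434"

    from deepeval.models.llms.ollama_model import OllamaModel

    model = OllamaModel()          # settings first, then http://localhost:11434
    print(model.get_model_name())  # "llama3.2 (Ollama)"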
deepeval/models/llms/openai_model.py
@@ -1,7 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 
 from openai import (
     OpenAI,
@@ -12,7 +11,7 @@ from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
@@ -227,23 +226,18 @@ class GPTModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = None
-        model = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.OPENAI_MODEL_NAME
-        )
+        model = model or settings.OPENAI_MODEL_NAME
         cost_per_input_token = (
             cost_per_input_token
             if cost_per_input_token is not None
-            else KEY_FILE_HANDLER.fetch_data(
-                ModelKeyValues.OPENAI_COST_PER_INPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
        )
         cost_per_output_token = (
             cost_per_output_token
             if cost_per_output_token is not None
-            else KEY_FILE_HANDLER.fetch_data(
-                ModelKeyValues.OPENAI_COST_PER_OUTPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
         )
 
         if isinstance(model, str):
@@ -272,7 +266,12 @@ class GPTModel(DeepEvalBaseLLM):
         elif model is None:
             model_name = default_gpt_model
 
-        self._openai_api_key = _openai_api_key
+        if _openai_api_key is not None:
+            # keep it secret, keep it safe from serialization, logging, and the like
+            self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
+        else:
+            self._openai_api_key = get_settings().OPENAI_API_KEY
+
         self.base_url = base_url
         # args and kwargs will be passed to the underlying model, in load_model function
 
@@ -485,9 +484,9 @@ class GPTModel(DeepEvalBaseLLM):
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
 
-    ###############################################
-    # Model
-    ###############################################
+    #########
+    # Model #
+    #########
 
     def get_model_name(self):
         return self.model_name
@@ -512,9 +511,15 @@ class GPTModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self._openai_api_key,
+            provider_label="OpenAI",
+            env_var_name="OPENAI_API_KEY",
+            param_hint="`_openai_api_key` to GPTModel(...)",
+        )
 
         kw = dict(
-            api_key=self._openai_api_key,
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
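Note: with this change an explicitly passed key is wrapped in pydantic's SecretStr immediately, and require_secret_api_key unwraps it only when the client is built, so the raw key no longer sits on the instance in plain text. A short sketch of the masking behavior this relies on (standard pydantic semantics; the key string is illustrative):

    from pydantic import SecretStr

    key = SecretStr("sk-illustrative")
    print(key)                     # ********** (masked in str/repr and logs)
    print(key.get_secret_value())  # "sk-illustrative", only on explicit unwrap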
deepeval/models/llms/portkey_model.py (new file)
@@ -0,0 +1,132 @@
+import aiohttp
+import requests
+from typing import Any, Dict, List, Optional, Union
+from pydantic import AnyUrl, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.utils import require_param
+
+
+class PortkeyModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[AnyUrl] = None,
+        provider: Optional[str] = None,
+    ):
+        settings = get_settings()
+        model = model or settings.PORTKEY_MODEL_NAME
+
+        self.model = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging, and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.PORTKEY_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.PORTKEY_BASE_URL is not None:
+            base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_BASE_URL",
+            param_hint="base_url",
+        )
+
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+        self.provider = require_param(
+            provider,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_PROVIDER_NAME",
+            param_hint="provider",
+        )
+
+    def _headers(self) -> Dict[str, str]:
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_API_KEY",
+            param_hint="`api_key` to PortkeyModel(...)",
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-portkey-api-key": api_key,
+        }
+        if self.provider:
+            headers["x-portkey-provider"] = self.provider
+        return headers
+
+    def _payload(self, prompt: str) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+
+    def _extract_content(self, data: Dict[str, Any]) -> str:
+        choices: Union[List[Dict[str, Any]], None] = data.get("choices")
+        if not choices:
+            raise ValueError("Portkey response did not include any choices.")
+        message = choices[0].get("message", {})
+        content: Union[str, List[Dict[str, Any]], None] = message.get("content")
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "".join(part.get("text", "") for part in content)
+        return ""
+
+    def generate(self, prompt: str) -> str:
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as error:
+            body: Union[str, Dict[str, Any]]
+            try:
+                body = response.json()
+            except Exception:
+                body = response.text
+            raise ValueError(
+                f"Portkey request failed with status {response.status_code}: {body}"
+            ) from error
+        except requests.RequestException as error:
+            raise ValueError(f"Portkey request failed: {error}") from error
+        return self._extract_content(response.json())
+
+    async def a_generate(self, prompt: str) -> str:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            ) as response:
+                if response.status >= 400:
+                    body = await response.text()
+                    raise ValueError(
+                        f"Portkey request failed with status {response.status}: {body}"
+                    )
+                data = await response.json()
+                return self._extract_content(data)
+
+    def get_model_name(self) -> str:
+        return f"Portkey ({self.model})"
+
+    def load_model(self):
+        return None
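Note: PortkeyModel is a thin HTTP client that posts OpenAI-style chat-completion payloads to the configured gateway. A minimal usage sketch; every value below is illustrative, and each argument can instead come from the corresponding PORTKEY_* setting named in the code above:

    from deepeval.models.llms.portkey_model import PortkeyModel

    model = PortkeyModel(
        model="gpt-4o",                        # or PORTKEY_MODEL_NAME
        api_key="pk-illustrative",             # or PORTKEY_API_KEY; stored as SecretStr
        base_url="https://api.portkey.ai/v1",  # or PORTKEY_BASE_URL; trailing "/" stripped
        provider="openai",                     # or PORTKEY_PROVIDER_NAME
    )
    print(model.generate("Say hello"))         # POST {base_url}/chat/completions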
deepeval/models/mlllms/__init__.py
@@ -1,3 +1,4 @@
 from .openai_model import MultimodalOpenAIModel
 from .ollama_model import MultimodalOllamaModel
 from .gemini_model import MultimodalGeminiModel
+from .azure_model import MultimodalAzureOpenAIMLLMModel
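Note: with this export the new Azure multimodal model becomes importable from the mlllms subpackage (deepeval/models/__init__.py also changes, +2 lines, per file 36 in the list above):

    from deepeval.models.mlllms import MultimodalAzureOpenAIMLLMModel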
deepeval/models/mlllms/azure_model.py (new file)
@@ -0,0 +1,343 @@
+import base64
+from openai.types.chat.chat_completion import ChatCompletion
+from openai import AzureOpenAI, AsyncAzureOpenAI
+from typing import Optional, Tuple, Union, Dict, List
+from pydantic import BaseModel, SecretStr
+from io import BytesIO
+
+from deepeval.config.settings import get_settings
+from deepeval.models import DeepEvalBaseMLLM
+from deepeval.test_case import MLLMImage
+from deepeval.models.llms.openai_model import (
+    structured_outputs_models,
+    json_mode_models,
+    model_pricing,
+)
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
+from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import parse_model_name, require_secret_api_key
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_azure = create_retry_decorator(PS.AZURE)
+
+
+class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
+    def __init__(
+        self,
+        deployment_name: Optional[str] = None,
+        model_name: Optional[str] = None,
+        azure_openai_api_key: Optional[str] = None,
+        openai_api_version: Optional[str] = None,
+        azure_endpoint: Optional[str] = None,
+        temperature: float = 0,
+        generation_kwargs: Optional[Dict] = None,
+        **kwargs,
+    ):
+        settings = get_settings()
+        # fetch Azure deployment parameters
+        model_name = model_name or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        if azure_openai_api_key is not None:
+            # keep it secret, keep it safe from serialization, logging, and the like
+            self.azure_openai_api_key: SecretStr | None = SecretStr(
+                azure_openai_api_key
+            )
+        else:
+            self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
+
+        self.openai_api_version = (
+            openai_api_version or settings.OPENAI_API_VERSION
+        )
+        self.azure_endpoint = (
+            azure_endpoint
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
+        )
+        if temperature < 0:
+            raise ValueError("Temperature must be >= 0.")
+        self.temperature = temperature
+
+        # args and kwargs will be passed to the underlying model, in load_model function
+        self.kwargs = kwargs
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(parse_model_name(model_name))
+
+    ###############################################
+    # Generate functions
+    ###############################################
+
+    @retry_azure
+    def generate(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        schema: Optional[BaseModel] = None,
+    ) -> Tuple[Union[str, BaseModel], float]:
+        client = self.load_model(async_mode=False)
+        prompt = self.generate_prompt(multimodal_input)
+
+        if schema:
+            if self.model_name in structured_outputs_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format=schema,
+                    temperature=self.temperature,
+                )
+                structured_output: BaseModel = completion.choices[
+                    0
+                ].message.parsed
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return structured_output, cost
+            if self.model_name in json_mode_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format={"type": "json_object"},
+                    temperature=self.temperature,
+                )
+                json_output = trim_and_load_json(
+                    completion.choices[0].message.content
+                )
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return schema.model_validate(json_output), cost
+        print("Loading model client:")
+        print(client.base_url)
+        completion = client.chat.completions.create(
+            model=self.deployment_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            **self.generation_kwargs,
+        )
+        output = completion.choices[0].message.content
+        cost = self.calculate_cost(
+            completion.usage.prompt_tokens, completion.usage.completion_tokens
+        )
+        if schema:
+            json_output = trim_and_load_json(output)
+            return schema.model_validate(json_output), cost
+        else:
+            return output, cost
+
+    @retry_azure
+    async def a_generate(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        schema: Optional[BaseModel] = None,
+    ) -> Tuple[Union[str, BaseModel], float]:
+        client = self.load_model(async_mode=True)
+        prompt = self.generate_prompt(multimodal_input)
+
+        if schema:
+            if self.model_name in structured_outputs_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = await client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format=schema,
+                    temperature=self.temperature,
+                )
+                structured_output: BaseModel = completion.choices[
+                    0
+                ].message.parsed
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return structured_output, cost
+            if self.model_name in json_mode_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = await client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format={"type": "json_object"},
+                    temperature=self.temperature,
+                    **self.generation_kwargs,
+                )
+                json_output = trim_and_load_json(
+                    completion.choices[0].message.content
+                )
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return schema.model_validate(json_output), cost
+
+        completion = await client.chat.completions.create(
+            model=self.deployment_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            **self.generation_kwargs,
+        )
+        output = completion.choices[0].message.content
+        cost = self.calculate_cost(
+            completion.usage.prompt_tokens,
+            completion.usage.completion_tokens,
+        )
+        if schema:
+            json_output = trim_and_load_json(output)
+            return schema.model_validate(json_output), cost
+        else:
+            return output, cost
+
+    ###############################################
+    # Other generate functions
+    ###############################################
+
+    @retry_azure
+    def generate_raw_response(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        client = self.load_model(async_mode=False)
+        prompt = self.generate_prompt(multimodal_input)
+        messages = [{"role": "user", "content": prompt}]
+
+        # Generate completion
+        completion = client.chat.completions.create(
+            model=self.deployment_name,
+            messages=messages,
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(input_tokens, output_tokens)
+
+        return completion, cost
+
+    @retry_azure
+    async def a_generate_raw_response(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        client = self.load_model(async_mode=True)
+        prompt = self.generate_prompt(multimodal_input)
+        messages = [{"role": "user", "content": prompt}]
+
+        # Generate completion
+        completion = await client.chat.completions.create(
+            model=self.deployment_name,
+            messages=messages,
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(input_tokens, output_tokens)
+
+        return completion, cost
+
+    ###############################################
+    # Utilities
+    ###############################################
+
+    def generate_prompt(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        """Convert multimodal input into the proper message format for Azure OpenAI."""
+        prompt = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                prompt.append({"type": "text", "text": ele})
+            elif isinstance(ele, MLLMImage):
+                if ele.local:
+                    import PIL.Image
+
+                    image = PIL.Image.open(ele.url)
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
+                        },
+                    }
+                else:
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {"url": ele.url},
+                    }
+                prompt.append(visual_dict)
+        return prompt
+
+    def encode_pil_image(self, pil_image):
+        """Encode a PIL image to base64 string."""
+        image_buffer = BytesIO()
+        if pil_image.mode in ("RGBA", "LA", "P"):
+            pil_image = pil_image.convert("RGB")
+        pil_image.save(image_buffer, format="JPEG")
+        image_bytes = image_buffer.getvalue()
+        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
+        return base64_encoded_image
+
+    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
+        input_cost = input_tokens * pricing["input"]
+        output_cost = output_tokens * pricing["output"]
+        return input_cost + output_cost
+
+    ###############################################
+    # Model
+    ###############################################
+
+    def get_model_name(self):
+        return f"Azure OpenAI ({self.model_name})"
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.azure_openai_api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`azure_openai_api_key` to MultimodalAzureOpenAIMLLMModel(...)",
+        )
+
+        kw = dict(
+            api_key=api_key,
+            api_version=self.openai_api_version,
+            azure_endpoint=self.azure_endpoint,
+            azure_deployment=self.deployment_name,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
+ raise