deepeval 3.7.2__py3-none-any.whl → 3.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/human_eval/human_eval.py +2 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/dataset.py +35 -11
- deepeval/dataset/utils.py +2 -0
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/execute.py +4 -1
- deepeval/metrics/answer_relevancy/template.py +4 -4
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/template.py +6 -6
- deepeval/metrics/contextual_recall/template.py +2 -2
- deepeval/metrics/contextual_relevancy/template.py +3 -3
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +4 -4
- deepeval/metrics/faithfulness/template.py +4 -4
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +3 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/embedding_models/azure_embedding_model.py +28 -15
- deepeval/models/embedding_models/local_embedding_model.py +23 -10
- deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
- deepeval/models/embedding_models/openai_embedding_model.py +18 -2
- deepeval/models/llms/anthropic_model.py +17 -5
- deepeval/models/llms/azure_model.py +30 -18
- deepeval/models/llms/deepseek_model.py +22 -12
- deepeval/models/llms/gemini_model.py +120 -87
- deepeval/models/llms/grok_model.py +23 -16
- deepeval/models/llms/kimi_model.py +23 -12
- deepeval/models/llms/litellm_model.py +63 -25
- deepeval/models/llms/local_model.py +26 -18
- deepeval/models/llms/ollama_model.py +17 -7
- deepeval/models/llms/openai_model.py +22 -17
- deepeval/models/llms/portkey_model.py +132 -0
- deepeval/models/mlllms/__init__.py +1 -0
- deepeval/models/mlllms/azure_model.py +343 -0
- deepeval/models/mlllms/gemini_model.py +102 -73
- deepeval/models/mlllms/ollama_model.py +40 -9
- deepeval/models/mlllms/openai_model.py +65 -14
- deepeval/models/utils.py +48 -3
- deepeval/optimization/__init__.py +13 -0
- deepeval/optimization/adapters/__init__.py +2 -0
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
- deepeval/optimization/aggregates.py +14 -0
- deepeval/optimization/configs.py +34 -0
- deepeval/optimization/copro/configs.py +31 -0
- deepeval/optimization/copro/loop.py +837 -0
- deepeval/optimization/gepa/__init__.py +7 -0
- deepeval/optimization/gepa/configs.py +115 -0
- deepeval/optimization/gepa/loop.py +677 -0
- deepeval/optimization/miprov2/configs.py +134 -0
- deepeval/optimization/miprov2/loop.py +785 -0
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +458 -0
- deepeval/optimization/policies/__init__.py +16 -0
- deepeval/optimization/policies/selection.py +166 -0
- deepeval/optimization/policies/tie_breaker.py +67 -0
- deepeval/optimization/prompt_optimizer.py +462 -0
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +33 -0
- deepeval/optimization/simba/loop.py +983 -0
- deepeval/optimization/simba/types.py +15 -0
- deepeval/optimization/types.py +361 -0
- deepeval/optimization/utils.py +598 -0
- deepeval/prompt/prompt.py +10 -5
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +6 -1
- deepeval/tracing/context.py +3 -0
- deepeval/tracing/tracing.py +22 -11
- deepeval/utils.py +24 -0
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/RECORD +92 -66
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +1 -1
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
deepeval/models/llms/ollama_model.py

@@ -2,12 +2,11 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS

@@ -23,17 +22,20 @@ class OllamaModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-        )
+        settings = get_settings()
+        model_name = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
-            or
+            or (
+                settings.LOCAL_MODEL_BASE_URL
+                and str(settings.LOCAL_MODEL_BASE_URL)
+            )
             or "http://localhost:11434"
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Raw kwargs destined for the underlying Ollama client
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         super().__init__(model_name)
@@ -97,8 +99,16 @@ class OllamaModel(DeepEvalBaseLLM):
             return self._build_client(Client)
         return self._build_client(AsyncClient)

+    def _client_kwargs(self) -> Dict:
+        """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-
+        kw = dict(
+            host=self.base_url,
+            **self._client_kwargs(),
+        )
+        return cls(**kw)

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
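For orientation, a brief usage sketch of the updated OllamaModel constructor shown above; the model tag and URL below are illustrative, and omitted arguments now fall back to get_settings().LOCAL_MODEL_NAME / LOCAL_MODEL_BASE_URL before the hard-coded localhost default:

from deepeval.models.llms.ollama_model import OllamaModel

# Explicit arguments win; otherwise the constructor reads LOCAL_MODEL_NAME and
# LOCAL_MODEL_BASE_URL from settings, then falls back to "http://localhost:11434".
ollama_model = OllamaModel(
    model="llama3.1",                    # illustrative model tag
    base_url="http://localhost:11434",   # optional; see fallbacks above
    temperature=0,
)
print(ollama_model.get_model_name())     # -> "llama3.1 (Ollama)"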
deepeval/models/llms/openai_model.py

@@ -1,7 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

 from openai import (
     OpenAI,
@@ -12,7 +11,7 @@ from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
@@ -227,23 +226,18 @@ class GPTModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = None
-        model = model or
-            ModelKeyValues.OPENAI_MODEL_NAME
-        )
+        model = model or settings.OPENAI_MODEL_NAME
         cost_per_input_token = (
             cost_per_input_token
             if cost_per_input_token is not None
-            else
-                ModelKeyValues.OPENAI_COST_PER_INPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
         )
         cost_per_output_token = (
             cost_per_output_token
             if cost_per_output_token is not None
-            else
-                ModelKeyValues.OPENAI_COST_PER_OUTPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
         )

         if isinstance(model, str):
@@ -272,7 +266,12 @@ class GPTModel(DeepEvalBaseLLM):
         elif model is None:
             model_name = default_gpt_model

-
+        if _openai_api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
+        else:
+            self._openai_api_key = get_settings().OPENAI_API_KEY
+
         self.base_url = base_url
         # args and kwargs will be passed to the underlying model, in load_model function

@@ -485,9 +484,9 @@ class GPTModel(DeepEvalBaseLLM):
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost

-
-    # Model
-
+    #########
+    # Model #
+    #########

     def get_model_name(self):
         return self.model_name
@@ -512,9 +511,15 @@ class GPTModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self._openai_api_key,
+            provider_label="OpenAI",
+            env_var_name="OPENAI_API_KEY",
+            param_hint="`_openai_api_key` to GPTModel(...)",
+        )

         kw = dict(
-            api_key=
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
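A minimal sketch of the reworked GPTModel key handling, assuming the constructor keeps accepting the `_openai_api_key` keyword referenced in the hunks above; an explicit key is wrapped in a pydantic SecretStr, and when it is omitted the value from get_settings().OPENAI_API_KEY is used, with require_secret_api_key raising a clear error at client-build time if neither is set (the key string here is a placeholder):

from deepeval.models.llms.openai_model import GPTModel

# Explicit key: stored as a SecretStr so repr()/logging do not leak it.
gpt = GPTModel(model="gpt-4.1", _openai_api_key="sk-placeholder")

# No explicit key: falls back to settings; building the client later fails
# with an error naming OPENAI_API_KEY if nothing is configured.
gpt_from_env = GPTModel(model="gpt-4.1")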
deepeval/models/llms/portkey_model.py (new file)

@@ -0,0 +1,132 @@
+import aiohttp
+import requests
+from typing import Any, Dict, List, Optional, Union
+from pydantic import AnyUrl, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.utils import require_param
+
+
+class PortkeyModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[AnyUrl] = None,
+        provider: Optional[str] = None,
+    ):
+        settings = get_settings()
+        model = model or settings.PORTKEY_MODEL_NAME
+
+        self.model = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.PORTKEY_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.PORTKEY_BASE_URL is not None:
+            base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_BASE_URL",
+            param_hint="base_url",
+        )
+
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+        self.provider = require_param(
+            provider,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_PROVIDER_NAME",
+            param_hint="provider",
+        )
+
+    def _headers(self) -> Dict[str, str]:
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_API_KEY",
+            param_hint="`api_key` to PortkeyModel(...)",
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-portkey-api-key": api_key,
+        }
+        if self.provider:
+            headers["x-portkey-provider"] = self.provider
+        return headers
+
+    def _payload(self, prompt: str) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+
+    def _extract_content(self, data: Dict[str, Any]) -> str:
+        choices: Union[List[Dict[str, Any]], None] = data.get("choices")
+        if not choices:
+            raise ValueError("Portkey response did not include any choices.")
+        message = choices[0].get("message", {})
+        content: Union[str, List[Dict[str, Any]], None] = message.get("content")
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "".join(part.get("text", "") for part in content)
+        return ""
+
+    def generate(self, prompt: str) -> str:
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as error:
+            body: Union[str, Dict[str, Any]]
+            try:
+                body = response.json()
+            except Exception:
+                body = response.text
+            raise ValueError(
+                f"Portkey request failed with status {response.status_code}: {body}"
+            ) from error
+        except requests.RequestException as error:
+            raise ValueError(f"Portkey request failed: {error}") from error
+        return self._extract_content(response.json())
+
+    async def a_generate(self, prompt: str) -> str:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            ) as response:
+                if response.status >= 400:
+                    body = await response.text()
+                    raise ValueError(
+                        f"Portkey request failed with status {response.status}: {body}"
+                    )
+                data = await response.json()
+                return self._extract_content(data)
+
+    def get_model_name(self) -> str:
+        return f"Portkey ({self.model})"
+
+    def load_model(self):
+        return None
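Based on the constructor and environment fallbacks in the new file, a usage sketch for the Portkey gateway model; every value below is illustrative, and anything omitted falls back to PORTKEY_MODEL_NAME, PORTKEY_API_KEY, PORTKEY_BASE_URL and PORTKEY_PROVIDER_NAME in settings:

from deepeval.models.llms.portkey_model import PortkeyModel

portkey = PortkeyModel(
    model="gpt-4o",                        # illustrative model id
    api_key="pk-placeholder",              # stored as a SecretStr
    base_url="https://api.portkey.ai/v1",  # illustrative gateway URL
    provider="openai",                     # sent as the x-portkey-provider header
)
print(portkey.get_model_name())            # -> "Portkey (gpt-4o)"
print(portkey.generate("Reply with one word: hello"))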
deepeval/models/mlllms/azure_model.py (new file)

@@ -0,0 +1,343 @@
+import base64
+from openai.types.chat.chat_completion import ChatCompletion
+from openai import AzureOpenAI, AsyncAzureOpenAI
+from typing import Optional, Tuple, Union, Dict, List
+from pydantic import BaseModel, SecretStr
+from io import BytesIO
+
+from deepeval.config.settings import get_settings
+from deepeval.models import DeepEvalBaseMLLM
+from deepeval.test_case import MLLMImage
+from deepeval.models.llms.openai_model import (
+    structured_outputs_models,
+    json_mode_models,
+    model_pricing,
+)
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
+from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import parse_model_name, require_secret_api_key
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_azure = create_retry_decorator(PS.AZURE)
+
+
+class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
+    def __init__(
+        self,
+        deployment_name: Optional[str] = None,
+        model_name: Optional[str] = None,
+        azure_openai_api_key: Optional[str] = None,
+        openai_api_version: Optional[str] = None,
+        azure_endpoint: Optional[str] = None,
+        temperature: float = 0,
+        generation_kwargs: Optional[Dict] = None,
+        **kwargs,
+    ):
+        settings = get_settings()
+        # fetch Azure deployment parameters
+        model_name = model_name or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        if azure_openai_api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.azure_openai_api_key: SecretStr | None = SecretStr(
+                azure_openai_api_key
+            )
+        else:
+            self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
+
+        self.openai_api_version = (
+            openai_api_version or settings.OPENAI_API_VERSION
+        )
+        self.azure_endpoint = (
+            azure_endpoint
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
+        )
+        if temperature < 0:
+            raise ValueError("Temperature must be >= 0.")
+        self.temperature = temperature
+
+        # args and kwargs will be passed to the underlying model, in load_model function
+        self.kwargs = kwargs
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(parse_model_name(model_name))
+
+    ###############################################
+    # Generate functions
+    ###############################################
+
+    @retry_azure
+    def generate(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        schema: Optional[BaseModel] = None,
+    ) -> Tuple[Union[str, BaseModel], float]:
+        client = self.load_model(async_mode=False)
+        prompt = self.generate_prompt(multimodal_input)
+
+        if schema:
+            if self.model_name in structured_outputs_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format=schema,
+                    temperature=self.temperature,
+                )
+                structured_output: BaseModel = completion.choices[
+                    0
+                ].message.parsed
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return structured_output, cost
+            if self.model_name in json_mode_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format={"type": "json_object"},
+                    temperature=self.temperature,
+                )
+                json_output = trim_and_load_json(
+                    completion.choices[0].message.content
+                )
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return schema.model_validate(json_output), cost
+        print("Loading model client:")
+        print(client.base_url)
+        completion = client.chat.completions.create(
+            model=self.deployment_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            **self.generation_kwargs,
+        )
+        output = completion.choices[0].message.content
+        cost = self.calculate_cost(
+            completion.usage.prompt_tokens, completion.usage.completion_tokens
+        )
+        if schema:
+            json_output = trim_and_load_json(output)
+            return schema.model_validate(json_output), cost
+        else:
+            return output, cost
+
+    @retry_azure
+    async def a_generate(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        schema: Optional[BaseModel] = None,
+    ) -> Tuple[Union[str, BaseModel], float]:
+        client = self.load_model(async_mode=True)
+        prompt = self.generate_prompt(multimodal_input)
+
+        if schema:
+            if self.model_name in structured_outputs_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = await client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format=schema,
+                    temperature=self.temperature,
+                )
+                structured_output: BaseModel = completion.choices[
+                    0
+                ].message.parsed
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return structured_output, cost
+            if self.model_name in json_mode_models:
+                messages = [{"role": "user", "content": prompt}]
+                completion = await client.beta.chat.completions.parse(
+                    model=self.deployment_name,
+                    messages=messages,
+                    response_format={"type": "json_object"},
+                    temperature=self.temperature,
+                    **self.generation_kwargs,
+                )
+                json_output = trim_and_load_json(
+                    completion.choices[0].message.content
+                )
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                )
+                return schema.model_validate(json_output), cost
+
+        completion = await client.chat.completions.create(
+            model=self.deployment_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            **self.generation_kwargs,
+        )
+        output = completion.choices[0].message.content
+        cost = self.calculate_cost(
+            completion.usage.prompt_tokens,
+            completion.usage.completion_tokens,
+        )
+        if schema:
+            json_output = trim_and_load_json(output)
+            return schema.model_validate(json_output), cost
+        else:
+            return output, cost
+
+    ###############################################
+    # Other generate functions
+    ###############################################
+
+    @retry_azure
+    def generate_raw_response(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        client = self.load_model(async_mode=False)
+        prompt = self.generate_prompt(multimodal_input)
+        messages = [{"role": "user", "content": prompt}]
+
+        # Generate completion
+        completion = client.chat.completions.create(
+            model=self.deployment_name,
+            messages=messages,
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(input_tokens, output_tokens)
+
+        return completion, cost
+
+    @retry_azure
+    async def a_generate_raw_response(
+        self,
+        multimodal_input: List[Union[str, MLLMImage]],
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        client = self.load_model(async_mode=True)
+        prompt = self.generate_prompt(multimodal_input)
+        messages = [{"role": "user", "content": prompt}]
+
+        # Generate completion
+        completion = await client.chat.completions.create(
+            model=self.deployment_name,
+            messages=messages,
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(input_tokens, output_tokens)
+
+        return completion, cost
+
+    ###############################################
+    # Utilities
+    ###############################################
+
+    def generate_prompt(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        """Convert multimodal input into the proper message format for Azure OpenAI."""
+        prompt = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                prompt.append({"type": "text", "text": ele})
+            elif isinstance(ele, MLLMImage):
+                if ele.local:
+                    import PIL.Image
+
+                    image = PIL.Image.open(ele.url)
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
+                        },
+                    }
+                else:
+                    visual_dict = {
+                        "type": "image_url",
+                        "image_url": {"url": ele.url},
+                    }
+                prompt.append(visual_dict)
+        return prompt
+
+    def encode_pil_image(self, pil_image):
+        """Encode a PIL image to base64 string."""
+        image_buffer = BytesIO()
+        if pil_image.mode in ("RGBA", "LA", "P"):
+            pil_image = pil_image.convert("RGB")
+        pil_image.save(image_buffer, format="JPEG")
+        image_bytes = image_buffer.getvalue()
+        base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
+        return base64_encoded_image
+
+    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
+        input_cost = input_tokens * pricing["input"]
+        output_cost = output_tokens * pricing["output"]
+        return input_cost + output_cost
+
+    ###############################################
+    # Model
+    ###############################################
+
+    def get_model_name(self):
+        return f"Azure OpenAI ({self.model_name})"
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.azure_openai_api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`azure_openai_api_key` to MultimodalAzureOpenAIMLLMModel(...)",
+        )
+
+        kw = dict(
+            api_key=api_key,
+            api_version=self.openai_api_version,
+            azure_endpoint=self.azure_endpoint,
+            azure_deployment=self.deployment_name,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise