deepeval-3.5.2-py3-none-any.whl → deepeval-3.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +52 -9
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/tracing/tracing.py +87 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
- {deepeval-3.5.2.dist-info → deepeval-3.5.3.dist-info}/RECORD +29 -29
- {deepeval-3.5.2.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
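The hunks below share a single theme: scattered per-file tenacity `@retry(...)` blocks and `log_retry_error` helpers are replaced by per-provider decorators built once from `deepeval.models.retry_policy` (`create_retry_decorator(PS.OPENAI)`, `PS.GOOGLE`, `PS.OLLAMA`); provider SDK retries are turned off unless the user opts a provider in via `DEEPEVAL_SDK_RETRY_PROVIDERS` (queried through `sdk_retries_for`); and remote image fetches now carry explicit connect/read timeouts taken from `deepeval.config.settings`.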
```diff
--- a/deepeval/models/llms/openai_model.py
+++ b/deepeval/models/llms/openai_model.py
@@ -1,5 +1,3 @@
-import logging
-
 from openai.types.chat.chat_completion import ChatCompletion
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
@@ -10,27 +8,17 @@ from openai import (
     AsyncOpenAI,
 )
 
-from tenacity import retry, before_sleep_log, RetryCallState
-
+from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
 from deepeval.models.retry_policy import (
-    OPENAI_ERROR_POLICY,
-    default_wait,
-    default_stop,
-    retry_predicate,
+    create_retry_decorator,
+    sdk_retries_for,
 )
 
-logger = logging.getLogger("deepeval.openai_model")
-
-
-def log_retry_error(retry_state: RetryCallState):
-    exception = retry_state.outcome.exception()
-    logger.error(
-        f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
-    )
 
+retry_openai = create_retry_decorator(PS.OPENAI)
 
 valid_gpt_models = [
     "gpt-3.5-turbo",
@@ -219,21 +207,6 @@ models_requiring_temperature_1 = [
     "gpt-5-chat-latest",
 ]
 
-_base_retry_rules_kw = dict(
-    wait=default_wait(),
-    stop=default_stop(),
-    retry=retry_predicate(OPENAI_ERROR_POLICY),
-    before_sleep=before_sleep_log(
-        logger, logging.INFO
-    ),  # <- logs only on retries
-    after=log_retry_error,
-)
-
-
-def _openai_client_kwargs():
-    # Avoid double-retry at SDK layer by disabling the SDK's own retries so tenacity is the single source of truth for retry logic.
-    return {"max_retries": 0}
-
 
 class GPTModel(DeepEvalBaseLLM):
     def __init__(
@@ -311,7 +284,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -370,7 +343,7 @@ class GPTModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
    ) -> Tuple[Union[str, BaseModel], float]:
@@ -434,7 +407,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_raw_response(
         self,
         prompt: str,
@@ -457,7 +430,7 @@ class GPTModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -480,7 +453,7 @@ class GPTModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_samples(
         self, prompt: str, n: int, temperature: float
     ) -> Tuple[list[str], float]:
@@ -500,6 +473,7 @@ class GPTModel(DeepEvalBaseLLM):
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        # TODO: consider loggin a warning instead of defaulting to whole model pricing
         pricing = model_pricing.get(self.model_name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
@@ -513,13 +487,32 @@ class GPTModel(DeepEvalBaseLLM):
         return self.model_name
 
     def load_model(self, async_mode: bool = False):
-        kwargs = {**self.kwargs, **_openai_client_kwargs()}
         if not async_mode:
-            return OpenAI
-
-
-
-
-
-
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENAI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self._openai_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```
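`deepeval/models/retry_policy.py` itself changed by +548/-64 in this release, but its hunks are not shown here. As a rough mental model only, a provider-keyed factory in the spirit of the new call sites might look like the sketch below; the tenacity wait/stop values, the env-var parsing, and the no-op fallback are all illustrative assumptions, not the actual implementation.

```python
# Sketch only: retry_policy.py is not shown in this diff, so names and values
# here are assumptions about its shape, not the real implementation.
import os

from tenacity import retry, stop_after_attempt, wait_exponential_jitter


def sdk_retries_for(slug) -> bool:
    # Hypothetical parsing of DEEPEVAL_SDK_RETRY_PROVIDERS, assumed to be a
    # comma-separated list of provider slugs such as "openai,anthropic".
    slug = getattr(slug, "value", slug)  # accept a ProviderSlug enum or a str
    raw = os.getenv("DEEPEVAL_SDK_RETRY_PROVIDERS", "")
    return slug in {part.strip().lower() for part in raw.split(",") if part.strip()}


def create_retry_decorator(slug):
    # If the user opted this provider into SDK-level retries, step aside.
    if sdk_retries_for(slug):
        return lambda fn: fn
    # Otherwise tenacity owns the retry loop (wait/stop values invented here).
    return retry(
        wait=wait_exponential_jitter(initial=1, max=10),
        stop=stop_after_attempt(5),
        reraise=True,
    )
```

The decorator returned by `retry(...)` works on both sync and async callables in recent tenacity releases, which matches how a single `@retry_openai` is applied to `generate` and `a_generate` above.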
```diff
--- a/deepeval/models/mlllms/gemini_model.py
+++ b/deepeval/models/mlllms/gemini_model.py
@@ -4,12 +4,19 @@ from pydantic import BaseModel
 from google.genai import types
 from google import genai
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
 
 
 default_multimodal_gemini_model = "gemini-1.5-pro"
+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)
 
 
 class MultimodalGeminiModel(DeepEvalBaseMLLM):
@@ -147,6 +154,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             self.model_temperature = 0.0
         return self.client.models
 
+    # TODO: Refactor genete prompt to minimize the work done on retry
+    @retry_gemini
     def generate_prompt(
         self, multimodal_input: List[Union[str, MLLMImage]] = []
     ) -> List[Union[str, MLLMImage]]:
@@ -162,6 +171,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             ValueError: If an invalid input type is provided
         """
         prompt = []
+        settings = get_settings()
+
         for ele in multimodal_input:
             if isinstance(ele, str):
                 prompt.append(ele)
@@ -170,9 +181,14 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
                     with open(ele.url, "rb") as f:
                         image_data = f.read()
                 else:
-                    response = requests.get(
-                        ele.url
-                    )
+                    response = requests.get(
+                        ele.url,
+                        timeout=(
+                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                        ),
+                    )
+                    response.raise_for_status()
                     image_data = response.content
 
                 image_part = types.Part.from_bytes(
@@ -183,6 +199,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
                 raise ValueError(f"Invalid input type: {type(ele)}")
         return prompt
 
+    @retry_gemini
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -222,6 +239,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         )
         return response.text, 0
 
+    @retry_gemini
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
```
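The two-element `timeout` passed to `requests.get` above is standard `requests` behavior: the first value bounds the TCP connect, the second bounds each read, so a hung image host can no longer stall an evaluation indefinitely. A self-contained illustration (the numbers are placeholders; the real values come from the new `MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS` / `MEDIA_IMAGE_READ_TIMEOUT_SECONDS` settings):

```python
import requests

# (connect timeout, read timeout) in seconds; placeholder values
TIMEOUT = (5.0, 30.0)

resp = requests.get("https://example.com/image.png", timeout=TIMEOUT)
resp.raise_for_status()  # turn 4xx/5xx into an exception a retry decorator can observe
image_data = resp.content
```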
```diff
--- a/deepeval/models/mlllms/ollama_model.py
+++ b/deepeval/models/mlllms/ollama_model.py
@@ -5,23 +5,31 @@ import requests
 import base64
 import io
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
 from deepeval.models import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(
-        self,
-    ):
+    def __init__(self, **kwargs):
         model_name = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.LOCAL_MODEL_NAME
         )
         self.base_url = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.LOCAL_MODEL_BASE_URL
         )
+        self.kwargs = kwargs
         super().__init__(model_name)
 
+    @retry_ollama
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -43,6 +51,7 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             0,
         )
 
+    @retry_ollama
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -77,12 +86,14 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
                     }
                 )
             elif isinstance(ele, MLLMImage):
-
-
-
-
-
-
+                img_b64 = self.convert_to_base64(ele.url, ele.local)
+                if img_b64 is not None:
+                    messages.append(
+                        {
+                            "role": "user",
+                            "images": [img_b64],
+                        }
+                    )
         return messages
 
     ###############################################
@@ -92,9 +103,17 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
     def convert_to_base64(self, image_source: str, is_local: bool) -> str:
         from PIL import Image
 
+        settings = get_settings()
         try:
             if not is_local:
-                response = requests.get(
+                response = requests.get(
+                    image_source,
+                    stream=True,
+                    timeout=(
+                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                    ),
+                )
                 response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
                 image = Image.open(io.BytesIO(response.content))
             else:
@@ -105,15 +124,21 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             img_str = base64.b64encode(buffered.getvalue()).decode()
             return img_str
 
+        except (requests.exceptions.RequestException, OSError) as e:
+            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
+            print(f"Image fetch/encode failed: {e}")
+            raise
         except Exception as e:
             print(f"Error converting image to base64: {e}")
             return None
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return
-
-
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
```
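The `Client`/`AsyncClient` passed to `_build_client` are imported outside the hunks shown, presumably from the `ollama` package, whose clients take a `host` plus extra keyword arguments forwarded to the underlying HTTP client. With the new `**kwargs` passthrough, callers can plausibly tune that transport:

```python
from deepeval.models.mlllms.ollama_model import MultimodalOllamaModel

# Hypothetical usage: extra constructor kwargs now flow through
# _build_client into the client as cls(host=..., **kwargs).
model = MultimodalOllamaModel(timeout=120)
```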
```diff
--- a/deepeval/models/mlllms/openai_model.py
+++ b/deepeval/models/mlllms/openai_model.py
@@ -3,15 +3,7 @@ from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ParsedChatCompletion
 from pydantic import BaseModel
 from io import BytesIO
-import logging
-import openai
 import base64
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    wait_exponential_jitter,
-    RetryCallState,
-)
 
 from deepeval.models.llms.openai_model import (
     model_pricing,
@@ -21,21 +13,14 @@ from deepeval.models import DeepEvalBaseMLLM
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.test_case import MLLMImage
 from deepeval.models.utils import parse_model_name
-
-retryable_exceptions = (
-    openai.RateLimitError,
-    openai.APIConnectionError,
-    openai.APITimeoutError,
-    openai.LengthFinishReasonError,
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
 )
+from deepeval.constants import ProviderSlug as PS
 
 
-def log_retry_error(retry_state: RetryCallState):
-    exception = retry_state.outcome.exception()
-    logging.error(
-        f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
-    )
-
+retry_openai = create_retry_decorator(PS.OPENAI)
 
 valid_multimodal_gpt_models = [
     "gpt-4o",
@@ -95,11 +80,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
     # Generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -136,11 +117,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         else:
             return output, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -181,17 +158,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     def generate_raw_response(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
         top_logprobs: int = 5,
     ) -> Tuple[ParsedChatCompletion, float]:
-        client =
+        client = self._client()
         prompt = self.generate_prompt(multimodal_input)
         messages = [{"role": "user", "content": prompt}]
         completion = client.chat.completions.create(
@@ -206,17 +179,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         cost = self.calculate_cost(input_tokens, output_tokens)
         return completion, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     async def a_generate_raw_response(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
         top_logprobs: int = 5,
     ) -> Tuple[ParsedChatCompletion, float]:
-        client =
+        client = self._client(async_mode=True)
         prompt = self.generate_prompt(multimodal_input)
         messages = [{"role": "user", "content": prompt}]
         completion = await client.chat.completions.create(
@@ -278,5 +247,12 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
         return base64_encoded_image
 
+    def _client(self, async_mode: bool = False):
+        kw = {"api_key": self._openai_api_key}
+        if not sdk_retries_for(PS.OPENAI):
+            kw["max_retries"] = 0
+        Client = AsyncOpenAI if async_mode else OpenAI
+        return Client(**kw)
+
     def get_model_name(self):
         return self.model_name
```