deepeval 3.5.2__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
1
- import logging
2
-
3
1
  from openai.types.chat.chat_completion import ChatCompletion
4
2
  from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
5
3
  from typing import Optional, Tuple, Union, Dict
@@ -10,27 +8,17 @@ from openai import (
10
8
  AsyncOpenAI,
11
9
  )
12
10
 
13
- from tenacity import retry, RetryCallState, before_sleep_log
14
-
11
+ from deepeval.constants import ProviderSlug as PS
15
12
  from deepeval.models import DeepEvalBaseLLM
16
13
  from deepeval.models.llms.utils import trim_and_load_json
17
14
  from deepeval.models.utils import parse_model_name
18
15
  from deepeval.models.retry_policy import (
19
- OPENAI_ERROR_POLICY,
20
- default_wait,
21
- default_stop,
22
- retry_predicate,
16
+ create_retry_decorator,
17
+ sdk_retries_for,
23
18
  )
24
19
 
25
- logger = logging.getLogger("deepeval.openai_model")
26
-
27
-
28
- def log_retry_error(retry_state: RetryCallState):
29
- exception = retry_state.outcome.exception()
30
- logger.error(
31
- f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
32
- )
33
20
 
21
+ retry_openai = create_retry_decorator(PS.OPENAI)
34
22
 
35
23
  valid_gpt_models = [
36
24
  "gpt-3.5-turbo",
@@ -219,21 +207,6 @@ models_requiring_temperature_1 = [
219
207
  "gpt-5-chat-latest",
220
208
  ]
221
209
 
222
- _base_retry_rules_kw = dict(
223
- wait=default_wait(),
224
- stop=default_stop(),
225
- retry=retry_predicate(OPENAI_ERROR_POLICY),
226
- before_sleep=before_sleep_log(
227
- logger, logging.INFO
228
- ), # <- logs only on retries
229
- after=log_retry_error,
230
- )
231
-
232
-
233
- def _openai_client_kwargs():
234
- # Avoid double-retry at SDK layer by disabling the SDK's own retries so tenacity is the single source of truth for retry logic.
235
- return {"max_retries": 0}
236
-
237
210
 
238
211
  class GPTModel(DeepEvalBaseLLM):
239
212
  def __init__(
@@ -311,7 +284,7 @@ class GPTModel(DeepEvalBaseLLM):
311
284
  # Generate functions
312
285
  ###############################################
313
286
 
314
- @retry(**_base_retry_rules_kw)
287
+ @retry_openai
315
288
  def generate(
316
289
  self, prompt: str, schema: Optional[BaseModel] = None
317
290
  ) -> Tuple[Union[str, Dict], float]:
@@ -370,7 +343,7 @@ class GPTModel(DeepEvalBaseLLM):
370
343
  else:
371
344
  return output, cost
372
345
 
373
- @retry(**_base_retry_rules_kw)
346
+ @retry_openai
374
347
  async def a_generate(
375
348
  self, prompt: str, schema: Optional[BaseModel] = None
376
349
  ) -> Tuple[Union[str, BaseModel], float]:
@@ -434,7 +407,7 @@ class GPTModel(DeepEvalBaseLLM):
434
407
  # Other generate functions
435
408
  ###############################################
436
409
 
437
- @retry(**_base_retry_rules_kw)
410
+ @retry_openai
438
411
  def generate_raw_response(
439
412
  self,
440
413
  prompt: str,
@@ -457,7 +430,7 @@ class GPTModel(DeepEvalBaseLLM):
457
430
 
458
431
  return completion, cost
459
432
 
460
- @retry(**_base_retry_rules_kw)
433
+ @retry_openai
461
434
  async def a_generate_raw_response(
462
435
  self,
463
436
  prompt: str,
@@ -480,7 +453,7 @@ class GPTModel(DeepEvalBaseLLM):
480
453
 
481
454
  return completion, cost
482
455
 
483
- @retry(**_base_retry_rules_kw)
456
+ @retry_openai
484
457
  def generate_samples(
485
458
  self, prompt: str, n: int, temperature: float
486
459
  ) -> Tuple[list[str], float]:
@@ -500,6 +473,7 @@ class GPTModel(DeepEvalBaseLLM):
500
473
  ###############################################
501
474
 
502
475
  def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
476
+ # TODO: consider logging a warning instead of defaulting to whole model pricing
503
477
  pricing = model_pricing.get(self.model_name, model_pricing)
504
478
  input_cost = input_tokens * pricing["input"]
505
479
  output_cost = output_tokens * pricing["output"]
@@ -513,13 +487,32 @@ class GPTModel(DeepEvalBaseLLM):
513
487
  return self.model_name
514
488
 
515
489
  def load_model(self, async_mode: bool = False):
516
- kwargs = {**self.kwargs, **_openai_client_kwargs()}
517
490
  if not async_mode:
518
- return OpenAI(
519
- api_key=self._openai_api_key,
520
- base_url=self.base_url,
521
- **kwargs,
522
- )
523
- return AsyncOpenAI(
524
- api_key=self._openai_api_key, base_url=self.base_url, **kwargs
491
+ return self._build_client(OpenAI)
492
+ return self._build_client(AsyncOpenAI)
493
+
494
+ def _client_kwargs(self) -> Dict:
495
+ """
496
+ If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
497
+ If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
498
+ leave their retry settings as is.
499
+ """
500
+ kwargs = dict(self.kwargs or {})
501
+ if not sdk_retries_for(PS.OPENAI):
502
+ kwargs["max_retries"] = 0
503
+ return kwargs
504
+
505
+ def _build_client(self, cls):
506
+ kw = dict(
507
+ api_key=self._openai_api_key,
508
+ base_url=self.base_url,
509
+ **self._client_kwargs(),
525
510
  )
511
+ try:
512
+ return cls(**kw)
513
+ except TypeError as e:
514
+ # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
515
+ if "max_retries" in str(e):
516
+ kw.pop("max_retries", None)
517
+ return cls(**kw)
518
+ raise
@@ -4,12 +4,19 @@ from pydantic import BaseModel
4
4
  from google.genai import types
5
5
  from google import genai
6
6
 
7
+ from deepeval.models.retry_policy import (
8
+ create_retry_decorator,
9
+ )
7
10
  from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
8
11
  from deepeval.models.base_model import DeepEvalBaseMLLM
9
12
  from deepeval.test_case import MLLMImage
13
+ from deepeval.config.settings import get_settings
14
+ from deepeval.constants import ProviderSlug as PS
10
15
 
11
16
 
12
17
  default_multimodal_gemini_model = "gemini-1.5-pro"
18
+ # consistent retry rules
19
+ retry_gemini = create_retry_decorator(PS.GOOGLE)
13
20
 
14
21
 
15
22
  class MultimodalGeminiModel(DeepEvalBaseMLLM):
@@ -147,6 +154,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
147
154
  self.model_temperature = 0.0
148
155
  return self.client.models
149
156
 
157
+ # TODO: Refactor generate prompt to minimize the work done on retry
158
+ @retry_gemini
150
159
  def generate_prompt(
151
160
  self, multimodal_input: List[Union[str, MLLMImage]] = []
152
161
  ) -> List[Union[str, MLLMImage]]:
@@ -162,6 +171,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
162
171
  ValueError: If an invalid input type is provided
163
172
  """
164
173
  prompt = []
174
+ settings = get_settings()
175
+
165
176
  for ele in multimodal_input:
166
177
  if isinstance(ele, str):
167
178
  prompt.append(ele)
@@ -170,9 +181,14 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
170
181
  with open(ele.url, "rb") as f:
171
182
  image_data = f.read()
172
183
  else:
173
- response = requests.get(ele.url)
174
- if response.status_code != 200:
175
- raise ValueError(f"Failed to download image: {ele.url}")
184
+ response = requests.get(
185
+ ele.url,
186
+ timeout=(
187
+ settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
188
+ settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
189
+ ),
190
+ )
191
+ response.raise_for_status()
176
192
  image_data = response.content
177
193
 
178
194
  image_part = types.Part.from_bytes(
@@ -183,6 +199,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
183
199
  raise ValueError(f"Invalid input type: {type(ele)}")
184
200
  return prompt
185
201
 
202
+ @retry_gemini
186
203
  def generate(
187
204
  self,
188
205
  multimodal_input: List[Union[str, MLLMImage]],
@@ -222,6 +239,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
222
239
  )
223
240
  return response.text, 0
224
241
 
242
+ @retry_gemini
225
243
  async def a_generate(
226
244
  self,
227
245
  multimodal_input: List[Union[str, MLLMImage]],
@@ -5,23 +5,31 @@ import requests
5
5
  import base64
6
6
  import io
7
7
 
8
+ from deepeval.models.retry_policy import (
9
+ create_retry_decorator,
10
+ )
8
11
  from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
9
12
  from deepeval.models import DeepEvalBaseMLLM
10
13
  from deepeval.test_case import MLLMImage
14
+ from deepeval.config.settings import get_settings
15
+ from deepeval.constants import ProviderSlug as PS
16
+
17
+
18
+ retry_ollama = create_retry_decorator(PS.OLLAMA)
11
19
 
12
20
 
13
21
  class MultimodalOllamaModel(DeepEvalBaseMLLM):
14
- def __init__(
15
- self,
16
- ):
22
+ def __init__(self, **kwargs):
17
23
  model_name = KEY_FILE_HANDLER.fetch_data(
18
24
  ModelKeyValues.LOCAL_MODEL_NAME
19
25
  )
20
26
  self.base_url = KEY_FILE_HANDLER.fetch_data(
21
27
  ModelKeyValues.LOCAL_MODEL_BASE_URL
22
28
  )
29
+ self.kwargs = kwargs
23
30
  super().__init__(model_name)
24
31
 
32
+ @retry_ollama
25
33
  def generate(
26
34
  self,
27
35
  multimodal_input: List[Union[str, MLLMImage]],
@@ -43,6 +51,7 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
43
51
  0,
44
52
  )
45
53
 
54
+ @retry_ollama
46
55
  async def a_generate(
47
56
  self,
48
57
  multimodal_input: List[Union[str, MLLMImage]],
@@ -77,12 +86,14 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
77
86
  }
78
87
  )
79
88
  elif isinstance(ele, MLLMImage):
80
- messages.append(
81
- {
82
- "role": "user",
83
- "images": [self.convert_to_base64(ele.url, ele.local)],
84
- }
85
- )
89
+ img_b64 = self.convert_to_base64(ele.url, ele.local)
90
+ if img_b64 is not None:
91
+ messages.append(
92
+ {
93
+ "role": "user",
94
+ "images": [img_b64],
95
+ }
96
+ )
86
97
  return messages
87
98
 
88
99
  ###############################################
@@ -92,9 +103,17 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
92
103
  def convert_to_base64(self, image_source: str, is_local: bool) -> str:
93
104
  from PIL import Image
94
105
 
106
+ settings = get_settings()
95
107
  try:
96
108
  if not is_local:
97
- response = requests.get(image_source, stream=True)
109
+ response = requests.get(
110
+ image_source,
111
+ stream=True,
112
+ timeout=(
113
+ settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
114
+ settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
115
+ ),
116
+ )
98
117
  response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
99
118
  image = Image.open(io.BytesIO(response.content))
100
119
  else:
@@ -105,15 +124,21 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
105
124
  img_str = base64.b64encode(buffered.getvalue()).decode()
106
125
  return img_str
107
126
 
127
+ except (requests.exceptions.RequestException, OSError) as e:
128
+ # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
129
+ print(f"Image fetch/encode failed: {e}")
130
+ raise
108
131
  except Exception as e:
109
132
  print(f"Error converting image to base64: {e}")
110
133
  return None
111
134
 
112
135
  def load_model(self, async_mode: bool = False):
113
136
  if not async_mode:
114
- return Client(host=self.base_url)
115
- else:
116
- return AsyncClient(host=self.base_url)
137
+ return self._build_client(Client)
138
+ return self._build_client(AsyncClient)
139
+
140
+ def _build_client(self, cls):
141
+ return cls(host=self.base_url, **self.kwargs)
117
142
 
118
143
  def get_model_name(self):
119
144
  return f"{self.model_name} (Ollama)"
@@ -3,15 +3,7 @@ from openai import OpenAI, AsyncOpenAI
3
3
  from openai.types.chat import ParsedChatCompletion
4
4
  from pydantic import BaseModel
5
5
  from io import BytesIO
6
- import logging
7
- import openai
8
6
  import base64
9
- from tenacity import (
10
- retry,
11
- retry_if_exception_type,
12
- wait_exponential_jitter,
13
- RetryCallState,
14
- )
15
7
 
16
8
  from deepeval.models.llms.openai_model import (
17
9
  model_pricing,
@@ -21,21 +13,14 @@ from deepeval.models import DeepEvalBaseMLLM
21
13
  from deepeval.models.llms.utils import trim_and_load_json
22
14
  from deepeval.test_case import MLLMImage
23
15
  from deepeval.models.utils import parse_model_name
24
-
25
- retryable_exceptions = (
26
- openai.RateLimitError,
27
- openai.APIConnectionError,
28
- openai.APITimeoutError,
29
- openai.LengthFinishReasonError,
16
+ from deepeval.models.retry_policy import (
17
+ create_retry_decorator,
18
+ sdk_retries_for,
30
19
  )
20
+ from deepeval.constants import ProviderSlug as PS
31
21
 
32
22
 
33
- def log_retry_error(retry_state: RetryCallState):
34
- exception = retry_state.outcome.exception()
35
- logging.error(
36
- f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
37
- )
38
-
23
+ retry_openai = create_retry_decorator(PS.OPENAI)
39
24
 
40
25
  valid_multimodal_gpt_models = [
41
26
  "gpt-4o",
@@ -95,11 +80,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
95
80
  # Generate functions
96
81
  ###############################################
97
82
 
98
- @retry(
99
- wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
100
- retry=retry_if_exception_type(retryable_exceptions),
101
- after=log_retry_error,
102
- )
83
+ @retry_openai
103
84
  def generate(
104
85
  self,
105
86
  multimodal_input: List[Union[str, MLLMImage]],
@@ -136,11 +117,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
136
117
  else:
137
118
  return output, cost
138
119
 
139
- @retry(
140
- wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
141
- retry=retry_if_exception_type(retryable_exceptions),
142
- after=log_retry_error,
143
- )
120
+ @retry_openai
144
121
  async def a_generate(
145
122
  self,
146
123
  multimodal_input: List[Union[str, MLLMImage]],
@@ -181,17 +158,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
181
158
  # Other generate functions
182
159
  ###############################################
183
160
 
184
- @retry(
185
- wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
186
- retry=retry_if_exception_type(retryable_exceptions),
187
- after=log_retry_error,
188
- )
161
+ @retry_openai
189
162
  def generate_raw_response(
190
163
  self,
191
164
  multimodal_input: List[Union[str, MLLMImage]],
192
165
  top_logprobs: int = 5,
193
166
  ) -> Tuple[ParsedChatCompletion, float]:
194
- client = OpenAI(api_key=self._openai_api_key)
167
+ client = self._client()
195
168
  prompt = self.generate_prompt(multimodal_input)
196
169
  messages = [{"role": "user", "content": prompt}]
197
170
  completion = client.chat.completions.create(
@@ -206,17 +179,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
206
179
  cost = self.calculate_cost(input_tokens, output_tokens)
207
180
  return completion, cost
208
181
 
209
- @retry(
210
- wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
211
- retry=retry_if_exception_type(retryable_exceptions),
212
- after=log_retry_error,
213
- )
182
+ @retry_openai
214
183
  async def a_generate_raw_response(
215
184
  self,
216
185
  multimodal_input: List[Union[str, MLLMImage]],
217
186
  top_logprobs: int = 5,
218
187
  ) -> Tuple[ParsedChatCompletion, float]:
219
- client = AsyncOpenAI(api_key=self._openai_api_key)
188
+ client = self._client(async_mode=True)
220
189
  prompt = self.generate_prompt(multimodal_input)
221
190
  messages = [{"role": "user", "content": prompt}]
222
191
  completion = await client.chat.completions.create(
@@ -278,5 +247,12 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
278
247
  base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
279
248
  return base64_encoded_image
280
249
 
250
+ def _client(self, async_mode: bool = False):
251
+ kw = {"api_key": self._openai_api_key}
252
+ if not sdk_retries_for(PS.OPENAI):
253
+ kw["max_retries"] = 0
254
+ Client = AsyncOpenAI if async_mode else OpenAI
255
+ return Client(**kw)
256
+
281
257
  def get_model_name(self):
282
258
  return self.model_name