deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff shows the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (39)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +94 -2
  3. deepeval/config/utils.py +54 -1
  4. deepeval/constants.py +27 -0
  5. deepeval/integrations/langchain/__init__.py +2 -3
  6. deepeval/integrations/langchain/callback.py +126 -301
  7. deepeval/integrations/langchain/patch.py +24 -13
  8. deepeval/integrations/langchain/utils.py +203 -1
  9. deepeval/integrations/pydantic_ai/patcher.py +220 -185
  10. deepeval/integrations/pydantic_ai/utils.py +86 -0
  11. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
  12. deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
  13. deepeval/models/embedding_models/azure_embedding_model.py +40 -9
  14. deepeval/models/embedding_models/local_embedding_model.py +54 -11
  15. deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
  16. deepeval/models/embedding_models/openai_embedding_model.py +47 -5
  17. deepeval/models/llms/amazon_bedrock_model.py +31 -4
  18. deepeval/models/llms/anthropic_model.py +39 -13
  19. deepeval/models/llms/azure_model.py +37 -38
  20. deepeval/models/llms/deepseek_model.py +36 -7
  21. deepeval/models/llms/gemini_model.py +10 -0
  22. deepeval/models/llms/grok_model.py +50 -3
  23. deepeval/models/llms/kimi_model.py +37 -7
  24. deepeval/models/llms/local_model.py +38 -12
  25. deepeval/models/llms/ollama_model.py +15 -3
  26. deepeval/models/llms/openai_model.py +37 -44
  27. deepeval/models/mlllms/gemini_model.py +21 -3
  28. deepeval/models/mlllms/ollama_model.py +38 -13
  29. deepeval/models/mlllms/openai_model.py +18 -42
  30. deepeval/models/retry_policy.py +548 -64
  31. deepeval/prompt/api.py +13 -9
  32. deepeval/prompt/prompt.py +19 -9
  33. deepeval/tracing/tracing.py +87 -0
  34. deepeval/utils.py +12 -0
  35. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
  36. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
  37. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
  38. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
  39. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/deepseek_model.py
@@ -5,6 +5,15 @@ from pydantic import BaseModel
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_deepseek = create_retry_decorator(PS.DEEPSEEK)

 model_pricing = {
     "deepseek-chat": {
@@ -55,6 +64,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

+    @retry_deepseek
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -88,6 +98,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             )
         return output, cost

+    @retry_deepseek
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -141,13 +152,31 @@ class DeepSeekModel(DeepEvalBaseLLM):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)

     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries for "deepseek" via DEEPEVAL_SDK_RETRY_PROVIDERS, honor it.
+        if not sdk_retries_for(PS.DEEPSEEK):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # In case an older OpenAI client doesn’t accept max_retries, drop it and retry.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
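
The pattern above repeats across the provider models in this release: a module-level Tenacity decorator from create_retry_decorator owns retries, and _client_kwargs zeroes out the SDK's own max_retries unless the user opts that provider back into SDK retries. A rough sketch of how the DEEPEVAL_SDK_RETRY_PROVIDERS opt-in could gate this (the parsing below is an assumption; the real logic lives in deepeval/models/retry_policy.py):

    import os

    def sdk_retries_for_sketch(slug: str) -> bool:
        # Hypothetical parser: a comma-separated list of provider slugs,
        # e.g. DEEPEVAL_SDK_RETRY_PROVIDERS="deepseek,openai".
        raw = os.getenv("DEEPEVAL_SDK_RETRY_PROVIDERS", "")
        return slug in {p.strip().lower() for p in raw.split(",") if p.strip()}

    client_kwargs = {}
    if not sdk_retries_for_sketch("deepseek"):
        client_kwargs["max_retries"] = 0  # Tenacity is the single retry layer
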
deepeval/models/llms/gemini_model.py
@@ -3,11 +3,19 @@ from google.genai import types
 from typing import Optional, Dict
 from google import genai

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+

 default_gemini_model = "gemini-1.5-pro"

+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)
+

 class GeminiModel(DeepEvalBaseLLM):
     """Class that implements Google Gemini models for text-based evaluation.
@@ -145,6 +153,7 @@ class GeminiModel(DeepEvalBaseLLM):
         ]
         return self.client.models

+    @retry_gemini
     def generate(self, prompt: str, schema: Optional[BaseModel] = None) -> str:
         """Generates text from a prompt.

@@ -180,6 +189,7 @@ class GeminiModel(DeepEvalBaseLLM):
             )
         return response.text, 0

+    @retry_gemini
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> str:
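
Note that the same retry_gemini decorator is applied to both generate and a_generate: Tenacity's retry decorator detects coroutine functions and retries them asynchronously, so one policy object covers both call paths. A minimal illustration using plain Tenacity rather than deepeval's factory:

    from tenacity import retry, stop_after_attempt

    @retry(stop=stop_after_attempt(3))
    def generate():  # sync call path
        ...

    @retry(stop=stop_after_attempt(3))
    async def a_generate():  # async call path, awaited and retried by Tenacity
        ...
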
deepeval/models/llms/grok_model.py
@@ -1,10 +1,20 @@
+import os
+
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import os

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_grok = create_retry_decorator(PS.GROK)


 structured_outputs_models = [
@@ -81,6 +91,7 @@ class GrokModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

+    @retry_grok
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -118,6 +129,7 @@
         else:
             return output, cost

+    @retry_grok
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -178,9 +190,9 @@
             from xai_sdk import Client, AsyncClient

             if not async_mode:
-                return Client(api_key=self.api_key, **self.kwargs)
+                return self._build_client(Client)
             else:
-                return AsyncClient(api_key=self.api_key, **self.kwargs)
+                return self._build_client(AsyncClient)
         except ImportError:
             raise ImportError(
                 "xai_sdk is required to use GrokModel. Please install it with: pip install xai-sdk"
@@ -188,3 +200,38 @@

     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, disable gRPC channel retries to avoid double retry.
+        If the user opts into SDK retries for 'grok' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave channel options as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        opts = list(kwargs.get("channel_options", []))
+        if not sdk_retries_for(PS.GROK):
+            # remove any explicit enable flag, then disable retries
+            opts = [
+                option
+                for option in opts
+                if not (
+                    isinstance(option, (tuple, list))
+                    and option
+                    and option[0] == "grpc.enable_retries"
+                )
+            ]
+            opts.append(("grpc.enable_retries", 0))
+        if opts:
+            kwargs["channel_options"] = opts
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(api_key=self.api_key, **self._client_kwargs())
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # fallback: older SDK version might not accept channel_options
+            if "channel_options" in str(e):
+                kw.pop("channel_options", None)
+                return cls(**kw)
+            raise
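
Unlike the OpenAI-compatible clients, the xai-sdk client is gRPC-based, so SDK-side retries are controlled through channel options, which gRPC expects as a list of (name, value) tuples. A hedged usage sketch of what _client_kwargs produces, assuming the installed xai-sdk accepts a channel_options kwarg (the TypeError fallback above handles versions that don't):

    from xai_sdk import Client  # requires: pip install xai-sdk

    # With Tenacity managing retries, the effective construction becomes:
    client = Client(
        api_key="xai-...",  # placeholder key
        channel_options=[("grpc.enable_retries", 0)],  # gRPC channel retries off
    )
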
deepeval/models/llms/kimi_model.py
@@ -2,11 +2,18 @@ from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS


+retry_kimi = create_retry_decorator(PS.KIMI)
+
 json_mode_models = [
     "kimi-thinking-preview",
     "kimi-k2-0711-preview",
@@ -100,6 +107,7 @@ class KimiModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

+    @retry_kimi
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -137,6 +145,7 @@ class KimiModel(DeepEvalBaseLLM):
         else:
             return output, cost

+    @retry_kimi
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -194,13 +203,34 @@

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'kimi' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.KIMI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise

     def get_model_name(self):
         return f"{self.model_name}"
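
The _build_client fallback used here (and in the DeepSeek, Local, and GPT models) is a small graceful-degradation idiom: pass the kwarg optimistically, and drop it only if the constructor's TypeError names it. Extracted as a generic sketch (the helper name is ours, not the package's):

    def build_with_optional_kwarg(cls, optional_key: str, **kw):
        """Try cls(**kw); if cls rejects optional_key, retry once without it."""
        try:
            return cls(**kw)
        except TypeError as e:
            if optional_key in str(e):  # constructor doesn't know this kwarg
                kw.pop(optional_key, None)
                return cls(**kw)
            raise

    # e.g. client = build_with_optional_kwarg(
    #     OpenAI, "max_retries", api_key="sk-...", max_retries=0
    # )
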
deepeval/models/llms/local_model.py
@@ -1,12 +1,20 @@
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_local = create_retry_decorator(PS.LOCAL)


 class LocalModel(DeepEvalBaseLLM):
@@ -43,6 +51,7 @@ class LocalModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

+    @retry_local
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -61,6 +70,7 @@ class LocalModel(DeepEvalBaseLLM):
         else:
             return res_content, 0.0

+    @retry_local
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -91,14 +101,30 @@ class LocalModel(DeepEvalBaseLLM):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
+        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.LOCAL):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.local_model_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
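
Every model module now keys its retry policy off deepeval.constants.ProviderSlug, imported as PS (one of the +27 lines in deepeval/constants.py listed above). From the members this diff uses (OPENAI, DEEPSEEK, GOOGLE, GROK, KIMI, LOCAL, OLLAMA) it is presumably a string-valued enum along these lines; a sketch, not the packaged definition:

    from enum import Enum

    class ProviderSlug(str, Enum):  # hypothetical reconstruction
        OPENAI = "openai"
        DEEPSEEK = "deepseek"
        GOOGLE = "google"
        GROK = "grok"
        KIMI = "kimi"
        LOCAL = "local"
        OLLAMA = "ollama"

String-valued members would let the same slugs double as DEEPEVAL_SDK_RETRY_PROVIDERS tokens, matching the 'deepseek', 'grok', 'kimi', 'local', and 'openai' spellings quoted in the docstrings above.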
deepeval/models/llms/ollama_model.py
@@ -2,8 +2,16 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
+
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)


 class OllamaModel(DeepEvalBaseLLM):
@@ -34,6 +42,7 @@ class OllamaModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

+    @retry_ollama
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -56,6 +65,7 @@ class OllamaModel(DeepEvalBaseLLM):
             0,
         )

+    @retry_ollama
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -84,9 +94,11 @@ class OllamaModel(DeepEvalBaseLLM):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Client(host=self.base_url, **self.kwargs)
-        else:
-            return AsyncClient(host=self.base_url, **self.kwargs)
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
deepeval/models/llms/openai_model.py
@@ -1,5 +1,3 @@
-import logging
-
 from openai.types.chat.chat_completion import ChatCompletion
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
@@ -10,27 +8,17 @@ from openai import (
     AsyncOpenAI,
 )

-from tenacity import retry, RetryCallState, before_sleep_log
-
+from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
 from deepeval.models.retry_policy import (
-    OPENAI_ERROR_POLICY,
-    default_wait,
-    default_stop,
-    retry_predicate,
+    create_retry_decorator,
+    sdk_retries_for,
 )

-logger = logging.getLogger("deepeval.openai_model")
-
-
-def log_retry_error(retry_state: RetryCallState):
-    exception = retry_state.outcome.exception()
-    logger.error(
-        f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
-    )

+retry_openai = create_retry_decorator(PS.OPENAI)

 valid_gpt_models = [
     "gpt-3.5-turbo",
@@ -219,21 +207,6 @@ models_requiring_temperature_1 = [
     "gpt-5-chat-latest",
 ]

-_base_retry_rules_kw = dict(
-    wait=default_wait(),
-    stop=default_stop(),
-    retry=retry_predicate(OPENAI_ERROR_POLICY),
-    before_sleep=before_sleep_log(
-        logger, logging.INFO
-    ),  # <- logs only on retries
-    after=log_retry_error,
-)
-
-
-def _openai_client_kwargs():
-    # Avoid double-retry at SDK layer by disabling the SDK's own retries so tenacity is the single source of truth for retry logic.
-    return {"max_retries": 0}
-


 class GPTModel(DeepEvalBaseLLM):
@@ -311,7 +284,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################

-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -370,7 +343,7 @@ class GPTModel(DeepEvalBaseLLM):
         else:
             return output, cost

-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
@@ -434,7 +407,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_raw_response(
         self,
         prompt: str,
@@ -457,7 +430,7 @@

         return completion, cost

-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -480,7 +453,7 @@

         return completion, cost

-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_samples(
         self, prompt: str, n: int, temperature: float
     ) -> Tuple[list[str], float]:
@@ -500,6 +473,7 @@
     ###############################################

     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        # TODO: consider logging a warning instead of defaulting to whole model pricing
         pricing = model_pricing.get(self.model_name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
@@ -513,13 +487,32 @@
         return self.model_name

     def load_model(self, async_mode: bool = False):
-        kwargs = {**self.kwargs, **_openai_client_kwargs()}
         if not async_mode:
-            return OpenAI(
-                api_key=self._openai_api_key,
-                base_url=self.base_url,
-                **kwargs,
-            )
-        return AsyncOpenAI(
-            api_key=self._openai_api_key, base_url=self.base_url, **kwargs
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENAI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self._openai_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
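
The removed _base_retry_rules_kw block shows exactly what create_retry_decorator now centralizes: a Tenacity policy combining a wait strategy, a stop condition, an error-classifying retry predicate, and retry-only logging. A sketch of an equivalent hand-rolled decorator, using the old block as the template (the wait/stop values below are assumptions; the real ones come from default_wait/default_stop in deepeval/models/retry_policy.py):

    import logging
    from tenacity import (
        retry,
        stop_after_attempt,
        wait_exponential_jitter,
        before_sleep_log,
    )

    logger = logging.getLogger("deepeval.openai_model")

    retry_openai_sketch = retry(
        wait=wait_exponential_jitter(initial=1, max=10),  # assumed backoff shape
        stop=stop_after_attempt(5),                       # assumed attempt cap
        before_sleep=before_sleep_log(logger, logging.INFO),  # logs only on retries
    )

Centralizing this in a factory keyed by provider slug is what lets every model file above shrink its retry wiring to a single decorator line.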
deepeval/models/mlllms/gemini_model.py
@@ -4,12 +4,19 @@ from pydantic import BaseModel
 from google.genai import types
 from google import genai

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS


 default_multimodal_gemini_model = "gemini-1.5-pro"
+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)


 class MultimodalGeminiModel(DeepEvalBaseMLLM):
@@ -147,6 +154,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         self.model_temperature = 0.0
         return self.client.models

+    # TODO: Refactor generate prompt to minimize the work done on retry
+    @retry_gemini
     def generate_prompt(
         self, multimodal_input: List[Union[str, MLLMImage]] = []
     ) -> List[Union[str, MLLMImage]]:
@@ -162,6 +171,8 @@
             ValueError: If an invalid input type is provided
         """
         prompt = []
+        settings = get_settings()
+
         for ele in multimodal_input:
             if isinstance(ele, str):
                 prompt.append(ele)
@@ -170,9 +181,14 @@
                 with open(ele.url, "rb") as f:
                     image_data = f.read()
                 else:
-                    response = requests.get(ele.url)
-                    if response.status_code != 200:
-                        raise ValueError(f"Failed to download image: {ele.url}")
+                    response = requests.get(
+                        ele.url,
+                        timeout=(
+                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                        ),
+                    )
+                    response.raise_for_status()
                     image_data = response.content

                 image_part = types.Part.from_bytes(
@@ -183,6 +199,7 @@
                 raise ValueError(f"Invalid input type: {type(ele)}")
         return prompt

+    @retry_gemini
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -222,6 +239,7 @@
             )
         return response.text, 0

+    @retry_gemini
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
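
The switch from a bare requests.get to an explicit timeout tuple is the behavioral fix in the image-download path: requests interprets timeout=(a, b) as a seconds to establish the connection and b seconds to wait between bytes read, and raise_for_status() replaces the manual status check with an exception the surrounding code can handle uniformly. A standalone illustration with placeholder values in place of the new settings fields:

    import requests

    resp = requests.get(
        "https://example.com/image.png",  # placeholder URL
        timeout=(5.0, 30.0),  # (connect timeout, read timeout) in seconds
    )
    resp.raise_for_status()  # raises requests.HTTPError on 4xx/5xx
    image_data = resp.content

Without a timeout, requests can block indefinitely on an unresponsive host, which would also stall any retry policy wrapping the call.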