deepeval 3.5.2__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,16 @@
+import asyncio
+
 from typing import Optional, Tuple, Union, Dict
 from contextlib import AsyncExitStack
 from pydantic import BaseModel
-import asyncio
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.constants import ProviderSlug as PS
 
 # check aiobotocore availability
 try:
@@ -15,6 +21,9 @@ try:
 except ImportError:
     aiobotocore_available = False
 
+# define retry policy
+retry_bedrock = create_retry_decorator(PS.BEDROCK)
+
 
 def _check_aiobotocore_available():
     if not aiobotocore_available:
@@ -53,11 +62,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
         # prepare aiobotocore session, config, and async exit stack
        self._session = get_session()
-        self._config = Config(retries={"max_attempts": 5, "mode": "adaptive"})
         self._exit_stack = AsyncExitStack()
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         self._client = None
+        self._sdk_retry_mode: Optional[bool] = None
 
     ###############################################
     # Generate functions
@@ -68,6 +77,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ) -> Tuple[Union[str, Dict], float]:
         return asyncio.run(self.a_generate(prompt, schema))
 
+    @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -94,16 +104,33 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################
 
     async def _ensure_client(self):
-        if self._client is None:
+        use_sdk = sdk_retries_for(PS.BEDROCK)
+
+        # only rebuild if client is missing or the sdk retry mode changes
+        if self._client is None or self._sdk_retry_mode != use_sdk:
+            # Close any previous
+            if self._client is not None:
+                await self._exit_stack.aclose()
+                self._client = None
+
+            # create retry config for botocore
+            retries_config = {"max_attempts": (5 if use_sdk else 1)}
+            if use_sdk:
+                retries_config["mode"] = "adaptive"
+
+            config = Config(retries=retries_config)
+
             cm = self._session.create_client(
                 "bedrock-runtime",
                 region_name=self.region_name,
                 aws_access_key_id=self.aws_access_key_id,
                 aws_secret_access_key=self.aws_secret_access_key,
-                config=self._config,
+                config=config,
                 **self.kwargs,
             )
             self._client = await self._exit_stack.enter_async_context(cm)
+            self._sdk_retry_mode = use_sdk
+
         return self._client
 
     async def close(self):
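The `_ensure_client` change above keys the botocore retry config off `sdk_retries_for`: when deepeval's Tenacity decorator owns retries, botocore is held to a single attempt, and the client is rebuilt if the mode flips between calls. A minimal sketch of that gating, assuming only that `sdk_retries_for(PS.BEDROCK)` returns a bool:

from botocore.config import Config

def bedrock_retry_config(use_sdk_retries: bool) -> Config:
    # Tenacity owns retries by default: one botocore attempt, no SDK backoff.
    retries = {"max_attempts": 5 if use_sdk_retries else 1}
    if use_sdk_retries:
        # Opting in restores botocore's adaptive client-side rate limiting.
        retries["mode"] = "adaptive"
    return Config(retries=retries)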
@@ -1,12 +1,22 @@
+import warnings
+
 from typing import Optional, Tuple, Union, Dict
 from anthropic import Anthropic, AsyncAnthropic
 from pydantic import BaseModel
-import os
-import warnings
 
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.utils import parse_model_name
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
 
 model_pricing = {
     "claude-opus-4-20250514": {"input": 15.00 / 1e6, "output": 75.00 / 1e6},
@@ -45,6 +55,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################
 
+    @retry_anthropic
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -70,6 +81,7 @@ class AnthropicModel(DeepEvalBaseLLM):
             json_output = trim_and_load_json(message.content[0].text)
             return schema.model_validate(json_output), cost
 
+    @retry_anthropic
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -128,17 +140,31 @@ class AnthropicModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Anthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
-        else:
-            return AsyncAnthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
+            return self._build_client(Anthropic)
+        return self._build_client(AsyncAnthropic)
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # If we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS, then honor their max_retries.
+        if not sdk_retries_for(PS.ANTHROPIC):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        settings = get_settings()
+        kw = dict(
+            api_key=settings.ANTHROPIC_API_KEY or self._anthropic_api_key,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # in case older SDKs don't accept max_retries, drop it and retry
+            if "max_retries" in str(e):
                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
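`_build_client` introduces a construct-then-degrade pattern that recurs in the Azure, DeepSeek, Kimi, and local-model files below: pass `max_retries=0` optimistically, and if the installed SDK is too old to accept that keyword, drop it and construct once more. Distilled from the diff above:

def build_client(cls, **kw):
    # kw includes max_retries=0 whenever Tenacity is managing retries.
    try:
        return cls(**kw)
    except TypeError as e:
        # SDKs that predate the keyword raise TypeError; drop it and retry once.
        if "max_retries" in str(e):
            kw.pop("max_retries", None)
            return cls(**kw)
        raise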
@@ -1,9 +1,7 @@
-from tenacity import retry, retry_if_exception_type, wait_exponential_jitter
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import openai
 
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
@@ -11,17 +9,18 @@ from deepeval.models.llms.openai_model import (
     structured_outputs_models,
     json_mode_models,
     model_pricing,
-    log_retry_error,
 )
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
+from deepeval.constants import ProviderSlug as PS
 
-retryable_exceptions = (
-    openai.RateLimitError,
-    openai.APIConnectionError,
-    openai.APITimeoutError,
-    openai.LengthFinishReasonError,
-)
+
+retry_azure = create_retry_decorator(PS.AZURE)
 
 
 class AzureOpenAIModel(DeepEvalBaseLLM):
@@ -67,11 +66,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -130,11 +125,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
@@ -199,11 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate_raw_response(
         self,
         prompt: str,
@@ -226,11 +213,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -272,17 +255,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return AzureOpenAI(
-                api_key=self.azure_openai_api_key,
-                api_version=self.openai_api_version,
-                azure_endpoint=self.azure_endpoint,
-                azure_deployment=self.deployment_name,
-                **self.kwargs,  # Keep this for client initialization
-            )
-        return AsyncAzureOpenAI(
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
             api_key=self.azure_openai_api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.deployment_name,
-            **self.kwargs,  # ← Keep this for client initialization
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
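The deleted inline decorators show roughly what `create_retry_decorator` now centralizes. A hedged reconstruction in Tenacity terms, built only from the removed code (the actual policy lives in `deepeval/models/retry_policy.py` and may differ, for example in stop conditions or logging hooks):

import openai
from tenacity import retry, retry_if_exception_type, wait_exponential_jitter

retryable_exceptions = (
    openai.RateLimitError,
    openai.APIConnectionError,
    openai.APITimeoutError,
    openai.LengthFinishReasonError,
)

# Exponential backoff with jitter, capped at 10s, as in the old decorators.
retry_azure_sketch = retry(
    wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
    retry=retry_if_exception_type(retryable_exceptions),
)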
@@ -5,6 +5,15 @@ from pydantic import BaseModel
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_deepseek = create_retry_decorator(PS.DEEPSEEK)
 
 model_pricing = {
     "deepseek-chat": {
@@ -55,6 +64,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_deepseek
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -88,6 +98,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
         )
         return output, cost
 
+    @retry_deepseek
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -141,13 +152,31 @@ class DeepSeekModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries for "deepseek" via DEEPEVAL_SDK_RETRY_PROVIDERS, honor it.
+        if not sdk_retries_for(PS.DEEPSEEK):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # In case an older OpenAI client doesn't accept max_retries, drop it and retry.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
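The comments and docstrings throughout this diff describe `DEEPEVAL_SDK_RETRY_PROVIDERS` as an opt-in list of provider slugs. A hypothetical sketch of the check those helpers perform; the real `sdk_retries_for` lives in `deepeval.models.retry_policy` and its parsing rules are not shown in this diff:

import os

def sdk_retries_for_sketch(slug: str) -> bool:
    # Assumed format: comma-separated slugs, e.g. "deepseek,azure".
    raw = os.getenv("DEEPEVAL_SDK_RETRY_PROVIDERS", "")
    opted_in = {part.strip().lower() for part in raw.split(",") if part.strip()}
    return slug.lower() in opted_in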
@@ -3,11 +3,19 @@ from google.genai import types
 from typing import Optional, Dict
 from google import genai
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+
 
 default_gemini_model = "gemini-1.5-pro"
 
+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)
+
 
 class GeminiModel(DeepEvalBaseLLM):
     """Class that implements Google Gemini models for text-based evaluation.
@@ -145,6 +153,7 @@ class GeminiModel(DeepEvalBaseLLM):
         ]
         return self.client.models
 
+    @retry_gemini
     def generate(self, prompt: str, schema: Optional[BaseModel] = None) -> str:
         """Generates text from a prompt.
 
@@ -180,6 +189,7 @@ class GeminiModel(DeepEvalBaseLLM):
             )
         return response.text, 0
 
+    @retry_gemini
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> str:
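Note that the same `retry_gemini` decorator wraps both the sync `generate` and the coroutine `a_generate`. Assuming `create_retry_decorator` returns a Tenacity decorator (which the removed Tenacity imports elsewhere in this diff suggest), that works because Tenacity's `retry` detects coroutine functions and switches to async sleeps between attempts, so one provider-level decorator covers both call paths:

import asyncio
from tenacity import retry, stop_after_attempt

@retry(stop=stop_after_attempt(3))
def generate() -> str:
    return "ok"  # retried synchronously on failure

@retry(stop=stop_after_attempt(3))
async def a_generate() -> str:
    return "ok"  # retried with asyncio-based sleeps on failure

print(generate())
print(asyncio.run(a_generate()))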
@@ -1,10 +1,20 @@
+import os
+
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import os
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_grok = create_retry_decorator(PS.GROK)
 
 
 structured_outputs_models = [
@@ -81,6 +91,7 @@ class GrokModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_grok
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -118,6 +129,7 @@ class GrokModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
+    @retry_grok
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -178,9 +190,9 @@ class GrokModel(DeepEvalBaseLLM):
             from xai_sdk import Client, AsyncClient
 
             if not async_mode:
-                return Client(api_key=self.api_key, **self.kwargs)
+                return self._build_client(Client)
             else:
-                return AsyncClient(api_key=self.api_key, **self.kwargs)
+                return self._build_client(AsyncClient)
         except ImportError:
             raise ImportError(
                 "xai_sdk is required to use GrokModel. Please install it with: pip install xai-sdk"
@@ -188,3 +200,38 @@ class GrokModel(DeepEvalBaseLLM):
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, disable gRPC channel retries to avoid double retry.
+        If the user opts into SDK retries for 'grok' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave channel options as is
+        """
+        kwargs = dict(self.kwargs or {})
+        opts = list(kwargs.get("channel_options", []))
+        if not sdk_retries_for(PS.GROK):
+            # remove any explicit enable flag, then disable retries
+            opts = [
+                option
+                for option in opts
+                if not (
+                    isinstance(option, (tuple, list))
+                    and option
+                    and option[0] == "grpc.enable_retries"
+                )
+            ]
+            opts.append(("grpc.enable_retries", 0))
+        if opts:
+            kwargs["channel_options"] = opts
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(api_key=self.api_key, **self._client_kwargs())
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # fallback: older SDK version might not accept channel_options
+            if "channel_options" in str(e):
+                kw.pop("channel_options", None)
+                return cls(**kw)
+            raise
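Because the xai-sdk speaks gRPC rather than HTTP, the Grok variant of `_client_kwargs` disables retries through channel options instead of a `max_retries` keyword. A worked example of the filtering it performs (the keepalive option is just a stand-in for any unrelated user setting):

# User-supplied options; the explicit enable flag will be stripped.
channel_options = [
    ("grpc.enable_retries", 1),
    ("grpc.keepalive_time_ms", 30000),
]

opts = [
    option
    for option in channel_options
    if not (
        isinstance(option, (tuple, list))
        and option
        and option[0] == "grpc.enable_retries"
    )
]
opts.append(("grpc.enable_retries", 0))

assert opts == [("grpc.keepalive_time_ms", 30000), ("grpc.enable_retries", 0)]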
@@ -2,11 +2,18 @@ from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
 
 
+retry_kimi = create_retry_decorator(PS.KIMI)
+
 json_mode_models = [
     "kimi-thinking-preview",
     "kimi-k2-0711-preview",
@@ -100,6 +107,7 @@ class KimiModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_kimi
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -137,6 +145,7 @@ class KimiModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
+    @retry_kimi
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -194,13 +203,34 @@ class KimiModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'kimi' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.KIMI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
 
     def get_model_name(self):
         return f"{self.model_name}"
@@ -1,12 +1,20 @@
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalModel(DeepEvalBaseLLM):
@@ -43,6 +51,7 @@ class LocalModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_local
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -61,6 +70,7 @@ class LocalModel(DeepEvalBaseLLM):
         else:
             return res_content, 0.0
 
+    @retry_local
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -91,14 +101,30 @@ class LocalModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
+        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.LOCAL):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.local_model_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
@@ -2,8 +2,16 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
+
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaModel(DeepEvalBaseLLM):
@@ -34,6 +42,7 @@ class OllamaModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_ollama
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -56,6 +65,7 @@ class OllamaModel(DeepEvalBaseLLM):
             0,
         )
 
+    @retry_ollama
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -84,9 +94,11 @@ class OllamaModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Client(host=self.base_url, **self.kwargs)
-        else:
-            return AsyncClient(host=self.base_url, **self.kwargs)
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"