deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +94 -2
  3. deepeval/config/utils.py +54 -1
  4. deepeval/constants.py +27 -0
  5. deepeval/integrations/langchain/__init__.py +2 -3
  6. deepeval/integrations/langchain/callback.py +126 -301
  7. deepeval/integrations/langchain/patch.py +24 -13
  8. deepeval/integrations/langchain/utils.py +203 -1
  9. deepeval/integrations/pydantic_ai/patcher.py +220 -185
  10. deepeval/integrations/pydantic_ai/utils.py +86 -0
  11. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
  12. deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
  13. deepeval/models/embedding_models/azure_embedding_model.py +40 -9
  14. deepeval/models/embedding_models/local_embedding_model.py +54 -11
  15. deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
  16. deepeval/models/embedding_models/openai_embedding_model.py +47 -5
  17. deepeval/models/llms/amazon_bedrock_model.py +31 -4
  18. deepeval/models/llms/anthropic_model.py +39 -13
  19. deepeval/models/llms/azure_model.py +37 -38
  20. deepeval/models/llms/deepseek_model.py +36 -7
  21. deepeval/models/llms/gemini_model.py +10 -0
  22. deepeval/models/llms/grok_model.py +50 -3
  23. deepeval/models/llms/kimi_model.py +37 -7
  24. deepeval/models/llms/local_model.py +38 -12
  25. deepeval/models/llms/ollama_model.py +15 -3
  26. deepeval/models/llms/openai_model.py +37 -44
  27. deepeval/models/mlllms/gemini_model.py +21 -3
  28. deepeval/models/mlllms/ollama_model.py +38 -13
  29. deepeval/models/mlllms/openai_model.py +18 -42
  30. deepeval/models/retry_policy.py +548 -64
  31. deepeval/prompt/api.py +13 -9
  32. deepeval/prompt/prompt.py +19 -9
  33. deepeval/tracing/tracing.py +87 -0
  34. deepeval/utils.py +12 -0
  35. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
  36. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
  37. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
  38. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
  39. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/azure_embedding_model.py

@@ -1,4 +1,4 @@
-from typing import List
+from typing import Dict, List
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from deepeval.key_handler import (
     EmbeddingKeyValues,
@@ -6,10 +6,18 @@ from deepeval.key_handler import (
     KEY_FILE_HANDLER,
 )
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_azure = create_retry_decorator(PS.AZURE)


 class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self):
+    def __init__(self, **kwargs):
         self.azure_openai_api_key = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.AZURE_OPENAI_API_KEY
         )
@@ -23,7 +31,9 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             ModelKeyValues.AZURE_OPENAI_ENDPOINT
         )
         self.model_name = self.azure_embedding_deployment
+        self.kwargs = kwargs

+    @retry_azure
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -32,6 +42,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding

+    @retry_azure
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -40,6 +51,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]

+    @retry_azure
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -48,6 +60,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding

+    @retry_azure
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -61,15 +74,33 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return AzureOpenAI(
-                api_key=self.azure_openai_api_key,
-                api_version=self.openai_api_version,
-                azure_endpoint=self.azure_endpoint,
-                azure_deployment=self.azure_embedding_deployment,
-            )
-        return AsyncAzureOpenAI(
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
             api_key=self.azure_openai_api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.azure_embedding_deployment,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
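The pattern above repeats in every provider file touched by this release: a module-level decorator built once from a provider slug, then applied uniformly to sync and async methods. `create_retry_decorator` itself lives in the reworked `deepeval/models/retry_policy.py` and is not shown in this diff; the sketch below is an illustrative stand-in (the Tenacity APIs are real, the policy values are made up) showing why one decorator can wrap `embed_text` and `a_embed_text` alike — Tenacity's `retry` detects coroutine functions and awaits them.

from tenacity import retry, stop_after_attempt, wait_exponential_jitter

def make_provider_retry(slug: str):
    # Illustrative policy only; the real one is defined in retry_policy.py.
    return retry(
        stop=stop_after_attempt(4),
        wait=wait_exponential_jitter(initial=1, max=10),
        reraise=True,
    )

retry_azure_sketch = make_provider_retry("azure")

@retry_azure_sketch
def embed_text(text: str) -> list:
    ...

@retry_azure_sketch
async def a_embed_text(text: str) -> list:
    ...  # Tenacity wraps coroutines transparently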
deepeval/models/embedding_models/local_embedding_model.py

@@ -1,12 +1,21 @@
-from openai import OpenAI
-from typing import List
+from openai import OpenAI, AsyncOpenAI
+from typing import Dict, List

 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_local = create_retry_decorator(PS.LOCAL)


 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, **kwargs):
         self.base_url = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
@@ -16,13 +25,10 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self.api_key = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
         )
-        self.args = args
         self.kwargs = kwargs
         super().__init__(model_name)

-    def load_model(self):
-        return OpenAI(base_url=self.base_url, api_key=self.api_key)
-
+    @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
@@ -31,6 +37,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding

+    @retry_local
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
@@ -39,21 +46,57 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [data.embedding for data in response.data]

+    @retry_local
     async def a_embed_text(self, text: str) -> List[float]:
-        embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        embedding_model = self.load_model(async_mode=True)
+        response = await embedding_model.embeddings.create(
             model=self.model_name,
             input=[text],
         )
         return response.data[0].embedding

+    @retry_local
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
-        embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        embedding_model = self.load_model(async_mode=True)
+        response = await embedding_model.embeddings.create(
             model=self.model_name,
             input=texts,
         )
         return [data.embedding for data in response.data]

+    ###############################################
+    # Model
+    ###############################################
+
     def get_model_name(self):
         return self.model_name
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
+        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.LOCAL):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
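Note the async bug this hunk also fixes: `a_embed_text` and `a_embed_texts` previously built a synchronous client and never awaited the call. As for `sdk_retries_for`, its body is not part of this diff; judging from the call sites and the docstrings above, its contract is roughly "return True only when the provider's slug is listed in the DEEPEVAL_SDK_RETRY_PROVIDERS setting". A hedged sketch of that reading:

import os

def sdk_retries_for_sketch(slug: str) -> bool:
    # Illustrative only: the real implementation lives in retry_policy.py
    # and reads through deepeval's settings layer, not os.environ directly.
    raw = os.getenv("DEEPEVAL_SDK_RETRY_PROVIDERS", "")
    providers = {p.strip().lower() for p in raw.split(",") if p.strip()}
    return slug.lower() in providers

# Default: env var unset -> Tenacity owns retries, SDK max_retries forced to 0.
assert sdk_retries_for_sketch("local") is False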
deepeval/models/embedding_models/ollama_embedding_model.py

@@ -3,6 +3,13 @@ from typing import List

 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)


 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
@@ -13,6 +20,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         model_name = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
+        # TODO: This is not being used. Clean it up in consistency PR
         self.api_key = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
         )
@@ -20,12 +28,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self.kwargs = kwargs
         super().__init__(model_name)

-    def load_model(self, async_mode: bool = False):
-        if not async_mode:
-            return Client(host=self.base_url)
-
-        return AsyncClient(host=self.base_url)
-
+    @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
@@ -34,6 +37,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"][0]

+    @retry_ollama
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
@@ -42,6 +46,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"]

+    @retry_ollama
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
@@ -50,6 +55,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"][0]

+    @retry_ollama
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
@@ -58,5 +64,17 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"]

+    ###############################################
+    # Model
+    ###############################################
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
+
     def get_model_name(self):
-        return self.model_name
+        return f"{self.model_name} (Ollama)"
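Unlike the OpenAI-backed clients, Ollama's `_build_client` has no `max_retries` to reconcile, so constructor kwargs now flow to the `ollama` client untouched (previously they were stored but ignored). A hypothetical usage, assuming the extra kwargs are ones `ollama.Client` accepts and forwards to its underlying httpx client:

from deepeval.models.embedding_models.ollama_embedding_model import (
    OllamaEmbeddingModel,
)

# timeout is an httpx-style kwarg; whether it is useful depends on your
# Ollama deployment. This call is illustrative, not from the diff.
model = OllamaEmbeddingModel(timeout=30.0)
print(model.get_model_name())  # now suffixed with "(Ollama)"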
deepeval/models/embedding_models/openai_embedding_model.py

@@ -1,6 +1,14 @@
-from typing import Optional, List
+from typing import Dict, Optional, List
 from openai import OpenAI, AsyncOpenAI
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_openai = create_retry_decorator(PS.OPENAI)

 valid_openai_embedding_models = [
     "text-embedding-3-small",
@@ -15,6 +23,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self,
         model: Optional[str] = None,
         _openai_api_key: Optional[str] = None,
+        **kwargs,
     ):
         model_name = model if model else default_openai_embedding_model
         if model_name not in valid_openai_embedding_models:
@@ -23,7 +32,9 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             )
         self._openai_api_key = _openai_api_key
         self.model_name = model_name
+        self.kwargs = kwargs

+    @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -32,6 +43,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding

+    @retry_openai
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -40,6 +52,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]

+    @retry_openai
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -48,6 +61,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding

+    @retry_openai
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -56,11 +70,39 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]

-    def get_model_name(self) -> str:
+    ###############################################
+    # Model
+    ###############################################
+
+    def get_model_name(self):
         return self.model_name

-    def load_model(self, async_mode: bool):
+    def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(api_key=self._openai_api_key)
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)

-        return AsyncOpenAI(api_key=self._openai_api_key)
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENAI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self._openai_api_key,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
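All four `_client_kwargs` helpers in this release encode the same two-line ownership rule. Distilled as a pure function (hypothetical name, same logic as the diff):

def client_kwargs(user_kwargs: dict, sdk_retries_enabled: bool) -> dict:
    kwargs = dict(user_kwargs or {})
    if not sdk_retries_enabled:
        # Tenacity owns retries: silence the SDK to avoid multiplicative
        # retry storms (Tenacity attempts x SDK attempts).
        kwargs["max_retries"] = 0
    return kwargs

assert client_kwargs({"max_retries": 5}, sdk_retries_enabled=False)["max_retries"] == 0
assert client_kwargs({"max_retries": 5}, sdk_retries_enabled=True)["max_retries"] == 5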
deepeval/models/llms/amazon_bedrock_model.py

@@ -1,10 +1,16 @@
+import asyncio
+
 from typing import Optional, Tuple, Union, Dict
 from contextlib import AsyncExitStack
 from pydantic import BaseModel
-import asyncio

+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.constants import ProviderSlug as PS

 # check aiobotocore availability
 try:
@@ -15,6 +21,9 @@ try:
 except ImportError:
     aiobotocore_available = False

+# define retry policy
+retry_bedrock = create_retry_decorator(PS.BEDROCK)
+

 def _check_aiobotocore_available():
     if not aiobotocore_available:
@@ -53,11 +62,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):

         # prepare aiobotocore session, config, and async exit stack
         self._session = get_session()
-        self._config = Config(retries={"max_attempts": 5, "mode": "adaptive"})
         self._exit_stack = AsyncExitStack()
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         self._client = None
+        self._sdk_retry_mode: Optional[bool] = None

     ###############################################
     # Generate functions
@@ -68,6 +77,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ) -> Tuple[Union[str, Dict], float]:
         return asyncio.run(self.a_generate(prompt, schema))

+    @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -94,16 +104,33 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################

     async def _ensure_client(self):
-        if self._client is None:
+        use_sdk = sdk_retries_for(PS.BEDROCK)
+
+        # only rebuild if client is missing or the sdk retry mode changes
+        if self._client is None or self._sdk_retry_mode != use_sdk:
+            # Close any previous
+            if self._client is not None:
+                await self._exit_stack.aclose()
+                self._client = None
+
+            # create retry config for botocore
+            retries_config = {"max_attempts": (5 if use_sdk else 1)}
+            if use_sdk:
+                retries_config["mode"] = "adaptive"
+
+            config = Config(retries=retries_config)
+
             cm = self._session.create_client(
                 "bedrock-runtime",
                 region_name=self.region_name,
                 aws_access_key_id=self.aws_access_key_id,
                 aws_secret_access_key=self.aws_secret_access_key,
-                config=self._config,
+                config=config,
                 **self.kwargs,
             )
             self._client = await self._exit_stack.enter_async_context(cm)
+            self._sdk_retry_mode = use_sdk
+
         return self._client

     async def close(self):
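Bedrock is the one provider where retry ownership is expressed through botocore's `Config` rather than a `max_retries` kwarg, and where the client is cached, so `_ensure_client` now invalidates the cache whenever the SDK-retry mode flips. The retry-config half of that logic, isolated into a sketch (botocore's `max_attempts` counts total attempts including the first, so 1 means no SDK retries):

from botocore.config import Config

def bedrock_retry_config(use_sdk_retries: bool) -> Config:
    retries = {"max_attempts": 5 if use_sdk_retries else 1}
    if use_sdk_retries:
        retries["mode"] = "adaptive"  # botocore's client-side rate limiting
    return Config(retries=retries)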
deepeval/models/llms/anthropic_model.py

@@ -1,12 +1,22 @@
+import warnings
+
 from typing import Optional, Tuple, Union, Dict
 from anthropic import Anthropic, AsyncAnthropic
 from pydantic import BaseModel
-import os
-import warnings

 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.utils import parse_model_name
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_anthropic = create_retry_decorator(PS.ANTHROPIC)

 model_pricing = {
     "claude-opus-4-20250514": {"input": 15.00 / 1e6, "output": 75.00 / 1e6},
@@ -45,6 +55,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################

+    @retry_anthropic
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -70,6 +81,7 @@ class AnthropicModel(DeepEvalBaseLLM):
         json_output = trim_and_load_json(message.content[0].text)
         return schema.model_validate(json_output), cost

+    @retry_anthropic
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -128,17 +140,31 @@ class AnthropicModel(DeepEvalBaseLLM):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Anthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
-        else:
-            return AsyncAnthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
+            return self._build_client(Anthropic)
+        return self._build_client(AsyncAnthropic)

     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # If we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS, then honor their max_retries.
+        if not sdk_retries_for(PS.ANTHROPIC):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        settings = get_settings()
+        kw = dict(
+            api_key=settings.ANTHROPIC_API_KEY or self._anthropic_api_key,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # in case older SDKs don't accept max_retries, drop it and retry
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
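Beyond the shared retry policy, this file also stops reading `os.environ` directly: the API key now resolves through `deepeval.config.settings.get_settings()` (whose expanded surface is part of the `settings.py` changes listed above), falling back to the key passed at construction. The precedence, read straight off `_build_client` and restated as a standalone helper for clarity (the helper name is ours, not deepeval's):

from deepeval.config.settings import get_settings

def resolve_anthropic_key(constructor_key=None):
    # settings.ANTHROPIC_API_KEY wins when set; the constructor key is the fallback.
    return get_settings().ANTHROPIC_API_KEY or constructor_key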
deepeval/models/llms/azure_model.py

@@ -1,9 +1,7 @@
-from tenacity import retry, retry_if_exception_type, wait_exponential_jitter
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import openai

 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
@@ -11,17 +9,18 @@ from deepeval.models.llms.openai_model import (
     structured_outputs_models,
     json_mode_models,
     model_pricing,
-    log_retry_error,
 )
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
+from deepeval.constants import ProviderSlug as PS

-retryable_exceptions = (
-    openai.RateLimitError,
-    openai.APIConnectionError,
-    openai.APITimeoutError,
-    openai.LengthFinishReasonError,
-)
+
+retry_azure = create_retry_decorator(PS.AZURE)


 class AzureOpenAIModel(DeepEvalBaseLLM):
@@ -67,11 +66,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -130,11 +125,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         else:
             return output, cost

-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
@@ -199,11 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################

-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate_raw_response(
         self,
         prompt: str,
@@ -226,11 +213,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):

         return completion, cost

-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -272,17 +255,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):

     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return AzureOpenAI(
-                api_key=self.azure_openai_api_key,
-                api_version=self.openai_api_version,
-                azure_endpoint=self.azure_endpoint,
-                azure_deployment=self.deployment_name,
-                **self.kwargs,  # Keep this for client initialization
-            )
-        return AsyncAzureOpenAI(
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
             api_key=self.azure_openai_api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.deployment_name,
-            **self.kwargs,  # ← Keep this for client initialization
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
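Net effect for `AzureOpenAIModel`: the four hand-rolled `@retry(...)` stacks, which retried only `RateLimitError` on the structured paths but the wider `retryable_exceptions` tuple on the raw-response paths, collapse into the single shared `retry_azure` policy. To hand retries back to the OpenAI SDK instead, the opt-in looks roughly like this (hypothetical values; assumes the provider list is consulted when the client is built, as the `_client_kwargs` docstrings state, and that constructor kwargs reach the client via `self.kwargs`):

import os

# Opt the azure provider back into SDK-managed retries before clients are built.
os.environ["DEEPEVAL_SDK_RETRY_PROVIDERS"] = "azure"

from deepeval.models.llms.azure_model import AzureOpenAIModel

model = AzureOpenAIModel(max_retries=3)  # honored now that azure is opted in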