deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -301
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +54 -11
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/prompt/api.py +13 -9
- deepeval/prompt/prompt.py +19 -9
- deepeval/tracing/tracing.py +87 -0
- deepeval/utils.py +12 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
```diff
--- a/deepeval/models/embedding_models/azure_embedding_model.py
+++ b/deepeval/models/embedding_models/azure_embedding_model.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import Dict, List
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from deepeval.key_handler import (
     EmbeddingKeyValues,
@@ -6,10 +6,18 @@ from deepeval.key_handler import (
     KEY_FILE_HANDLER,
 )
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_azure = create_retry_decorator(PS.AZURE)
 
 
 class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self):
+    def __init__(self, **kwargs):
         self.azure_openai_api_key = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.AZURE_OPENAI_API_KEY
         )
@@ -23,7 +31,9 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             ModelKeyValues.AZURE_OPENAI_ENDPOINT
         )
         self.model_name = self.azure_embedding_deployment
+        self.kwargs = kwargs
 
+    @retry_azure
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -32,6 +42,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding
 
+    @retry_azure
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -40,6 +51,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]
 
+    @retry_azure
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -48,6 +60,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding
 
+    @retry_azure
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -61,15 +74,33 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return AzureOpenAI(
-                api_key=self.azure_openai_api_key,
-                api_version=self.openai_api_version,
-                azure_endpoint=self.azure_endpoint,
-                azure_deployment=self.azure_embedding_deployment,
-            )
-        return AsyncAzureOpenAI(
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
             api_key=self.azure_openai_api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.azure_embedding_deployment,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```
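Every provider file in this release gains a module-level decorator built from `create_retry_decorator(ProviderSlug.X)` and applied to each network-bound method. `deepeval/models/retry_policy.py` itself (+548 −64 in the file list) is not shown in this diff, so the following is only an illustrative sketch of what such a factory might look like, assuming it wraps Tenacity the way the inline decorators removed from `azure_model.py` (further down) did; the `_RETRYABLE` map and its exception choices are hypothetical placeholders.

```python
# Illustrative sketch only -- not the actual deepeval/models/retry_policy.py.
# The wait policy mirrors the inline Tenacity decorators removed from
# azure_model.py below; _RETRYABLE is a hypothetical per-provider map.
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential_jitter,
)

_RETRYABLE = {
    "azure": (ConnectionError, TimeoutError),  # stand-ins for SDK exceptions
    "openai": (ConnectionError, TimeoutError),
}


def create_retry_decorator(slug: str):
    """Build one shared Tenacity decorator per provider slug."""
    return retry(
        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(_RETRYABLE.get(slug, (Exception,))),
        reraise=True,
    )


retry_azure = create_retry_decorator("azure")
```

Building the decorator once at module import, rather than per call site, is what keeps the retry behavior identical across all five `embed_*`/`a_embed_*` methods above.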
```diff
--- a/deepeval/models/embedding_models/local_embedding_model.py
+++ b/deepeval/models/embedding_models/local_embedding_model.py
@@ -1,12 +1,21 @@
-from openai import OpenAI
-from typing import List
+from openai import OpenAI, AsyncOpenAI
+from typing import Dict, List
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, **kwargs):
         self.base_url = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
@@ -16,13 +25,10 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self.api_key = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
         )
-        self.args = args
         self.kwargs = kwargs
         super().__init__(model_name)
 
-    def load_model(self):
-        return OpenAI(base_url=self.base_url, api_key=self.api_key)
-
+    @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
@@ -31,6 +37,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding
 
+    @retry_local
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
@@ -39,21 +46,57 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [data.embedding for data in response.data]
 
+    @retry_local
     async def a_embed_text(self, text: str) -> List[float]:
-        embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        embedding_model = self.load_model(async_mode=True)
+        response = await embedding_model.embeddings.create(
             model=self.model_name,
             input=[text],
         )
         return response.data[0].embedding
 
+    @retry_local
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
-        embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        embedding_model = self.load_model(async_mode=True)
+        response = await embedding_model.embeddings.create(
             model=self.model_name,
             input=texts,
         )
         return [data.embedding for data in response.data]
 
+    ###############################################
+    # Model
+    ###############################################
+
     def get_model_name(self):
         return self.model_name
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
+        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.LOCAL):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```

This version also fixes a latent bug: the 3.5.1 async methods called the synchronous client without `await`; they now request an `AsyncOpenAI` client via `load_model(async_mode=True)` and await the call.
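`sdk_retries_for` decides whether the native SDK keeps its own retries. Its body is also part of the unshown `retry_policy.py`, but the docstrings above pin the contract: it checks the provider slug against the comma-separated `DEEPEVAL_SDK_RETRY_PROVIDERS` environment variable. A plausible reading, assuming `ProviderSlug` is a string-valued enum:

```python
# Hypothetical sketch of the opt-in check described in the docstrings above;
# the real implementation lives in deepeval/models/retry_policy.py (not shown).
import os


def sdk_retries_for(slug) -> bool:
    """True if the user opted this provider into native SDK retries."""
    raw = os.getenv("DEEPEVAL_SDK_RETRY_PROVIDERS", "")
    enabled = {part.strip().lower() for part in raw.split(",") if part.strip()}
    return str(slug).lower() in enabled
```

Under that reading, `DEEPEVAL_SDK_RETRY_PROVIDERS=local,openai` would leave `max_retries` untouched for those two clients, while every other provider gets `max_retries=0` so that Tenacity is the single retry layer.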
```diff
--- a/deepeval/models/embedding_models/ollama_embedding_model.py
+++ b/deepeval/models/embedding_models/ollama_embedding_model.py
@@ -3,6 +3,13 @@ from typing import List
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
@@ -13,6 +20,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         model_name = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
+        # TODO: This is not being used. Clean it up in consistency PR
         self.api_key = KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
         )
@@ -20,12 +28,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self.kwargs = kwargs
         super().__init__(model_name)
 
-    def load_model(self, async_mode: bool = False):
-        if not async_mode:
-            return Client(host=self.base_url)
-
-        return AsyncClient(host=self.base_url)
-
+    @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
@@ -34,6 +37,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"][0]
 
+    @retry_ollama
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
@@ -42,6 +46,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"]
 
+    @retry_ollama
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
@@ -50,6 +55,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"][0]
 
+    @retry_ollama
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
@@ -58,5 +64,17 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response["embeddings"]
 
+    ###############################################
+    # Model
+    ###############################################
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
+
     def get_model_name(self):
-        return self.model_name
+        return f"{self.model_name} (Ollama)"
```
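Two details differ in the Ollama model: `get_model_name` now tags the reported name with `(Ollama)`, and `_build_client` does no `max_retries` gating, simply forwarding `self.kwargs` to the client, so Tenacity is the only retry layer here. The same decorator object wraps the sync and async methods alike, which works because Tenacity detects coroutine functions and awaits them. A minimal standalone demonstration (the decorator below is rebuilt locally for illustration, not imported from deepeval):

```python
# Standalone check that one Tenacity decorator serves sync and async methods;
# retry_demo mimics, but is not, deepeval's retry_ollama.
import asyncio

from tenacity import retry, stop_after_attempt, wait_exponential_jitter

retry_demo = retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential_jitter(initial=1, max=10),
    reraise=True,
)


@retry_demo
def ping() -> str:
    return "pong"


@retry_demo
async def a_ping() -> str:
    return "pong"


print(ping())                 # sync call, retried up to 3 times on failure
print(asyncio.run(a_ping()))  # Tenacity wraps coroutines transparently
```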
```diff
--- a/deepeval/models/embedding_models/openai_embedding_model.py
+++ b/deepeval/models/embedding_models/openai_embedding_model.py
@@ -1,6 +1,14 @@
-from typing import Optional, List
+from typing import Dict, Optional, List
 from openai import OpenAI, AsyncOpenAI
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_openai = create_retry_decorator(PS.OPENAI)
 
 valid_openai_embedding_models = [
     "text-embedding-3-small",
@@ -15,6 +23,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         self,
         model: Optional[str] = None,
         _openai_api_key: Optional[str] = None,
+        **kwargs,
     ):
         model_name = model if model else default_openai_embedding_model
         if model_name not in valid_openai_embedding_models:
@@ -23,7 +32,9 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         self._openai_api_key = _openai_api_key
         self.model_name = model_name
+        self.kwargs = kwargs
 
+    @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -32,6 +43,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding
 
+    @retry_openai
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
@@ -40,6 +52,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]
 
+    @retry_openai
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -48,6 +61,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return response.data[0].embedding
 
+    @retry_openai
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
@@ -56,11 +70,39 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
         )
         return [item.embedding for item in response.data]
 
-    def get_model_name(self):
+    ###############################################
+    # Model
+    ###############################################
+
+    def get_model_name(self):
         return self.model_name
 
-    def load_model(self, async_mode: bool):
+    def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(api_key=self._openai_api_key)
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
 
-        return AsyncOpenAI(api_key=self._openai_api_key)
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENAI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self._openai_api_key,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```
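All four OpenAI-compatible models share the same `_build_client` fallback: pass `max_retries=0` optimistically and strip it only if the installed SDK's constructor rejects it, rather than pinning a minimum SDK version. Here is the pattern in isolation, with a stand-in class instead of a real SDK client:

```python
# Generic sketch of the defensive-construction pattern used in _build_client;
# LegacyClient is a stand-in for an SDK whose constructor predates max_retries.
class LegacyClient:
    def __init__(self, api_key: str):  # no max_retries parameter
        self.api_key = api_key


def build(cls, **kw):
    try:
        return cls(**kw)
    except TypeError as e:
        # Older constructors raise TypeError on unknown kwargs; retry without it.
        if "max_retries" in str(e):
            kw.pop("max_retries", None)
            return cls(**kw)
        raise


client = build(LegacyClient, api_key="sk-test", max_retries=0)
print(type(client).__name__)  # LegacyClient
```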
```diff
--- a/deepeval/models/llms/amazon_bedrock_model.py
+++ b/deepeval/models/llms/amazon_bedrock_model.py
@@ -1,10 +1,16 @@
+import asyncio
+
 from typing import Optional, Tuple, Union, Dict
 from contextlib import AsyncExitStack
 from pydantic import BaseModel
-import asyncio
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.constants import ProviderSlug as PS
 
 # check aiobotocore availability
 try:
@@ -15,6 +21,9 @@ try:
 except ImportError:
     aiobotocore_available = False
 
+# define retry policy
+retry_bedrock = create_retry_decorator(PS.BEDROCK)
+
 
 def _check_aiobotocore_available():
     if not aiobotocore_available:
@@ -53,11 +62,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
         # prepare aiobotocore session, config, and async exit stack
         self._session = get_session()
-        self._config = Config(retries={"max_attempts": 5, "mode": "adaptive"})
         self._exit_stack = AsyncExitStack()
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         self._client = None
+        self._sdk_retry_mode: Optional[bool] = None
 
     ###############################################
     # Generate functions
@@ -68,6 +77,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ) -> Tuple[Union[str, Dict], float]:
         return asyncio.run(self.a_generate(prompt, schema))
 
+    @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -94,16 +104,33 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################
 
     async def _ensure_client(self):
-        if self._client is None:
+        use_sdk = sdk_retries_for(PS.BEDROCK)
+
+        # only rebuild if client is missing or the sdk retry mode changes
+        if self._client is None or self._sdk_retry_mode != use_sdk:
+            # Close any previous
+            if self._client is not None:
+                await self._exit_stack.aclose()
+                self._client = None
+
+            # create retry config for botocore
+            retries_config = {"max_attempts": (5 if use_sdk else 1)}
+            if use_sdk:
+                retries_config["mode"] = "adaptive"
+
+            config = Config(retries=retries_config)
+
             cm = self._session.create_client(
                 "bedrock-runtime",
                 region_name=self.region_name,
                 aws_access_key_id=self.aws_access_key_id,
                 aws_secret_access_key=self.aws_secret_access_key,
-                config=self._config,
+                config=config,
                 **self.kwargs,
             )
             self._client = await self._exit_stack.enter_async_context(cm)
+            self._sdk_retry_mode = use_sdk
+
         return self._client
 
     async def close(self):
```
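Bedrock goes through botocore rather than an OpenAI-style client, so the opt-out is spelled `max_attempts: 1`: botocore counts the initial request toward `max_attempts`, so 1 means no retries, while the opted-in path restores the previous five-attempt adaptive config. Because botocore bakes the retry config into the client at construction time, `_ensure_client` remembers which mode it built with and rebuilds when the opt-in flips. The caching skeleton, reduced to its logic with a stand-in factory:

```python
# Condensed sketch of the rebuild-on-mode-change caching in _ensure_client;
# make_client stands in for the aiobotocore client construction.
from typing import Optional


class ClientCache:
    def __init__(self):
        self._client = None
        self._sdk_retry_mode: Optional[bool] = None

    def ensure(self, use_sdk: bool, make_client):
        # Rebuild only when missing or when the retry mode flips, since
        # botocore fixes the retry config at construction.
        if self._client is None or self._sdk_retry_mode != use_sdk:
            retries = {"max_attempts": 5 if use_sdk else 1}
            if use_sdk:
                retries["mode"] = "adaptive"
            self._client = make_client(retries)
            self._sdk_retry_mode = use_sdk
        return self._client


cache = ClientCache()
c1 = cache.ensure(False, lambda r: ("client", r))
c2 = cache.ensure(False, lambda r: ("client", r))
assert c1 is c2      # cached while the mode is stable
c3 = cache.ensure(True, lambda r: ("client", r))
assert c3 is not c1  # flipping the opt-in rebuilds the client
```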
```diff
--- a/deepeval/models/llms/anthropic_model.py
+++ b/deepeval/models/llms/anthropic_model.py
@@ -1,12 +1,22 @@
+import warnings
+
 from typing import Optional, Tuple, Union, Dict
 from anthropic import Anthropic, AsyncAnthropic
 from pydantic import BaseModel
-import os
-import warnings
 
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.utils import parse_model_name
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
 
 model_pricing = {
     "claude-opus-4-20250514": {"input": 15.00 / 1e6, "output": 75.00 / 1e6},
@@ -45,6 +55,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################
 
+    @retry_anthropic
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -70,6 +81,7 @@ class AnthropicModel(DeepEvalBaseLLM):
         json_output = trim_and_load_json(message.content[0].text)
         return schema.model_validate(json_output), cost
 
+    @retry_anthropic
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -128,17 +140,31 @@ class AnthropicModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Anthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
-        else:
-            return AsyncAnthropic(
-                api_key=os.environ.get("ANTHROPIC_API_KEY")
-                or self._anthropic_api_key,
-                **self.kwargs,
-            )
+            return self._build_client(Anthropic)
+        return self._build_client(AsyncAnthropic)
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # If we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS, then honor their max_retries.
+        if not sdk_retries_for(PS.ANTHROPIC):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        settings = get_settings()
+        kw = dict(
+            api_key=settings.ANTHROPIC_API_KEY or self._anthropic_api_key,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # in case older SDKs don't accept max_retries, drop it and retry
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```
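Note the key-resolution change buried in `_build_client`: `os.environ.get("ANTHROPIC_API_KEY")` became `settings.ANTHROPIC_API_KEY` (see the `deepeval/config/settings.py` changes in the file list), but the `or` ordering is preserved, so the centrally configured value still wins and a key passed to the constructor only serves as the fallback. Assuming the settings attribute is `None` when unset:

```python
# Precedence of `settings.ANTHROPIC_API_KEY or self._anthropic_api_key`,
# assuming the settings attribute is None when the key is unset.
def resolve_key(settings_key, ctor_key):
    return settings_key or ctor_key


assert resolve_key(None, "ctor-key") == "ctor-key"      # constructor key is the fallback
assert resolve_key("set-key", "ctor-key") == "set-key"  # settings value wins
```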
```diff
--- a/deepeval/models/llms/azure_model.py
+++ b/deepeval/models/llms/azure_model.py
@@ -1,9 +1,7 @@
-from tenacity import retry, retry_if_exception_type, wait_exponential_jitter
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import openai
 
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
@@ -11,17 +9,18 @@ from deepeval.models.llms.openai_model import (
     structured_outputs_models,
     json_mode_models,
     model_pricing,
-    log_retry_error,
 )
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
+from deepeval.constants import ProviderSlug as PS
 
-retryable_exceptions = (
-    openai.RateLimitError,
-    openai.APIConnectionError,
-    openai.APITimeoutError,
-    openai.LengthFinishReasonError,
-)
+
+retry_azure = create_retry_decorator(PS.AZURE)
 
 
 class AzureOpenAIModel(DeepEvalBaseLLM):
@@ -67,11 +66,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -130,11 +125,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(openai.RateLimitError),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
@@ -199,11 +190,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     def generate_raw_response(
         self,
         prompt: str,
@@ -226,11 +213,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_azure
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -272,17 +255,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return AzureOpenAI(
-                api_key=self.azure_openai_api_key,
-                api_version=self.openai_api_version,
-                azure_endpoint=self.azure_endpoint,
-                azure_deployment=self.deployment_name,
-                **self.kwargs,
-            )
-        return AsyncAzureOpenAI(
+            return self._build_client(AzureOpenAI)
+        return self._build_client(AsyncAzureOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'azure' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.AZURE):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
             api_key=self.azure_openai_api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.deployment_name,
-            **self.kwargs,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
```