deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -301
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +54 -11
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/prompt/api.py +13 -9
- deepeval/prompt/prompt.py +19 -9
- deepeval/tracing/tracing.py +87 -0
- deepeval/utils.py +12 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0

--- a/deepeval/models/llms/deepseek_model.py
+++ b/deepeval/models/llms/deepseek_model.py
@@ -5,6 +5,15 @@ from pydantic import BaseModel
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_deepseek = create_retry_decorator(PS.DEEPSEEK)
 
 model_pricing = {
     "deepseek-chat": {
@@ -55,6 +64,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_deepseek
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -88,6 +98,7 @@ class DeepSeekModel(DeepEvalBaseLLM):
             )
         return output, cost
 
+    @retry_deepseek
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -141,13 +152,31 @@ class DeepSeekModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        kwargs = dict(self.kwargs or {})
+        # if we are managing retries with Tenacity, force SDK retries off to avoid double retries.
+        # if the user opts into SDK retries for "deepseek" via DEEPEVAL_SDK_RETRY_PROVIDERS, honor it.
+        if not sdk_retries_for(PS.DEEPSEEK):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # In case an older OpenAI client doesn't accept max_retries, drop it and retry.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
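
Every provider client in this release follows the same two-part pattern: the public `generate`/`a_generate` methods get a Tenacity-based decorator built by `deepeval.models.retry_policy.create_retry_decorator`, and the underlying SDK's own retries are switched off unless the user opts that provider back in. A minimal sketch of how the two pieces fit together; the helper names `build_deepseek_client` and `ask` are illustrative, and the assumption that `sdk_retries_for` consults `DEEPEVAL_SDK_RETRY_PROVIDERS` comes from the comments in the diff:

```python
from openai import OpenAI

from deepeval.constants import ProviderSlug as PS
from deepeval.models.retry_policy import create_retry_decorator, sdk_retries_for

# One decorator per provider keeps the retry rules consistent across models.
retry_deepseek = create_retry_decorator(PS.DEEPSEEK)


def build_deepseek_client(api_key: str, base_url: str, **kwargs) -> OpenAI:
    # Tenacity owns retries by default, so the OpenAI-compatible SDK is told not to retry;
    # opting in via DEEPEVAL_SDK_RETRY_PROVIDERS=deepseek hands retries back to the SDK.
    if not sdk_retries_for(PS.DEEPSEEK):
        kwargs["max_retries"] = 0
    return OpenAI(api_key=api_key, base_url=base_url, **kwargs)


@retry_deepseek
def ask(client: OpenAI, prompt: str) -> str:
    # A retryable provider error raised here is re-attempted by the decorator,
    # never by the SDK as well, so one failure cannot trigger two retry loops.
    response = client.chat.completions.create(
        model="deepseek-chat", messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content
```

The same split shows up below for Gemini, Grok, Kimi, the local and Ollama models, and OpenAI itself, with only the provider slug and the client constructor changing.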

--- a/deepeval/models/llms/gemini_model.py
+++ b/deepeval/models/llms/gemini_model.py
@@ -3,11 +3,19 @@ from google.genai import types
 from typing import Optional, Dict
 from google import genai
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+
 
 default_gemini_model = "gemini-1.5-pro"
 
+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)
+
 
 class GeminiModel(DeepEvalBaseLLM):
     """Class that implements Google Gemini models for text-based evaluation.
@@ -145,6 +153,7 @@ class GeminiModel(DeepEvalBaseLLM):
         ]
         return self.client.models
 
+    @retry_gemini
     def generate(self, prompt: str, schema: Optional[BaseModel] = None) -> str:
         """Generates text from a prompt.
 
@@ -180,6 +189,7 @@ class GeminiModel(DeepEvalBaseLLM):
         )
         return response.text, 0
 
+    @retry_gemini
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> str:

--- a/deepeval/models/llms/grok_model.py
+++ b/deepeval/models/llms/grok_model.py
@@ -1,10 +1,20 @@
+import os
+
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-import os
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_grok = create_retry_decorator(PS.GROK)
 
 
 structured_outputs_models = [
@@ -81,6 +91,7 @@ class GrokModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_grok
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -118,6 +129,7 @@ class GrokModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
+    @retry_grok
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -178,9 +190,9 @@ class GrokModel(DeepEvalBaseLLM):
             from xai_sdk import Client, AsyncClient
 
             if not async_mode:
-                return Client(api_key=self.api_key, **self.kwargs)
+                return self._build_client(Client)
             else:
-                return AsyncClient(api_key=self.api_key, **self.kwargs)
+                return self._build_client(AsyncClient)
         except ImportError:
             raise ImportError(
                 "xai_sdk is required to use GrokModel. Please install it with: pip install xai-sdk"
@@ -188,3 +200,38 @@ class GrokModel(DeepEvalBaseLLM):
 
     def get_model_name(self):
         return f"{self.model_name}"
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, disable gRPC channel retries to avoid double retry.
+        If the user opts into SDK retries for 'grok' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave channel options as is
+        """
+        kwargs = dict(self.kwargs or {})
+        opts = list(kwargs.get("channel_options", []))
+        if not sdk_retries_for(PS.GROK):
+            # remove any explicit enable flag, then disable retries
+            opts = [
+                option
+                for option in opts
+                if not (
+                    isinstance(option, (tuple, list))
+                    and option
+                    and option[0] == "grpc.enable_retries"
+                )
+            ]
+            opts.append(("grpc.enable_retries", 0))
+        if opts:
+            kwargs["channel_options"] = opts
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(api_key=self.api_key, **self._client_kwargs())
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # fallback: older SDK version might not accept channel_options
+            if "channel_options" in str(e):
+                kw.pop("channel_options", None)
+                return cls(**kw)
+            raise
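
The xAI client speaks gRPC, so for Grok the SDK-retry opt-out is expressed through `channel_options` rather than `max_retries`: any caller-supplied `grpc.enable_retries` entry is stripped and replaced with a disabling flag. The filtering logic, pulled out into a hypothetical standalone helper for illustration:

```python
from typing import Any, Dict, List


def disable_grpc_retries(client_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Force gRPC channel retries off so Tenacity stays the only retry layer."""
    kwargs = dict(client_kwargs or {})
    opts: List[Any] = list(kwargs.get("channel_options", []))
    # Drop any explicit grpc.enable_retries entry the caller passed in...
    opts = [
        option
        for option in opts
        if not (
            isinstance(option, (tuple, list))
            and option
            and option[0] == "grpc.enable_retries"
        )
    ]
    # ...then append the disabling flag.
    opts.append(("grpc.enable_retries", 0))
    kwargs["channel_options"] = opts
    return kwargs


# A user-supplied ("grpc.enable_retries", 1) is replaced, not duplicated:
print(disable_grpc_retries({"channel_options": [("grpc.enable_retries", 1)]}))
# {'channel_options': [('grpc.enable_retries', 0)]}
```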

--- a/deepeval/models/llms/kimi_model.py
+++ b/deepeval/models/llms/kimi_model.py
@@ -2,11 +2,18 @@ from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.constants import ProviderSlug as PS
 
 
+retry_kimi = create_retry_decorator(PS.KIMI)
+
 json_mode_models = [
     "kimi-thinking-preview",
     "kimi-k2-0711-preview",
@@ -100,6 +107,7 @@ class KimiModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_kimi
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -137,6 +145,7 @@ class KimiModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
+    @retry_kimi
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -194,13 +203,34 @@ class KimiModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.api_key, base_url=self.base_url, **self.kwargs
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'kimi' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.KIMI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
 
     def get_model_name(self):
         return f"{self.model_name}"

--- a/deepeval/models/llms/local_model.py
+++ b/deepeval/models/llms/local_model.py
@@ -1,12 +1,20 @@
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
-
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+# consistent retry rules
+retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalModel(DeepEvalBaseLLM):
@@ -43,6 +51,7 @@ class LocalModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_local
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -61,6 +70,7 @@ class LocalModel(DeepEvalBaseLLM):
         else:
             return res_content, 0.0
 
+    @retry_local
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -91,14 +101,30 @@ class LocalModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return OpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
-        else:
-            return AsyncOpenAI(
-                api_key=self.local_model_api_key,
-                base_url=self.base_url,
-                **self.kwargs,
-            )
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
+        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.LOCAL):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self.local_model_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise

--- a/deepeval/models/llms/ollama_model.py
+++ b/deepeval/models/llms/ollama_model.py
@@ -2,8 +2,16 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
+
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaModel(DeepEvalBaseLLM):
@@ -34,6 +42,7 @@ class OllamaModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
+    @retry_ollama
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -56,6 +65,7 @@ class OllamaModel(DeepEvalBaseLLM):
             0,
         )
 
+    @retry_ollama
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
@@ -84,9 +94,11 @@ class OllamaModel(DeepEvalBaseLLM):
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return Client(host=self.base_url, **self.kwargs)
-        else:
-            return AsyncClient(host=self.base_url, **self.kwargs)
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"

--- a/deepeval/models/llms/openai_model.py
+++ b/deepeval/models/llms/openai_model.py
@@ -1,5 +1,3 @@
-import logging
-
 from openai.types.chat.chat_completion import ChatCompletion
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
@@ -10,27 +8,17 @@ from openai import (
     AsyncOpenAI,
 )
 
-from tenacity import retry, RetryCallState, before_sleep_log
-
+from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.models.utils import parse_model_name
 from deepeval.models.retry_policy import (
-    OPENAI_ERROR_POLICY,
-    default_wait,
-    default_stop,
-    retry_predicate,
+    create_retry_decorator,
+    sdk_retries_for,
 )
 
-logger = logging.getLogger("deepeval.openai_model")
-
-
-def log_retry_error(retry_state: RetryCallState):
-    exception = retry_state.outcome.exception()
-    logger.error(
-        f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
-    )
 
+retry_openai = create_retry_decorator(PS.OPENAI)
 
 valid_gpt_models = [
     "gpt-3.5-turbo",
@@ -219,21 +207,6 @@ models_requiring_temperature_1 = [
     "gpt-5-chat-latest",
 ]
 
-_base_retry_rules_kw = dict(
-    wait=default_wait(),
-    stop=default_stop(),
-    retry=retry_predicate(OPENAI_ERROR_POLICY),
-    before_sleep=before_sleep_log(
-        logger, logging.INFO
-    ),  # <- logs only on retries
-    after=log_retry_error,
-)
-
-
-def _openai_client_kwargs():
-    # Avoid double-retry at SDK layer by disabling the SDK's own retries so tenacity is the single source of truth for retry logic.
-    return {"max_retries": 0}
-
 
 class GPTModel(DeepEvalBaseLLM):
     def __init__(
@@ -311,7 +284,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Generate functions
     ###############################################
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
@@ -370,7 +343,7 @@ class GPTModel(DeepEvalBaseLLM):
         else:
             return output, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
    async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
@@ -434,7 +407,7 @@ class GPTModel(DeepEvalBaseLLM):
     # Other generate functions
     ###############################################
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_raw_response(
         self,
         prompt: str,
@@ -457,7 +430,7 @@ class GPTModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     async def a_generate_raw_response(
         self,
         prompt: str,
@@ -480,7 +453,7 @@ class GPTModel(DeepEvalBaseLLM):
 
         return completion, cost
 
-    @retry(**_base_retry_rules_kw)
+    @retry_openai
     def generate_samples(
         self, prompt: str, n: int, temperature: float
     ) -> Tuple[list[str], float]:
@@ -500,6 +473,7 @@ class GPTModel(DeepEvalBaseLLM):
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+        # TODO: consider loggin a warning instead of defaulting to whole model pricing
         pricing = model_pricing.get(self.model_name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
@@ -513,13 +487,32 @@ class GPTModel(DeepEvalBaseLLM):
         return self.model_name
 
     def load_model(self, async_mode: bool = False):
-        kwargs = {**self.kwargs, **_openai_client_kwargs()}
         if not async_mode:
-            return OpenAI(
-                api_key=self._openai_api_key, base_url=self.base_url, **kwargs
-            )
-        return AsyncOpenAI(
-            api_key=self._openai_api_key,
-            base_url=self.base_url,
-            **kwargs,
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENAI):
+            kwargs["max_retries"] = 0
+        return kwargs
+
+    def _build_client(self, cls):
+        kw = dict(
+            api_key=self._openai_api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
         )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
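
All of the OpenAI-compatible clients (OpenAI, DeepSeek, Kimi, and the local model) also gain the same defensive constructor: pass `max_retries=0`, and if an installed SDK is old enough to reject the keyword, drop it and build the client anyway rather than crash. A self-contained sketch of that fallback, with a stand-in class so it runs on its own (the names `build_client` and `LegacyClient` are illustrative):

```python
from typing import Any, Callable


def build_client(cls: Callable[..., Any], **kw: Any) -> Any:
    """Construct an SDK client, tolerating SDKs that predate the max_retries kwarg."""
    try:
        return cls(**kw)
    except TypeError as e:
        # Older clients raise TypeError("... unexpected keyword argument 'max_retries'").
        if "max_retries" in str(e):
            kw.pop("max_retries", None)
            return cls(**kw)
        raise


class LegacyClient:
    # Stand-in for an SDK client whose __init__ does not accept max_retries.
    def __init__(self, api_key: str, base_url: str):
        self.api_key, self.base_url = api_key, base_url


client = build_client(
    LegacyClient, api_key="sk-test", base_url="http://localhost:8000/v1", max_retries=0
)
print(type(client).__name__)  # LegacyClient — built on the second attempt, without max_retries
```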

--- a/deepeval/models/mlllms/gemini_model.py
+++ b/deepeval/models/mlllms/gemini_model.py
@@ -4,12 +4,19 @@ from pydantic import BaseModel
 from google.genai import types
 from google import genai
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
 
 
 default_multimodal_gemini_model = "gemini-1.5-pro"
+# consistent retry rules
+retry_gemini = create_retry_decorator(PS.GOOGLE)
 
 
 class MultimodalGeminiModel(DeepEvalBaseMLLM):
@@ -147,6 +154,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             self.model_temperature = 0.0
         return self.client.models
 
+    # TODO: Refactor genete prompt to minimize the work done on retry
+    @retry_gemini
     def generate_prompt(
         self, multimodal_input: List[Union[str, MLLMImage]] = []
     ) -> List[Union[str, MLLMImage]]:
@@ -162,6 +171,8 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             ValueError: If an invalid input type is provided
         """
         prompt = []
+        settings = get_settings()
+
         for ele in multimodal_input:
             if isinstance(ele, str):
                 prompt.append(ele)
@@ -170,9 +181,14 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
                     with open(ele.url, "rb") as f:
                         image_data = f.read()
                 else:
-                    response = requests.get(
-                        ele.url
-                    )
+                    response = requests.get(
+                        ele.url,
+                        timeout=(
+                            settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                            settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                        ),
+                    )
+                    response.raise_for_status()
                     image_data = response.content
 
                 image_part = types.Part.from_bytes(
@@ -183,6 +199,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             raise ValueError(f"Invalid input type: {type(ele)}")
         return prompt
 
+    @retry_gemini
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -222,6 +239,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         )
         return response.text, 0
 
+    @retry_gemini
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],