deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -301
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +54 -11
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/prompt/api.py +13 -9
- deepeval/prompt/prompt.py +19 -9
- deepeval/tracing/tracing.py +87 -0
- deepeval/utils.py +12 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
- {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
deepeval/models/mlllms/ollama_model.py

@@ -5,23 +5,31 @@ import requests
 import base64
 import io
 
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+)
 from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
 from deepeval.models import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+
+
+retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(
-        self,
-    ):
+    def __init__(self, **kwargs):
         model_name = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.LOCAL_MODEL_NAME
         )
         self.base_url = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.LOCAL_MODEL_BASE_URL
         )
+        self.kwargs = kwargs
         super().__init__(model_name)
 
+    @retry_ollama
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
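The pattern here is a module-level decorator minted from a central factory: `create_retry_decorator(PS.OLLAMA)` replaces per-file tenacity wiring with one provider-keyed policy (the policy itself lives in `deepeval/models/retry_policy.py`, which grew by roughly 550 lines in this release). A minimal sketch of what such a factory can look like, assuming a tenacity backend and an illustrative per-provider error map; the real implementation is more configurable:

```python
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential_jitter,
)

# Illustrative only: the real map lives in deepeval.models.retry_policy
# and covers many providers and error classes.
RETRYABLE = {
    "ollama": (ConnectionError, TimeoutError),
}

def create_retry_decorator(slug: str):
    # One shared policy per provider: jittered exponential backoff,
    # retrying only the exception types that are transient for that slug.
    return retry(
        retry=retry_if_exception_type(RETRYABLE[slug]),
        wait=wait_exponential_jitter(initial=1, max=10),
        stop=stop_after_attempt(3),
        reraise=True,
    )

retry_ollama = create_retry_decorator("ollama")
```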
@@ -43,6 +51,7 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             0,
         )
 
+    @retry_ollama
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -77,12 +86,14 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
                     }
                 )
             elif isinstance(ele, MLLMImage):
-
-
-
-
-
-
+                img_b64 = self.convert_to_base64(ele.url, ele.local)
+                if img_b64 is not None:
+                    messages.append(
+                        {
+                            "role": "user",
+                            "images": [img_b64],
+                        }
+                    )
         return messages
 
     ###############################################
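The rewritten `elif` branch builds the message shape Ollama's chat endpoint expects: images travel as base64 strings in a per-message `images` list rather than inline in `content`. For example (the model name below is an assumption, not something this diff pins down):

```python
# Mixed text+image input as an Ollama chat payload; each image becomes
# its own user message carrying a base64 string in "images".
messages = [
    {"role": "user", "content": "Describe this picture."},
    {"role": "user", "images": ["<base64-encoded image bytes>"]},
]
# client.chat(model="llava", messages=messages)  # model name assumed
```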
@@ -92,9 +103,17 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
     def convert_to_base64(self, image_source: str, is_local: bool) -> str:
         from PIL import Image
 
+        settings = get_settings()
         try:
             if not is_local:
-                response = requests.get(
+                response = requests.get(
+                    image_source,
+                    stream=True,
+                    timeout=(
+                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                    ),
+                )
                 response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
                 image = Image.open(io.BytesIO(response.content))
             else:
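The bare `requests.get(...)` gains an explicit `(connect, read)` timeout pair sourced from the new settings module, so a hung image host can no longer stall an evaluation indefinitely. In requests, a two-tuple bounds the two phases separately:

```python
import requests

# The connect timeout caps the TCP/TLS handshake; the read timeout caps
# each wait for response bytes.  3.05/10 are illustrative values, not the
# defaults behind MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS /
# MEDIA_IMAGE_READ_TIMEOUT_SECONDS.
resp = requests.get(
    "https://example.com/image.png",
    stream=True,
    timeout=(3.05, 10),
)
resp.raise_for_status()
```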
@@ -105,15 +124,21 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             img_str = base64.b64encode(buffered.getvalue()).decode()
             return img_str
 
+        except (requests.exceptions.RequestException, OSError) as e:
+            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
+            print(f"Image fetch/encode failed: {e}")
+            raise
         except Exception as e:
             print(f"Error converting image to base64: {e}")
             return None
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
-            return
-
-
+            return self._build_client(Client)
+        return self._build_client(AsyncClient)
+
+    def _build_client(self, cls):
+        return cls(host=self.base_url, **self.kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
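`load_model` now routes both the sync and async paths through `_build_client`, forwarding the constructor's stored `**kwargs` into the Ollama client. Assuming the Ollama SDK passes extra keyword arguments through to its underlying HTTP client (recent versions forward them to httpx), callers can thread transport options in from the top; a usage sketch under that assumption:

```python
from deepeval.models.mlllms.ollama_model import MultimodalOllamaModel

# Hypothetical extra options, forwarded as Client(host=base_url, **kwargs).
model = MultimodalOllamaModel(
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
sync_client = model.load_model(async_mode=False)   # ollama.Client
async_client = model.load_model(async_mode=True)   # ollama.AsyncClient
```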
deepeval/models/mlllms/openai_model.py

@@ -3,15 +3,7 @@ from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ParsedChatCompletion
 from pydantic import BaseModel
 from io import BytesIO
-import logging
-import openai
 import base64
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    wait_exponential_jitter,
-    RetryCallState,
-)
 
 from deepeval.models.llms.openai_model import (
     model_pricing,
@@ -21,21 +13,14 @@ from deepeval.models import DeepEvalBaseMLLM
 from deepeval.models.llms.utils import trim_and_load_json
 from deepeval.test_case import MLLMImage
 from deepeval.models.utils import parse_model_name
-
-
-
-    openai.APIConnectionError,
-    openai.APITimeoutError,
-    openai.LengthFinishReasonError,
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
 )
+from deepeval.constants import ProviderSlug as PS
 
 
-
-    exception = retry_state.outcome.exception()
-    logging.error(
-        f"OpenAI Error: {exception} Retrying: {retry_state.attempt_number} time(s)..."
-    )
-
+retry_openai = create_retry_decorator(PS.OPENAI)
 
 valid_multimodal_gpt_models = [
     "gpt-4o",
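With the hand-rolled exception tuple and `log_retry_error` callback gone, the single `retry_openai` decorator covers both `generate` and `a_generate` below. That works because tenacity's `retry` transparently wraps coroutine functions as well as plain ones, which `create_retry_decorator` presumably relies on; a self-contained illustration:

```python
import asyncio
from tenacity import retry, stop_after_attempt

@retry(stop=stop_after_attempt(2), reraise=True)
async def flaky() -> str:
    # A transient failure raised here would be retried once before the
    # exception is re-raised to the caller.
    return "ok"

print(asyncio.run(flaky()))
```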
@@ -95,11 +80,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
     # Generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     def generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -136,11 +117,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         else:
             return output, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     async def a_generate(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
@@ -181,17 +158,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
     # Other generate functions
     ###############################################
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     def generate_raw_response(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
         top_logprobs: int = 5,
     ) -> Tuple[ParsedChatCompletion, float]:
-        client =
+        client = self._client()
         prompt = self.generate_prompt(multimodal_input)
         messages = [{"role": "user", "content": prompt}]
         completion = client.chat.completions.create(
@@ -206,17 +179,13 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         cost = self.calculate_cost(input_tokens, output_tokens)
         return completion, cost
 
-    @retry(
-        wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10),
-        retry=retry_if_exception_type(retryable_exceptions),
-        after=log_retry_error,
-    )
+    @retry_openai
     async def a_generate_raw_response(
         self,
         multimodal_input: List[Union[str, MLLMImage]],
         top_logprobs: int = 5,
     ) -> Tuple[ParsedChatCompletion, float]:
-        client =
+        client = self._client(async_mode=True)
         prompt = self.generate_prompt(multimodal_input)
         messages = [{"role": "user", "content": prompt}]
         completion = await client.chat.completions.create(
@@ -278,5 +247,12 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
         base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
         return base64_encoded_image
 
+    def _client(self, async_mode: bool = False):
+        kw = {"api_key": self._openai_api_key}
+        if not sdk_retries_for(PS.OPENAI):
+            kw["max_retries"] = 0
+        Client = AsyncOpenAI if async_mode else OpenAI
+        return Client(**kw)
+
     def get_model_name(self):
         return self.model_name
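The new `_client` helper is the other half of the retry consolidation: when deepeval's own `@retry_openai` policy is active, the OpenAI SDK's built-in retries (default `max_retries=2`) are switched off so a failing request is not retried at both layers. A sketch of the guard, assuming `sdk_retries_for(provider)` returns True only when retrying is delegated to the SDK:

```python
from openai import OpenAI

def make_client(api_key: str, delegate_to_sdk: bool) -> OpenAI:
    kw = {"api_key": api_key}
    if not delegate_to_sdk:
        # The outer tenacity policy handles retries; leaving the SDK default
        # of max_retries=2 in place would compound attempts multiplicatively
        # (e.g. 3 outer attempts x 3 inner attempts = up to 9 requests).
        kw["max_retries"] = 0
    return OpenAI(**kw)
```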