deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +70 -26
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +52 -28
- deepeval/models/embedding_models/local_embedding_model.py +18 -14
- deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
- deepeval/models/embedding_models/openai_embedding_model.py +40 -21
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +44 -23
- deepeval/models/llms/azure_model.py +121 -36
- deepeval/models/llms/deepseek_model.py +18 -13
- deepeval/models/llms/gemini_model.py +129 -43
- deepeval/models/llms/grok_model.py +18 -13
- deepeval/models/llms/kimi_model.py +18 -13
- deepeval/models/llms/litellm_model.py +42 -22
- deepeval/models/llms/local_model.py +12 -7
- deepeval/models/llms/ollama_model.py +114 -12
- deepeval/models/llms/openai_model.py +137 -41
- deepeval/models/llms/portkey_model.py +24 -7
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +46 -1
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/litellm_model.py (+42 -22)

```diff
@@ -10,7 +10,10 @@ from tenacity import (
 )
 
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
 
@@ -27,6 +30,10 @@ retryable_exceptions = (
     Exception,  # LiteLLM handles specific exceptions internally
 )
 
+_ALIAS_MAP = {
+    "base_url": ["api_base"],
+}
+
 
 class LiteLLMModel(DeepEvalBaseLLM):
     EXP_BASE: int = 2
@@ -39,16 +46,26 @@ class LiteLLMModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
 
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "LiteLLMModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map deprecated keywords to renamed positional args
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
         settings = get_settings()
         # Get model name from parameter or key file
-        model_name = model or settings.LITELLM_MODEL_NAME
-        if not model_name:
+        model = model or settings.LITELLM_MODEL_NAME
+        if not model:
             raise ValueError(
                 "Model name must be provided either through parameter or set-litellm command"
             )
@@ -67,8 +84,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
         )
 
         # Get API base from parameter, key file, or environment variable
-        self.api_base = (
-            api_base
+        self.base_url = (
+            base_url
             or (
                 str(settings.LITELLM_API_BASE)
                 if settings.LITELLM_API_BASE is not None
@@ -84,10 +101,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-        self.kwargs = kwargs
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self.evaluation_cost = 0.0  # Initialize cost to 0.0
-        super().__init__(model_name)
+        super().__init__(model)
 
     @retry(
         wait=wait_exponential_jitter(
@@ -100,10 +118,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
         from litellm import completion
 
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
         }
@@ -116,8 +135,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
                 param_hint="`api_key` to LiteLLMModel(...)",
             )
             completion_params["api_key"] = api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+        if self.base_url:
+            completion_params["api_base"] = self.base_url
 
         # Add schema if provided
         if schema:
@@ -155,10 +174,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
        from litellm import acompletion
 
        completion_params = {
-            "model": self.model_name,
+            "model": self.name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": self.temperature,
        }
@@ -171,8 +191,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
                 param_hint="`api_key` to LiteLLMModel(...)",
             )
             completion_params["api_key"] = api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+        if self.base_url:
+            completion_params["api_base"] = self.base_url
 
         # Add schema if provided
         if schema:
@@ -222,11 +242,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
             "logprobs": True,
             "top_logprobs": top_logprobs,
         }
@@ -263,11 +283,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
             "logprobs": True,
             "top_logprobs": top_logprobs,
         }
@@ -302,12 +322,12 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": temperature,
             "n": n,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
         }
         completion_params.update(self.kwargs)
 
@@ -353,8 +373,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def get_model_name(self) -> str:
         from litellm import get_llm_provider
 
-        provider = get_llm_provider(self.model_name)
-        return f"{self.model_name} ({provider})"
+        provider = get_llm_provider(self.name)
+        return f"{self.name} ({provider})"
 
     def load_model(self, async_mode: bool = False):
         """
```
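The hunks above rename `LiteLLMModel`'s `api_base` parameter to `base_url` while keeping the legacy keyword alive through `_ALIAS_MAP`. The sketch below is illustrative only: it mimics the `(normalized_kwargs, alias_values)` contract implied by the diff, not the actual helper shipped in `deepeval/models/utils.py`.

```python
import warnings
from typing import Dict, List, Tuple


def normalize_kwargs_and_extract_aliases(
    model_label: str,
    kwargs: Dict,
    alias_map: Dict[str, List[str]],
) -> Tuple[Dict, Dict]:
    """Plausible behavior: strip deprecated kwargs, surface them by canonical name."""
    normalized = dict(kwargs)
    alias_values = {}
    for canonical, aliases in alias_map.items():
        for alias in aliases:
            if alias in normalized:
                warnings.warn(
                    f"{model_label}: `{alias}` is deprecated, use `{canonical}` instead",
                    DeprecationWarning,
                )
                # Record the value under the canonical name; pop the legacy key
                # so self.kwargs never forwards it to the litellm client.
                alias_values.setdefault(canonical, normalized.pop(alias))
    return normalized, alias_values


# A 3.7.4-style call such as
#   LiteLLMModel(model="gpt-4o-mini", api_base="http://localhost:4000")
# would then resolve to base_url="http://localhost:4000" inside __init__.
```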
deepeval/models/llms/local_model.py (+12 -7)

```diff
@@ -9,7 +9,9 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
@@ -22,8 +24,8 @@ class LocalModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: Optional[str] = None,
-        base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         format: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
@@ -31,7 +33,7 @@ class LocalModel(DeepEvalBaseLLM):
     ):
         settings = get_settings()
 
-        model_name = model or settings.LOCAL_MODEL_NAME
+        model = model or settings.LOCAL_MODEL_NAME
         if api_key is not None:
             # keep it secret, keep it safe from serialization, logging and the like
             self.local_model_api_key: SecretStr | None = SecretStr(api_key)
@@ -47,9 +49,10 @@ class LocalModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -59,9 +62,10 @@ class LocalModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         response: ChatCompletion = client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -78,9 +82,10 @@ class LocalModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         response: ChatCompletion = await client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -98,7 +103,7 @@ class LocalModel(DeepEvalBaseLLM):
     ###############################################
 
     def get_model_name(self):
-        return f"{self.model_name} (Local Model)"
+        return f"{self.name} (Local Model)"
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
```
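`LocalModel` gets the lighter version of the same treatment: the utils import is parenthesized, `base_url` moves after `api_key` in the signature, and the resolved model name is passed to the base class (exposed as `self.name`). Keyword callers are unaffected; only 3.7.4 code passing `base_url` positionally would break. A minimal usage sketch (the model name, key, and endpoint below are placeholder assumptions):

```python
from deepeval.models.llms.local_model import LocalModel

# Keyword arguments are order-independent, so the 3.7.5 parameter reorder
# (api_key now precedes base_url) does not affect this call.
model = LocalModel(
    model="llama3.1:8b",                   # placeholder model name
    api_key="sk-placeholder",              # stored internally as a SecretStr
    base_url="http://localhost:8000/v1",   # placeholder endpoint
)
print(model.get_model_name())  # -> "llama3.1:8b (Local Model)"
```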
deepeval/models/llms/ollama_model.py (+114 -12)

```diff
@@ -1,14 +1,32 @@
-from ollama import Client, AsyncClient, ChatResponse
-from typing import Optional, Tuple, Union, Dict
+from typing import TYPE_CHECKING, Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel
+import requests
+import base64
+import io
 
 from deepeval.config.settings import get_settings
+from deepeval.utils import require_dependency
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
+from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.test_case import MLLMImage
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
+valid_multimodal_models = [
+    "llava:7b",
+    "llava:13b",
+    "llava:34b",
+    "llama4",
+    "gemma3",
+    "qwen3-vl",
+    "qwen2.5-vl",
+    # TODO: Add more models later on by looking at their catalogue
+]
+
+if TYPE_CHECKING:
+    from ollama import ChatResponse
 
 retry_ollama = create_retry_decorator(PS.OLLAMA)
 
@@ -23,7 +41,7 @@ class OllamaModel(DeepEvalBaseLLM):
         **kwargs,
     ):
         settings = get_settings()
-        model_name = model or settings.LOCAL_MODEL_NAME
+        model = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
             or (
@@ -35,10 +53,10 @@ class OllamaModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-        # …
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -49,9 +67,17 @@ class OllamaModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
         chat_model = self.load_model()
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            messages = self.generate_messages(prompt)
+        else:
+            messages = [{"role": "user", "content": prompt}]
+        print(messages)
+
         response: ChatResponse = chat_model.chat(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
+            model=self.name,
+            messages=messages,
             format=schema.model_json_schema() if schema else None,
             options={
                 **{"temperature": self.temperature},
@@ -72,9 +98,16 @@ class OllamaModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
         chat_model = self.load_model(async_mode=True)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            messages = self.generate_messages(prompt)
+        else:
+            messages = [{"role": "user", "content": prompt}]
+
         response: ChatResponse = await chat_model.chat(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
+            model=self.name,
+            messages=messages,
             format=schema.model_json_schema() if schema else None,
             options={
                 **{"temperature": self.temperature},
@@ -90,14 +123,78 @@ class OllamaModel(DeepEvalBaseLLM):
                 0,
             )
 
+    def generate_messages(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        messages = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": ele,
+                    }
+                )
+            elif isinstance(ele, MLLMImage):
+                img_b64 = self.convert_to_base64(ele.url, ele.local)
+                if img_b64 is not None:
+                    messages.append(
+                        {
+                            "role": "user",
+                            "images": [img_b64],
+                        }
+                    )
+        return messages
+
+    ###############################################
+    # Utilities
+    ###############################################
+
+    def convert_to_base64(self, image_source: str, is_local: bool) -> str:
+        from PIL import Image
+
+        settings = get_settings()
+        try:
+            if not is_local:
+                response = requests.get(
+                    image_source,
+                    stream=True,
+                    timeout=(
+                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                    ),
+                )
+                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+                image = Image.open(io.BytesIO(response.content))
+            else:
+                image = Image.open(image_source)
+
+            buffered = io.BytesIO()
+            image.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode()
+            return img_str
+
+        except (requests.exceptions.RequestException, OSError) as e:
+            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
+            print(f"Image fetch/encode failed: {e}")
+            raise
+        except Exception as e:
+            print(f"Error converting image to base64: {e}")
+            return None
+
     ###############################################
     # Model
     ###############################################
 
     def load_model(self, async_mode: bool = False):
+        ollama = require_dependency(
+            "ollama",
+            provider_label="OllamaModel",
+            install_hint="Install it with `pip install ollama`.",
+        )
         if not async_mode:
-            return self._build_client(Client)
-        return self._build_client(AsyncClient)
+            return self._build_client(ollama.Client)
+        return self._build_client(ollama.AsyncClient)
 
     def _client_kwargs(self) -> Dict:
         """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
@@ -110,5 +207,10 @@ class OllamaModel(DeepEvalBaseLLM):
         )
         return cls(**kw)
 
+    def supports_multimodal(self):
+        if self.name in valid_multimodal_models:
+            return True
+        return False
+
     def get_model_name(self):
-        return f"{self.model_name} (Ollama)"
+        return f"{self.name} (Ollama)"
```