deepeval-3.7.5-py3-none-any.whl → deepeval-3.7.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +106 -21
- deepeval/evaluate/evaluate.py +0 -3
- deepeval/evaluate/execute.py +10 -222
- deepeval/evaluate/utils.py +6 -30
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +0 -4
- deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
- deepeval/metrics/answer_relevancy/template.py +102 -179
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -41
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
- deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +14 -0
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +82 -136
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +3 -6
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +3 -3
- deepeval/metrics/turn_contextual_precision/template.py +1 -1
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
- deepeval/metrics/turn_contextual_recall/schema.py +3 -3
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
- deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
- deepeval/metrics/turn_faithfulness/schema.py +1 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +145 -90
- deepeval/models/base_model.py +44 -6
- deepeval/models/embedding_models/azure_embedding_model.py +34 -12
- deepeval/models/embedding_models/local_embedding_model.py +22 -7
- deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
- deepeval/models/embedding_models/openai_embedding_model.py +3 -2
- deepeval/models/llms/amazon_bedrock_model.py +226 -71
- deepeval/models/llms/anthropic_model.py +141 -47
- deepeval/models/llms/azure_model.py +167 -94
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +79 -29
- deepeval/models/llms/gemini_model.py +126 -67
- deepeval/models/llms/grok_model.py +125 -59
- deepeval/models/llms/kimi_model.py +126 -81
- deepeval/models/llms/litellm_model.py +92 -18
- deepeval/models/llms/local_model.py +114 -15
- deepeval/models/llms/ollama_model.py +97 -76
- deepeval/models/llms/openai_model.py +167 -310
- deepeval/models/llms/portkey_model.py +58 -16
- deepeval/models/llms/utils.py +5 -2
- deepeval/models/utils.py +60 -4
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/api.py +24 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +55 -6
- deepeval/test_case/llm_test_case.py +60 -6
- deepeval/test_run/api.py +3 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
--- a/deepeval/models/llms/litellm_model.py
+++ b/deepeval/models/llms/litellm_model.py
@@ -9,13 +9,17 @@ from tenacity import (
     RetryCallState,
 )
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models.utils import (
     require_secret_api_key,
     normalize_kwargs_and_extract_aliases,
 )
+from deepeval.test_case import MLLMImage
+from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.utils import require_param
 
 
 def log_retry_error(retry_state: RetryCallState):
@@ -47,11 +51,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
+        settings = get_settings()
         normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
             "LiteLLMModel",
             kwargs,
@@ -62,18 +66,13 @@ class LiteLLMModel(DeepEvalBaseLLM):
         if base_url is None and "base_url" in alias_values:
             base_url = alias_values["base_url"]
 
-        settings = get_settings()
         # Get model name from parameter or key file
         model = model or settings.LITELLM_MODEL_NAME
-        if not model:
-            raise ValueError(
-                "Model name must be provided either through parameter or set-litellm command"
-            )
 
         # Get API key from parameter, or settings
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.api_key: SecretStr = SecretStr(api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = (
                 settings.LITELLM_API_KEY
@@ -84,7 +83,7 @@ class LiteLLMModel(DeepEvalBaseLLM):
             )
 
         # Get API base from parameter, key file, or environment variable
-        self.base_url = (
+        base_url = (
             base_url
             or (
                 str(settings.LITELLM_API_BASE)
@@ -97,13 +96,35 @@ class LiteLLMModel(DeepEvalBaseLLM):
                 else None
             )
         )
+        self.base_url = (
+            str(base_url).rstrip("/") if base_url is not None else None
+        )
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="LiteLLMModel",
+            env_var_name="LITELLM_MODEL_NAME",
+            param_hint="model",
+        )
 
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = normalized_kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
         self.evaluation_cost = 0.0  # Initialize cost to 0.0
         super().__init__(model)
 
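All three refactored providers (LiteLLMModel here, LocalModel and OllamaModel below) now share the same temperature precedence: an explicit argument wins, then the `TEMPERATURE` setting, then a `0.0` default, with negatives rejected. A minimal sketch of that order, using a hypothetical `resolve_temperature` helper rather than anything shipped in deepeval:

```python
from typing import Optional


def resolve_temperature(
    explicit: Optional[float], settings_temperature: Optional[float]
) -> float:
    # Hypothetical helper mirroring the diff: explicit argument wins,
    # then the TEMPERATURE setting, then a 0.0 default; negatives are rejected.
    if explicit is not None:
        temperature = float(explicit)
    elif settings_temperature is not None:
        temperature = settings_temperature
    else:
        temperature = 0.0
    if temperature < 0:
        raise ValueError("Temperature must be >= 0.")
    return temperature


print(resolve_temperature(None, None))  # 0.0
print(resolve_temperature(0.7, 0.2))    # 0.7
```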
@@ -117,13 +138,19 @@ class LiteLLMModel(DeepEvalBaseLLM):
     )
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Union[str, Dict]:
+    ) -> Tuple[Union[str, BaseModel], float]:
 
         from litellm import completion
 
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
+
         completion_params = {
             "model": self.name,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [{"role": "user", "content": content}],
             "temperature": self.temperature,
         }
 
@@ -131,7 +158,7 @@ class LiteLLMModel(DeepEvalBaseLLM):
         api_key = require_secret_api_key(
             self.api_key,
             provider_label="LiteLLM",
-            env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+            env_var_name="LITELLM_API_KEY|LITELLM_PROXY_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params["api_key"] = api_key
@@ -173,13 +200,19 @@ class LiteLLMModel(DeepEvalBaseLLM):
     )
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Union[str, Dict]:
+    ) -> Tuple[Union[str, BaseModel], float]:
 
         from litellm import acompletion
 
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
+
         completion_params = {
             "model": self.name,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [{"role": "user", "content": content}],
             "temperature": self.temperature,
         }
 
@@ -241,9 +274,14 @@
             env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
             param_hint="`api_key` to LiteLLMModel(...)",
         )
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion_params = {
             "model": self.name,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [{"role": "user", "content": content}],
             "temperature": self.temperature,
             "api_key": api_key,
             "api_base": self.base_url,
@@ -282,9 +320,14 @@
             env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
             param_hint="`api_key` to LiteLLMModel(...)",
         )
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion_params = {
             "model": self.name,
-            "messages": [{"role": "user", "content": prompt}],
+            "messages": [{"role": "user", "content": content}],
             "temperature": self.temperature,
             "api_key": api_key,
             "api_base": self.base_url,
@@ -340,6 +383,34 @@
             logging.error(f"Error in LiteLLM generate_samples: {e}")
             raise
 
+    def generate_content(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
+                else:
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
+
     def calculate_cost(self, response: Any) -> float:
         """Calculate the cost of the response based on token usage."""
         try:
@@ -389,3 +460,6 @@
         None as LiteLLM handles client creation internally
         """
         return None
+
+    def supports_multimodal(self):
+        return True
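The new `generate_content` above emits OpenAI-style content parts: text stays inline, a remote image passes through as its URL, and a local image is inlined as a base64 data URI. An illustrative payload (values invented for the example, not taken from the diff):

```python
# Illustrative only: the message shape LiteLLMModel now sends for
# multimodal prompts. Remote images keep their URL; local images become
# data URIs under the same "image_url" part type.
content = [
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
]
messages = [{"role": "user", "content": content}]
```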
--- a/deepeval/models/llms/local_model.py
+++ b/deepeval/models/llms/local_model.py
@@ -1,8 +1,9 @@
-from typing import Optional, Tuple, Union, Dict
+from typing import Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel, SecretStr
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -14,6 +15,12 @@ from deepeval.models.utils import (
 )
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
+from deepeval.test_case import MLLMImage
+from deepeval.utils import (
+    check_if_multimodal,
+    convert_to_multi_modal_array,
+    require_param,
+)
 
 
 # consistent retry rules
@@ -26,7 +33,7 @@ class LocalModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
         format: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
@@ -35,38 +42,64 @@ class LocalModel(DeepEvalBaseLLM):
 
         model = model or settings.LOCAL_MODEL_NAME
         if api_key is not None:
-
-            self.local_model_api_key: SecretStr | None = SecretStr(api_key)
+            self.local_model_api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.local_model_api_key = settings.LOCAL_MODEL_API_KEY
 
+        base_url = (
+            base_url if base_url is not None else settings.LOCAL_MODEL_BASE_URL
+        )
         self.base_url = (
-            base_url
-            or settings.LOCAL_MODEL_BASE_URL
-            and str(settings.LOCAL_MODEL_BASE_URL)
+            str(base_url).rstrip("/") if base_url is not None else None
         )
         self.format = format or settings.LOCAL_MODEL_FORMAT
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="LocalModel",
+            env_var_name="LOCAL_MODEL_NAME",
+            param_hint="model",
+        )
+
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
-
+
         self.kwargs = kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
         super().__init__(model)
 
     ###############################################
-    #
+    # Generate functions
     ###############################################
 
     @retry_local
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = prompt
 
         client = self.load_model(async_mode=False)
         response: ChatCompletion = client.chat.completions.create(
             model=self.name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             **self.generation_kwargs,
         )
@@ -81,12 +114,18 @@ class LocalModel(DeepEvalBaseLLM):
     @retry_local
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = prompt
 
         client = self.load_model(async_mode=True)
         response: ChatCompletion = await client.chat.completions.create(
             model=self.name,
-            messages=[{"role": "user", "content": prompt}],
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             **self.generation_kwargs,
         )
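As in LiteLLMModel, both generate paths now branch on `check_if_multimodal` and route list prompts through `generate_content` (added in the next hunk). A hedged usage sketch, assuming only the `MLLMImage` fields the diff reads (`url`, `local`); the keyword arguments here are illustrative:

```python
from deepeval.test_case import MLLMImage

# A multimodal prompt mixes text and MLLMImage entries in one list; a plain
# string keeps the old single-message behavior.
multimodal_prompt = [
    "What is shown in this screenshot?",
    MLLMImage(url="https://example.com/screenshot.png", local=False),
]
```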
@@ -98,6 +137,63 @@ class LocalModel(DeepEvalBaseLLM):
         else:
             return res_content, 0.0
 
+    def generate_content(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        """
+        Converts multimodal input into OpenAI-compatible format.
+        Uses data URIs for all images since we can't guarantee local servers support URL fetching.
+        """
+        prompt = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                prompt.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                # For local servers, use data URIs for both remote and local images
+                # Most local servers don't support fetching external URLs
+                if element.url and not element.local:
+                    import requests
+                    import base64
+
+                    settings = get_settings()
+                    try:
+                        response = requests.get(
+                            element.url,
+                            timeout=(
+                                settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                                settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                            ),
+                        )
+                        response.raise_for_status()
+
+                        # Get mime type from response
+                        mime_type = response.headers.get(
+                            "content-type", element.mimeType or "image/jpeg"
+                        )
+
+                        # Encode to base64
+                        b64_data = base64.b64encode(response.content).decode(
+                            "utf-8"
+                        )
+                        data_uri = f"data:{mime_type};base64,{b64_data}"
+
+                    except Exception as e:
+                        raise ValueError(
+                            f"Failed to fetch remote image {element.url}: {e}"
+                        )
+                else:
+                    element.ensure_images_loaded()
+                    mime_type = element.mimeType or "image/jpeg"
+                    data_uri = f"data:{mime_type};base64,{element.dataBase64}"
+
+                prompt.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": data_uri},
+                    }
+                )
+        return prompt
+
     ###############################################
     # Model
     ###############################################
@@ -105,6 +201,9 @@ class LocalModel(DeepEvalBaseLLM):
     def get_model_name(self):
         return f"{self.name} (Local Model)"
 
+    def supports_multimodal(self):
+        return True
+
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(OpenAI)
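The remote branch of `generate_content` reduces to fetch, read the MIME type, base64-encode, and wrap as a data URI. A self-contained sketch of just that step; the URL is a placeholder and the literal timeouts stand in for the `MEDIA_IMAGE_*` settings:

```python
import base64

import requests

url = "https://example.com/cat.png"  # placeholder
response = requests.get(url, timeout=(5, 30))
response.raise_for_status()

# Prefer the server-reported MIME type, fall back to JPEG.
mime_type = response.headers.get("content-type", "image/jpeg")
b64_data = base64.b64encode(response.content).decode("utf-8")
data_uri = f"data:{mime_type};base64,{b64_data}"

image_part = {"type": "image_url", "image_url": {"url": data_uri}}
```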
--- a/deepeval/models/llms/ollama_model.py
+++ b/deepeval/models/llms/ollama_model.py
@@ -1,11 +1,10 @@
 from typing import TYPE_CHECKING, Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel
-import requests
 import base64
-import io
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
-from deepeval.utils import require_dependency
+from deepeval.utils import require_dependency, require_param
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
@@ -13,17 +12,7 @@ from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
 from deepeval.test_case import MLLMImage
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
-
-valid_multimodal_models = [
-    "llava:7b",
-    "llava:13b",
-    "llava:34b",
-    "llama4",
-    "gemma3",
-    "qwen3-vl",
-    "qwen2.5-vl",
-    # TODO: Add more models later on by looking at their catelogue
-]
+from deepeval.models.llms.constants import OLLAMA_MODELS_DATA
 
 if TYPE_CHECKING:
     from ollama import ChatResponse
@@ -36,26 +25,46 @@ class OllamaModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         base_url: Optional[str] = None,
-        temperature: float = 0,
+        temperature: Optional[float] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         settings = get_settings()
-        model = model or settings.
-        self.
-
-
-
-
-        )
-
+        model = model or settings.OLLAMA_MODEL_NAME
+        self.model_data = OLLAMA_MODELS_DATA.get(model)
+
+        if base_url is not None:
+            self.base_url = str(base_url).rstrip("/")
+        elif settings.LOCAL_MODEL_BASE_URL is not None:
+            self.base_url = str(settings.LOCAL_MODEL_BASE_URL).rstrip("/")
+        else:
+            self.base_url = "http://localhost:11434"
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="OllamaModel",
+            env_var_name="LOCAL_MODEL_NAME",
+            param_hint="model",
        )
 
         if temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature
         # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
-        self.generation_kwargs = generation_kwargs or {}
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
         super().__init__(model)
 
     ###############################################
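The constructor now looks the model up in `OLLAMA_MODELS_DATA`, the per-model capability table added in `deepeval/models/llms/constants.py` (+2032 lines), and the `supports_*` accessors in a later hunk read flags off that entry. A hypothetical sketch of the shape this implies; the real field names and entries live in the constants module:

```python
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class OllamaModelData:  # hypothetical stand-in for one table entry
    supports_multimodal: bool
    supports_json: bool
    supports_structured_outputs: bool
    supports_log_probs: bool
    supports_temperature: bool


OLLAMA_MODELS_DATA: Dict[str, OllamaModelData] = {
    "llava:7b": OllamaModelData(True, True, True, False, True),
}

# .get() returns None for unknown models, so callers check before reading flags.
model_data: Optional[OllamaModelData] = OLLAMA_MODELS_DATA.get("llava:7b")
if model_data is not None:
    print(model_data.supports_multimodal)  # True
```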
@@ -65,7 +74,7 @@ class OllamaModel(DeepEvalBaseLLM):
     @retry_ollama
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
         chat_model = self.load_model()
 
         if check_if_multimodal(prompt):
@@ -73,7 +82,6 @@ class OllamaModel(DeepEvalBaseLLM):
             messages = self.generate_messages(prompt)
         else:
             messages = [{"role": "user", "content": prompt}]
-        print(messages)
 
         response: ChatResponse = chat_model.chat(
             model=self.name,
@@ -96,7 +104,7 @@ class OllamaModel(DeepEvalBaseLLM):
     @retry_ollama
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[str, float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
         chat_model = self.load_model(async_mode=True)
 
         if check_if_multimodal(prompt):
@@ -127,60 +135,78 @@
         self, multimodal_input: List[Union[str, MLLMImage]] = []
     ):
         messages = []
-
-
+
+        for element in multimodal_input:
+            if isinstance(element, str):
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": element,
+                    }
+                )
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    import requests
+                    from PIL import Image
+                    import io
+
+                    settings = get_settings()
+                    try:
+                        response = requests.get(
+                            element.url,
+                            stream=True,
+                            timeout=(
+                                settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                                settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                            ),
+                        )
+                        response.raise_for_status()
+
+                        # Convert to JPEG and encode
+                        image = Image.open(io.BytesIO(response.content))
+                        buffered = io.BytesIO()
+
+                        # Convert RGBA/LA/P to RGB for JPEG
+                        if image.mode in ("RGBA", "LA", "P"):
+                            image = image.convert("RGB")
+
+                        image.save(buffered, format="JPEG")
+                        img_b64 = base64.b64encode(buffered.getvalue()).decode()
+
+                    except (requests.exceptions.RequestException, OSError) as e:
+                        print(f"Image fetch/encode failed: {e}")
+                        raise
+                else:
+                    element.ensure_images_loaded()
+                    img_b64 = element.dataBase64
+
                 messages.append(
                     {
                         "role": "user",
-                        "
+                        "images": [img_b64],
                     }
                 )
-
-            img_b64 = self.convert_to_base64(ele.url, ele.local)
-            if img_b64 is not None:
-                messages.append(
-                    {
-                        "role": "user",
-                        "images": [img_b64],
-                    }
-                )
+
         return messages
 
     ###############################################
-    #
+    # Capabilities
     ###############################################
 
-    def
-
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
 
-
-
-
-
-
-
-
-
-
-
-
-                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
-                image = Image.open(io.BytesIO(response.content))
-            else:
-                image = Image.open(image_source)
-
-            buffered = io.BytesIO()
-            image.save(buffered, format="JPEG")
-            img_str = base64.b64encode(buffered.getvalue()).decode()
-            return img_str
-
-        except (requests.exceptions.RequestException, OSError) as e:
-            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
-            print(f"Image fetch/encode failed: {e}")
-            raise
-        except Exception as e:
-            print(f"Error converting image to base64: {e}")
-            return None
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
 
     ###############################################
     # Model
@@ -207,10 +233,5 @@
         )
         return cls(**kw)
 
-    def supports_multimodal(self):
-        if self.name in valid_multimodal_models:
-            return True
-        return False
-
     def get_model_name(self):
         return f"{self.name} (Ollama)"
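For contrast with the OpenAI-style parts used by LiteLLMModel and LocalModel, the Ollama chat format that `generate_messages` emits keeps text and images in separate user messages, with raw base64 strings (no data-URI prefix) under `"images"`. Illustrative values only:

```python
# Illustrative only: Ollama's chat API takes bare base64 payloads under
# "images", separate from the text "content" message.
messages = [
    {"role": "user", "content": "What is in this picture?"},
    {"role": "user", "images": ["<base64-encoded JPEG bytes>"]},
]
```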