deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/litellm_model.py

@@ -10,7 +10,10 @@ from tenacity import (
 )
 
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
 
@@ -27,6 +30,10 @@ retryable_exceptions = (
     Exception,  # LiteLLM handles specific exceptions internally
 )
 
+_ALIAS_MAP = {
+    "base_url": ["api_base"],
+}
+
 
 class LiteLLMModel(DeepEvalBaseLLM):
     EXP_BASE: int = 2
@@ -39,16 +46,26 @@ class LiteLLMModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
 
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "LiteLLMModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map deprecated keywords to renamed positional args
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
         settings = get_settings()
         # Get model name from parameter or key file
-        model_name = model or settings.LITELLM_MODEL_NAME
-        if not model_name:
+        model = model or settings.LITELLM_MODEL_NAME
+        if not model:
             raise ValueError(
                 "Model name must be provided either through parameter or set-litellm command"
             )
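The constructor now routes `**kwargs` through `normalize_kwargs_and_extract_aliases` before anything else. The helper's body is not part of this diff; below is a minimal sketch of the behavior implied by the call site above, where everything beyond the three-argument signature and the two return values is an assumption:

    import warnings
    from typing import Dict, List, Tuple


    def normalize_kwargs_and_extract_aliases(
        caller: str,
        kwargs: Dict,
        alias_map: Dict[str, List[str]],
    ) -> Tuple[Dict, Dict]:
        # Returns (normalized_kwargs, alias_values): kwargs with legacy alias
        # keys stripped out, plus a map from the canonical name ("base_url")
        # to the value that arrived under a deprecated alias ("api_base").
        normalized = dict(kwargs)
        alias_values: Dict = {}
        for canonical, aliases in alias_map.items():
            for alias in aliases:
                if alias in normalized:
                    warnings.warn(
                        f"{caller}: `{alias}` is deprecated; use `{canonical}`.",
                        DeprecationWarning,
                    )
                    alias_values.setdefault(canonical, normalized.pop(alias))
        return normalized, alias_values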
@@ -67,8 +84,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
         )
 
         # Get API base from parameter, key file, or environment variable
-        self.api_base = (
-            api_base
+        self.base_url = (
+            base_url
             or (
                 str(settings.LITELLM_API_BASE)
                 if settings.LITELLM_API_BASE is not None
@@ -84,10 +101,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-        self.kwargs = kwargs
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self.evaluation_cost = 0.0  # Initialize cost to 0.0
-        super().__init__(model_name)
+        super().__init__(model)
 
     @retry(
         wait=wait_exponential_jitter(
@@ -100,10 +118,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
         from litellm import completion
 
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
         }
@@ -116,8 +135,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params["api_key"] = api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+        if self.base_url:
+            completion_params["api_base"] = self.base_url
 
         # Add schema if provided
         if schema:
@@ -155,10 +174,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
         from litellm import acompletion
 
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
         }
@@ -171,8 +191,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params["api_key"] = api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+        if self.base_url:
+            completion_params["api_base"] = self.base_url
 
         # Add schema if provided
         if schema:
@@ -222,11 +242,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
             "logprobs": True,
             "top_logprobs": top_logprobs,
         }
@@ -263,11 +283,11 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
             "logprobs": True,
             "top_logprobs": top_logprobs,
         }
@@ -302,12 +322,12 @@ class LiteLLMModel(DeepEvalBaseLLM):
             param_hint="`api_key` to LiteLLMModel(...)",
         )
         completion_params = {
-            "model": self.model_name,
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": temperature,
             "n": n,
             "api_key": api_key,
-            "api_base": self.api_base,
+            "api_base": self.base_url,
         }
         completion_params.update(self.kwargs)
 
@@ -353,8 +373,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def get_model_name(self) -> str:
         from litellm import get_llm_provider
 
-        provider = get_llm_provider(self.model_name)
-        return f"{self.model_name} ({provider})"
+        provider = get_llm_provider(self.name)
+        return f"{self.name} ({provider})"
 
     def load_model(self, async_mode: bool = False):
         """
deepeval/models/llms/local_model.py

@@ -9,7 +9,9 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
@@ -22,8 +24,8 @@ class LocalModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: Optional[str] = None,
-        base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         format: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
@@ -31,7 +33,7 @@ class LocalModel(DeepEvalBaseLLM):
     ):
         settings = get_settings()
 
-        model_name = model or settings.LOCAL_MODEL_NAME
+        model = model or settings.LOCAL_MODEL_NAME
         if api_key is not None:
             # keep it secret, keep it safe from serialization, logging, and the like
             self.local_model_api_key: SecretStr | None = SecretStr(api_key)
@@ -47,9 +49,10 @@ class LocalModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -59,9 +62,10 @@ class LocalModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         response: ChatCompletion = client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -78,9 +82,10 @@ class LocalModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         response: ChatCompletion = await client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -98,7 +103,7 @@ class LocalModel(DeepEvalBaseLLM):
     ###############################################
 
     def get_model_name(self):
-        return f"{self.model_name} (Local Model)"
+        return f"{self.name} (Local Model)"
 
     def load_model(self, async_mode: bool = False):
         if not async_mode:
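One caller-facing wrinkle in LocalModel: `api_key` and `base_url` swapped positions in `__init__`, so positional calls written against 3.7.4 now bind the URL to `api_key` and vice versa. A sketch with placeholder values:

    from deepeval.models.llms.local_model import LocalModel

    # Fragile: positionally this meant (model, base_url, api_key) in 3.7.4
    # but means (model, api_key, base_url) in 3.7.5, silently swapping values.
    risky = LocalModel("my-model", "http://localhost:8000/v1", "sk-anything")

    # Robust across both versions: bind by keyword.
    safe = LocalModel(
        model="my-model",
        base_url="http://localhost:8000/v1",
        api_key="sk-anything",
    )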
deepeval/models/llms/ollama_model.py

@@ -1,14 +1,32 @@
-from ollama import Client, AsyncClient, ChatResponse
-from typing import Optional, Tuple, Union, Dict
+from typing import TYPE_CHECKING, Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel
+import requests
+import base64
+import io
 
 from deepeval.config.settings import get_settings
+from deepeval.utils import require_dependency
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
+from deepeval.utils import convert_to_multi_modal_array, check_if_multimodal
+from deepeval.test_case import MLLMImage
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
 
+valid_multimodal_models = [
+    "llava:7b",
+    "llava:13b",
+    "llava:34b",
+    "llama4",
+    "gemma3",
+    "qwen3-vl",
+    "qwen2.5-vl",
+    # TODO: Add more models later on by looking at their catalogue
+]
+
+if TYPE_CHECKING:
+    from ollama import ChatResponse
 
 retry_ollama = create_retry_decorator(PS.OLLAMA)
 
@@ -23,7 +41,7 @@ class OllamaModel(DeepEvalBaseLLM):
         **kwargs,
     ):
         settings = get_settings()
-        model_name = model or settings.LOCAL_MODEL_NAME
+        model = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
             or (
@@ -35,10 +53,10 @@ class OllamaModel(DeepEvalBaseLLM):
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-        # Raw kwargs destined for the underlying Ollama client
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)
 
     ###############################################
     # Other generate functions
@@ -49,9 +67,17 @@ class OllamaModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
         chat_model = self.load_model()
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            messages = self.generate_messages(prompt)
+        else:
+            messages = [{"role": "user", "content": prompt}]
+        print(messages)
+
         response: ChatResponse = chat_model.chat(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
+            model=self.name,
+            messages=messages,
             format=schema.model_json_schema() if schema else None,
             options={
                 **{"temperature": self.temperature},
@@ -72,9 +98,16 @@ class OllamaModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
         chat_model = self.load_model(async_mode=True)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(prompt)
+            messages = self.generate_messages(prompt)
+        else:
+            messages = [{"role": "user", "content": prompt}]
+
         response: ChatResponse = await chat_model.chat(
-            model=self.model_name,
-            messages=[{"role": "user", "content": prompt}],
+            model=self.name,
+            messages=messages,
             format=schema.model_json_schema() if schema else None,
             options={
                 **{"temperature": self.temperature},
@@ -90,14 +123,78 @@ class OllamaModel(DeepEvalBaseLLM):
             0,
         )
 
+    def generate_messages(
+        self, multimodal_input: List[Union[str, MLLMImage]] = []
+    ):
+        messages = []
+        for ele in multimodal_input:
+            if isinstance(ele, str):
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": ele,
+                    }
+                )
+            elif isinstance(ele, MLLMImage):
+                img_b64 = self.convert_to_base64(ele.url, ele.local)
+                if img_b64 is not None:
+                    messages.append(
+                        {
+                            "role": "user",
+                            "images": [img_b64],
+                        }
+                    )
+        return messages
+
+    ###############################################
+    # Utilities
+    ###############################################
+
+    def convert_to_base64(self, image_source: str, is_local: bool) -> str:
+        from PIL import Image
+
+        settings = get_settings()
+        try:
+            if not is_local:
+                response = requests.get(
+                    image_source,
+                    stream=True,
+                    timeout=(
+                        settings.MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS,
+                        settings.MEDIA_IMAGE_READ_TIMEOUT_SECONDS,
+                    ),
+                )
+                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+                image = Image.open(io.BytesIO(response.content))
+            else:
+                image = Image.open(image_source)
+
+            buffered = io.BytesIO()
+            image.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode()
+            return img_str
+
+        except (requests.exceptions.RequestException, OSError) as e:
+            # Log, then rethrow so @retry_ollama can retry generate_messages() on network failures
+            print(f"Image fetch/encode failed: {e}")
+            raise
+        except Exception as e:
+            print(f"Error converting image to base64: {e}")
+            return None
+
     ###############################################
     # Model
     ###############################################
 
     def load_model(self, async_mode: bool = False):
+        ollama = require_dependency(
+            "ollama",
+            provider_label="OllamaModel",
+            install_hint="Install it with `pip install ollama`.",
+        )
         if not async_mode:
-            return self._build_client(Client)
-        return self._build_client(AsyncClient)
+            return self._build_client(ollama.Client)
+        return self._build_client(ollama.AsyncClient)
 
     def _client_kwargs(self) -> Dict:
         """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
@@ -110,5 +207,10 @@ class OllamaModel(DeepEvalBaseLLM):
         )
         return cls(**kw)
 
+    def supports_multimodal(self):
+        if self.name in valid_multimodal_models:
+            return True
+        return False
+
     def get_model_name(self):
-        return f"{self.model_name} (Ollama)"
+        return f"{self.name} (Ollama)"
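With the dedicated deepeval/models/mlllms/ollama_model.py deleted (file 128 above), OllamaModel itself now handles image inputs. A usage sketch, assuming a pulled llava model, a running Ollama server at the default port, and that `MLLMImage` accepts the `url`/`local` fields that `generate_messages` reads:

    from deepeval.models.llms.ollama_model import OllamaModel
    from deepeval.test_case import MLLMImage

    model = OllamaModel(model="llava:7b", base_url="http://localhost:11434")
    assert model.supports_multimodal()  # "llava:7b" is in valid_multimodal_models

    # generate_messages() emits one user message per element, base64-encoding
    # images into the "images" field the Ollama chat API expects.
    messages = model.generate_messages(
        [
            "Describe what is in this image.",
            MLLMImage(url="./photo.jpg", local=True),
        ]
    )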