deepeval 3.7.5__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +106 -21
  5. deepeval/evaluate/evaluate.py +0 -3
  6. deepeval/evaluate/execute.py +10 -222
  7. deepeval/evaluate/utils.py +6 -30
  8. deepeval/key_handler.py +3 -0
  9. deepeval/metrics/__init__.py +0 -4
  10. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  11. deepeval/metrics/answer_relevancy/template.py +102 -179
  12. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  13. deepeval/metrics/arena_g_eval/template.py +17 -1
  14. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  15. deepeval/metrics/argument_correctness/template.py +19 -2
  16. deepeval/metrics/base_metric.py +13 -41
  17. deepeval/metrics/bias/bias.py +102 -108
  18. deepeval/metrics/bias/template.py +14 -2
  19. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  20. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  22. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  23. deepeval/metrics/conversation_completeness/template.py +23 -3
  24. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  25. deepeval/metrics/conversational_dag/nodes.py +66 -123
  26. deepeval/metrics/conversational_dag/templates.py +16 -0
  27. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  28. deepeval/metrics/dag/dag.py +10 -0
  29. deepeval/metrics/dag/nodes.py +63 -126
  30. deepeval/metrics/dag/templates.py +14 -0
  31. deepeval/metrics/exact_match/exact_match.py +9 -1
  32. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  33. deepeval/metrics/g_eval/g_eval.py +87 -78
  34. deepeval/metrics/g_eval/template.py +18 -1
  35. deepeval/metrics/g_eval/utils.py +7 -6
  36. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  37. deepeval/metrics/goal_accuracy/template.py +21 -3
  38. deepeval/metrics/hallucination/hallucination.py +60 -75
  39. deepeval/metrics/hallucination/template.py +13 -0
  40. deepeval/metrics/indicator.py +3 -6
  41. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  42. deepeval/metrics/json_correctness/template.py +10 -0
  43. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  44. deepeval/metrics/knowledge_retention/schema.py +9 -3
  45. deepeval/metrics/knowledge_retention/template.py +12 -0
  46. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  47. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  48. deepeval/metrics/mcp/template.py +52 -0
  49. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  50. deepeval/metrics/mcp_use_metric/template.py +12 -0
  51. deepeval/metrics/misuse/misuse.py +77 -97
  52. deepeval/metrics/misuse/template.py +15 -0
  53. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  54. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  55. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  56. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  57. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  58. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  59. deepeval/metrics/non_advice/non_advice.py +79 -105
  60. deepeval/metrics/non_advice/template.py +12 -0
  61. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  62. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  63. deepeval/metrics/pii_leakage/template.py +14 -0
  64. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  65. deepeval/metrics/plan_adherence/template.py +11 -0
  66. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  67. deepeval/metrics/plan_quality/template.py +9 -0
  68. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  69. deepeval/metrics/prompt_alignment/template.py +12 -0
  70. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  71. deepeval/metrics/role_adherence/template.py +14 -0
  72. deepeval/metrics/role_violation/role_violation.py +75 -108
  73. deepeval/metrics/role_violation/template.py +12 -0
  74. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  75. deepeval/metrics/step_efficiency/template.py +11 -0
  76. deepeval/metrics/summarization/summarization.py +115 -183
  77. deepeval/metrics/summarization/template.py +19 -0
  78. deepeval/metrics/task_completion/task_completion.py +67 -73
  79. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  80. deepeval/metrics/tool_use/tool_use.py +42 -66
  81. deepeval/metrics/topic_adherence/template.py +13 -0
  82. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  83. deepeval/metrics/toxicity/template.py +13 -0
  84. deepeval/metrics/toxicity/toxicity.py +80 -99
  85. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  86. deepeval/metrics/turn_contextual_precision/template.py +1 -1
  87. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
  88. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  89. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
  90. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  91. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
  92. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  93. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
  94. deepeval/metrics/turn_relevancy/template.py +14 -0
  95. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  96. deepeval/metrics/utils.py +145 -90
  97. deepeval/models/base_model.py +44 -6
  98. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  99. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  100. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  101. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  102. deepeval/models/llms/amazon_bedrock_model.py +226 -71
  103. deepeval/models/llms/anthropic_model.py +141 -47
  104. deepeval/models/llms/azure_model.py +167 -94
  105. deepeval/models/llms/constants.py +2032 -0
  106. deepeval/models/llms/deepseek_model.py +79 -29
  107. deepeval/models/llms/gemini_model.py +126 -67
  108. deepeval/models/llms/grok_model.py +125 -59
  109. deepeval/models/llms/kimi_model.py +126 -81
  110. deepeval/models/llms/litellm_model.py +92 -18
  111. deepeval/models/llms/local_model.py +114 -15
  112. deepeval/models/llms/ollama_model.py +97 -76
  113. deepeval/models/llms/openai_model.py +167 -310
  114. deepeval/models/llms/portkey_model.py +58 -16
  115. deepeval/models/llms/utils.py +5 -2
  116. deepeval/models/utils.py +60 -4
  117. deepeval/simulator/conversation_simulator.py +43 -0
  118. deepeval/simulator/template.py +13 -0
  119. deepeval/test_case/api.py +24 -45
  120. deepeval/test_case/arena_test_case.py +7 -2
  121. deepeval/test_case/conversational_test_case.py +55 -6
  122. deepeval/test_case/llm_test_case.py +60 -6
  123. deepeval/test_run/api.py +3 -0
  124. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
  125. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
  126. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  127. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  128. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  129. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  130. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  131. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  132. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  133. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/_version.py CHANGED
@@ -1 +1 @@
1
- __version__: str = "3.7.5"
1
+ __version__: str = "3.7.6"
@@ -27,6 +27,7 @@ from pydantic import (
27
27
  field_validator,
28
28
  model_validator,
29
29
  SecretStr,
30
+ PositiveFloat,
30
31
  )
31
32
  from pydantic_settings import BaseSettings, SettingsConfigDict
32
33
  from typing import Any, Dict, List, Optional, NamedTuple
@@ -317,6 +318,19 @@ class Settings(BaseSettings):
317
318
 
318
319
  # Anthropic
319
320
  ANTHROPIC_API_KEY: Optional[SecretStr] = None
321
+ ANTHROPIC_MODEL_NAME: Optional[str] = None
322
+ ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
323
+ ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
324
+
325
+ # AWS
326
+ AWS_ACCESS_KEY_ID: Optional[SecretStr] = None
327
+ AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = None
328
+ # AWS Bedrock
329
+ USE_AWS_BEDROCK_MODEL: Optional[bool] = None
330
+ AWS_BEDROCK_MODEL_NAME: Optional[str] = None
331
+ AWS_BEDROCK_REGION: Optional[str] = None
332
+ AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
333
+ AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
320
334
  # Azure Open AI
321
335
  AZURE_OPENAI_API_KEY: Optional[SecretStr] = None
322
336
  AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = None
@@ -329,6 +343,8 @@ class Settings(BaseSettings):
329
343
  USE_DEEPSEEK_MODEL: Optional[bool] = None
330
344
  DEEPSEEK_API_KEY: Optional[SecretStr] = None
331
345
  DEEPSEEK_MODEL_NAME: Optional[str] = None
346
+ DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = None
347
+ DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
332
348
  # Gemini
333
349
  USE_GEMINI_MODEL: Optional[bool] = None
334
350
  GOOGLE_API_KEY: Optional[SecretStr] = None
@@ -336,11 +352,13 @@ class Settings(BaseSettings):
336
352
  GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
337
353
  GOOGLE_CLOUD_PROJECT: Optional[str] = None
338
354
  GOOGLE_CLOUD_LOCATION: Optional[str] = None
339
- GOOGLE_SERVICE_ACCOUNT_KEY: Optional[str] = None
355
+ GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = None
340
356
  # Grok
341
357
  USE_GROK_MODEL: Optional[bool] = None
342
358
  GROK_API_KEY: Optional[SecretStr] = None
343
359
  GROK_MODEL_NAME: Optional[str] = None
360
+ GROK_COST_PER_INPUT_TOKEN: Optional[float] = None
361
+ GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
344
362
  # LiteLLM
345
363
  USE_LITELLM: Optional[bool] = None
346
364
  LITELLM_API_KEY: Optional[SecretStr] = None
@@ -362,6 +380,8 @@ class Settings(BaseSettings):
362
380
  USE_MOONSHOT_MODEL: Optional[bool] = None
363
381
  MOONSHOT_API_KEY: Optional[SecretStr] = None
364
382
  MOONSHOT_MODEL_NAME: Optional[str] = None
383
+ MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = None
384
+ MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = None
365
385
  # Ollama
366
386
  OLLAMA_MODEL_NAME: Optional[str] = None
367
387
  # OpenAI
@@ -388,6 +408,7 @@ class Settings(BaseSettings):
388
408
 
389
409
  # Azure OpenAI
390
410
  USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = None
411
+ AZURE_EMBEDDING_MODEL_NAME: Optional[str] = None
391
412
  AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = None
392
413
  # Local
393
414
  USE_LOCAL_EMBEDDINGS: Optional[bool] = None
@@ -614,6 +635,7 @@ class Settings(BaseSettings):
614
635
  "SKIP_DEEPEVAL_MISSING_PARAMS",
615
636
  "TOKENIZERS_PARALLELISM",
616
637
  "TRANSFORMERS_NO_ADVISORY_WARNINGS",
638
+ "USE_AWS_BEDROCK_MODEL",
617
639
  "USE_OPENAI_MODEL",
618
640
  "USE_AZURE_OPENAI",
619
641
  "USE_LOCAL_MODEL",
@@ -647,6 +669,8 @@ class Settings(BaseSettings):
647
669
  @field_validator(
648
670
  "OPENAI_COST_PER_INPUT_TOKEN",
649
671
  "OPENAI_COST_PER_OUTPUT_TOKEN",
672
+ "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
673
+ "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
650
674
  "TEMPERATURE",
651
675
  "CONFIDENT_TRACE_SAMPLE_RATE",
652
676
  "CONFIDENT_METRIC_LOGGING_SAMPLE_RATE",
@@ -717,6 +741,16 @@ class Settings(BaseSettings):
717
741
  return None
718
742
  return s.upper()
719
743
 
744
+ @field_validator("AWS_BEDROCK_REGION", mode="before")
745
+ @classmethod
746
+ def _normalize_lower(cls, v):
747
+ if v is None:
748
+ return None
749
+ s = str(v).strip()
750
+ if not s:
751
+ return None
752
+ return s.lower()
753
+
720
754
  @field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="before")
721
755
  @classmethod
722
756
  def _coerce_to_list(cls, v):
deepeval/dataset/api.py CHANGED
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field
1
+ from pydantic import BaseModel, Field, model_validator
2
2
  from typing import Optional, List
3
3
 
4
4
  from deepeval.dataset.golden import Golden, ConversationalGolden
@@ -11,6 +11,17 @@ class APIDataset(BaseModel):
11
11
  None, alias="conversationalGoldens"
12
12
  )
13
13
 
14
+ @model_validator(mode="after")
15
+ def set_image_mappings_for_goldens(self):
16
+ if self.goldens:
17
+ for golden in self.goldens:
18
+ golden.images_mapping = golden._get_images_mapping()
19
+ if self.conversational_goldens:
20
+ for golden in self.conversational_goldens:
21
+ golden.images_mapping = golden._get_images_mapping()
22
+
23
+ return self
24
+
14
25
 
15
26
  class APIQueueDataset(BaseModel):
16
27
  alias: str
@@ -19,6 +30,17 @@ class APIQueueDataset(BaseModel):
19
30
  None, alias="conversationalGoldens"
20
31
  )
21
32
 
33
+ @model_validator(mode="after")
34
+ def set_image_mappings_for_goldens(self):
35
+ if self.goldens:
36
+ for golden in self.goldens:
37
+ golden.images_mapping = golden._get_images_mapping()
38
+ if self.conversational_goldens:
39
+ for golden in self.conversational_goldens:
40
+ golden.images_mapping = golden._get_images_mapping()
41
+
42
+ return self
43
+
22
44
 
23
45
  class DatasetHttpResponse(BaseModel):
24
46
  id: str
@@ -1,6 +1,8 @@
1
+ import re
1
2
  from pydantic import BaseModel, Field, PrivateAttr, model_validator
2
3
  from typing import Optional, Dict, List
3
4
  from deepeval.test_case import ToolCall, Turn, MLLMImage
5
+ from deepeval.test_case.llm_test_case import _MLLM_IMAGE_REGISTRY
4
6
 
5
7
 
6
8
  class Golden(BaseModel):
@@ -33,6 +35,9 @@ class Golden(BaseModel):
33
35
  default=None, serialization_alias="customColumnKeyValues"
34
36
  )
35
37
  multimodal: bool = Field(False, exclude=True)
38
+ images_mapping: Dict[str, MLLMImage] = Field(
39
+ default=None, alias="imagesMapping"
40
+ )
36
41
  _dataset_rank: Optional[int] = PrivateAttr(default=None)
37
42
  _dataset_alias: Optional[str] = PrivateAttr(default=None)
38
43
  _dataset_id: Optional[str] = PrivateAttr(default=None)
@@ -45,27 +50,60 @@ class Golden(BaseModel):
45
50
  return self
46
51
 
47
52
  pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
48
- self.multimodal = (
53
+ auto_detect = (
49
54
  any(
50
55
  [
51
- (
52
- re.search(pattern, self.input) is not None
53
- if self.input
54
- else False
55
- ),
56
- (
57
- re.search(pattern, self.actual_output) is not None
58
- if self.actual_output
59
- else False
60
- ),
56
+ re.search(pattern, self.input or "") is not None,
57
+ re.search(pattern, self.actual_output or "") is not None,
61
58
  ]
62
59
  )
63
60
  if isinstance(self.input, str)
64
61
  else self.multimodal
65
62
  )
63
+ if self.retrieval_context is not None:
64
+ auto_detect = auto_detect or any(
65
+ re.search(pattern, context) is not None
66
+ for context in self.retrieval_context
67
+ )
68
+ if self.context is not None:
69
+ auto_detect = auto_detect or any(
70
+ re.search(pattern, context) is not None
71
+ for context in self.context
72
+ )
73
+
74
+ self.multimodal = auto_detect
66
75
 
67
76
  return self
68
77
 
78
+ def _get_images_mapping(self) -> Dict[str, MLLMImage]:
79
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
80
+ image_ids = set()
81
+
82
+ def extract_ids_from_string(s: Optional[str]) -> None:
83
+ """Helper to extract image IDs from a string."""
84
+ if s is not None and isinstance(s, str):
85
+ matches = re.findall(pattern, s)
86
+ image_ids.update(matches)
87
+
88
+ def extract_ids_from_list(lst: Optional[List[str]]) -> None:
89
+ """Helper to extract image IDs from a list of strings."""
90
+ if lst is not None:
91
+ for item in lst:
92
+ extract_ids_from_string(item)
93
+
94
+ extract_ids_from_string(self.input)
95
+ extract_ids_from_string(self.actual_output)
96
+ extract_ids_from_string(self.expected_output)
97
+ extract_ids_from_list(self.context)
98
+ extract_ids_from_list(self.retrieval_context)
99
+
100
+ images_mapping = {}
101
+ for img_id in image_ids:
102
+ if img_id in _MLLM_IMAGE_REGISTRY:
103
+ images_mapping[img_id] = _MLLM_IMAGE_REGISTRY[img_id]
104
+
105
+ return images_mapping if len(images_mapping) > 0 else None
106
+
69
107
 
70
108
  class ConversationalGolden(BaseModel):
71
109
  scenario: str
@@ -86,6 +124,9 @@ class ConversationalGolden(BaseModel):
86
124
  )
87
125
  turns: Optional[List[Turn]] = Field(default=None)
88
126
  multimodal: bool = Field(False, exclude=True)
127
+ images_mapping: Dict[str, MLLMImage] = Field(
128
+ default=None, alias="imagesMapping"
129
+ )
89
130
  _dataset_rank: Optional[int] = PrivateAttr(default=None)
90
131
  _dataset_alias: Optional[str] = PrivateAttr(default=None)
91
132
  _dataset_id: Optional[str] = PrivateAttr(default=None)
@@ -98,15 +139,59 @@ class ConversationalGolden(BaseModel):
98
139
  return self
99
140
 
100
141
  pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
101
- self.multimodal = (
102
- any(
103
- [
104
- re.search(pattern, turn.content) is not None
105
- for turn in self.turns
106
- ]
107
- )
108
- if self.turns
109
- else self.multimodal
110
- )
142
+ if self.scenario:
143
+ if re.search(pattern, self.scenario) is not None:
144
+ self.multimodal = True
145
+ return self
146
+ if self.expected_outcome:
147
+ if re.search(pattern, self.expected_outcome) is not None:
148
+ self.multimodal = True
149
+ return self
150
+ if self.user_description:
151
+ if re.search(pattern, self.user_description) is not None:
152
+ self.multimodal = True
153
+ return self
154
+ if self.turns:
155
+ for turn in self.turns:
156
+ if re.search(pattern, turn.content) is not None:
157
+ self.multimodal = True
158
+ return self
159
+ if turn.retrieval_context is not None:
160
+ self.multimodal = any(
161
+ re.search(pattern, context) is not None
162
+ for context in turn.retrieval_context
163
+ )
111
164
 
112
165
  return self
166
+
167
+ def _get_images_mapping(self) -> Dict[str, MLLMImage]:
168
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
169
+ image_ids = set()
170
+
171
+ def extract_ids_from_string(s: Optional[str]) -> None:
172
+ """Helper to extract image IDs from a string."""
173
+ if s is not None and isinstance(s, str):
174
+ matches = re.findall(pattern, s)
175
+ image_ids.update(matches)
176
+
177
+ def extract_ids_from_list(lst: Optional[List[str]]) -> None:
178
+ """Helper to extract image IDs from a list of strings."""
179
+ if lst is not None:
180
+ for item in lst:
181
+ extract_ids_from_string(item)
182
+
183
+ extract_ids_from_string(self.scenario)
184
+ extract_ids_from_string(self.expected_outcome)
185
+ extract_ids_from_list(self.context)
186
+ extract_ids_from_string(self.user_description)
187
+ if self.turns:
188
+ for turn in self.turns:
189
+ extract_ids_from_string(turn.content)
190
+ extract_ids_from_list(turn.retrieval_context)
191
+
192
+ images_mapping = {}
193
+ for img_id in image_ids:
194
+ if img_id in _MLLM_IMAGE_REGISTRY:
195
+ images_mapping[img_id] = _MLLM_IMAGE_REGISTRY[img_id]
196
+
197
+ return images_mapping if len(images_mapping) > 0 else None
@@ -46,7 +46,6 @@ from deepeval.telemetry import capture_evaluation_run
46
46
  from deepeval.metrics import (
47
47
  BaseMetric,
48
48
  BaseConversationalMetric,
49
- BaseMultimodalMetric,
50
49
  )
51
50
  from deepeval.metrics.indicator import (
52
51
  format_metric_description,
@@ -75,7 +74,6 @@ def assert_test(
75
74
  Union[
76
75
  List[BaseMetric],
77
76
  List[BaseConversationalMetric],
78
- List[BaseMultimodalMetric],
79
77
  ]
80
78
  ] = None,
81
79
  golden: Optional[Golden] = None,
@@ -190,7 +188,6 @@ def evaluate(
190
188
  Union[
191
189
  List[BaseMetric],
192
190
  List[BaseConversationalMetric],
193
- List[BaseMultimodalMetric],
194
191
  ]
195
192
  ] = None,
196
193
  # Evals on Confident AI