deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/dataset/golden.py +54 -2
  5. deepeval/evaluate/configs.py +1 -1
  6. deepeval/evaluate/evaluate.py +16 -8
  7. deepeval/evaluate/execute.py +74 -27
  8. deepeval/evaluate/utils.py +26 -22
  9. deepeval/integrations/pydantic_ai/agent.py +19 -2
  10. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. deepeval/metrics/__init__.py +14 -12
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  13. deepeval/metrics/answer_relevancy/template.py +188 -92
  14. deepeval/metrics/argument_correctness/template.py +2 -2
  15. deepeval/metrics/base_metric.py +2 -5
  16. deepeval/metrics/bias/template.py +3 -3
  17. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  18. deepeval/metrics/contextual_precision/template.py +115 -66
  19. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  20. deepeval/metrics/contextual_recall/template.py +106 -55
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  22. deepeval/metrics/contextual_relevancy/template.py +87 -58
  23. deepeval/metrics/conversation_completeness/template.py +2 -2
  24. deepeval/metrics/conversational_dag/templates.py +4 -4
  25. deepeval/metrics/conversational_g_eval/template.py +4 -3
  26. deepeval/metrics/dag/templates.py +5 -5
  27. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  28. deepeval/metrics/faithfulness/schema.py +1 -1
  29. deepeval/metrics/faithfulness/template.py +200 -115
  30. deepeval/metrics/g_eval/utils.py +2 -2
  31. deepeval/metrics/hallucination/template.py +4 -4
  32. deepeval/metrics/indicator.py +4 -4
  33. deepeval/metrics/misuse/template.py +2 -2
  34. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  35. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  36. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  37. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  38. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  39. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  40. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  41. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  42. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  43. deepeval/metrics/non_advice/template.py +2 -2
  44. deepeval/metrics/pii_leakage/template.py +2 -2
  45. deepeval/metrics/prompt_alignment/template.py +4 -4
  46. deepeval/metrics/ragas.py +3 -3
  47. deepeval/metrics/role_violation/template.py +2 -2
  48. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  49. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  50. deepeval/metrics/toxicity/template.py +4 -4
  51. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  52. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  53. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  54. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  55. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  56. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  57. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  58. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  59. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  60. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  61. deepeval/metrics/turn_faithfulness/template.py +218 -0
  62. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  63. deepeval/metrics/turn_relevancy/template.py +2 -2
  64. deepeval/metrics/utils.py +39 -58
  65. deepeval/models/__init__.py +0 -12
  66. deepeval/models/base_model.py +16 -38
  67. deepeval/models/embedding_models/__init__.py +7 -0
  68. deepeval/models/embedding_models/azure_embedding_model.py +69 -32
  69. deepeval/models/embedding_models/local_embedding_model.py +39 -22
  70. deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
  71. deepeval/models/embedding_models/openai_embedding_model.py +50 -15
  72. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  73. deepeval/models/llms/anthropic_model.py +53 -20
  74. deepeval/models/llms/azure_model.py +140 -43
  75. deepeval/models/llms/deepseek_model.py +38 -23
  76. deepeval/models/llms/gemini_model.py +222 -103
  77. deepeval/models/llms/grok_model.py +39 -27
  78. deepeval/models/llms/kimi_model.py +39 -23
  79. deepeval/models/llms/litellm_model.py +103 -45
  80. deepeval/models/llms/local_model.py +35 -22
  81. deepeval/models/llms/ollama_model.py +129 -17
  82. deepeval/models/llms/openai_model.py +151 -50
  83. deepeval/models/llms/portkey_model.py +149 -0
  84. deepeval/models/llms/utils.py +5 -3
  85. deepeval/models/retry_policy.py +17 -14
  86. deepeval/models/utils.py +94 -4
  87. deepeval/optimizer/__init__.py +5 -0
  88. deepeval/optimizer/algorithms/__init__.py +6 -0
  89. deepeval/optimizer/algorithms/base.py +29 -0
  90. deepeval/optimizer/algorithms/configs.py +18 -0
  91. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  92. deepeval/optimizer/algorithms/copro/copro.py +836 -0
  93. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  94. deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
  95. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  96. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  97. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  98. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  99. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  100. deepeval/optimizer/algorithms/simba/simba.py +999 -0
  101. deepeval/optimizer/algorithms/simba/types.py +15 -0
  102. deepeval/optimizer/configs.py +31 -0
  103. deepeval/optimizer/policies.py +227 -0
  104. deepeval/optimizer/prompt_optimizer.py +263 -0
  105. deepeval/optimizer/rewriter/__init__.py +5 -0
  106. deepeval/optimizer/rewriter/rewriter.py +124 -0
  107. deepeval/optimizer/rewriter/utils.py +214 -0
  108. deepeval/optimizer/scorer/__init__.py +5 -0
  109. deepeval/optimizer/scorer/base.py +86 -0
  110. deepeval/optimizer/scorer/scorer.py +316 -0
  111. deepeval/optimizer/scorer/utils.py +30 -0
  112. deepeval/optimizer/types.py +148 -0
  113. deepeval/optimizer/utils.py +480 -0
  114. deepeval/prompt/prompt.py +7 -6
  115. deepeval/test_case/__init__.py +1 -3
  116. deepeval/test_case/api.py +12 -10
  117. deepeval/test_case/conversational_test_case.py +19 -1
  118. deepeval/test_case/llm_test_case.py +152 -1
  119. deepeval/test_case/utils.py +4 -8
  120. deepeval/test_run/api.py +15 -14
  121. deepeval/test_run/cache.py +2 -0
  122. deepeval/test_run/test_run.py +9 -4
  123. deepeval/tracing/patchers.py +9 -4
  124. deepeval/tracing/tracing.py +2 -2
  125. deepeval/utils.py +89 -0
  126. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  127. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
  128. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  129. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  130. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  131. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  132. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  133. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  134. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  135. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  136. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  137. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  138. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  139. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  140. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  141. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  142. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  143. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  144. deepeval/models/mlllms/__init__.py +0 -4
  145. deepeval/models/mlllms/azure_model.py +0 -334
  146. deepeval/models/mlllms/gemini_model.py +0 -284
  147. deepeval/models/mlllms/ollama_model.py +0 -144
  148. deepeval/models/mlllms/openai_model.py +0 -258
  149. deepeval/test_case/mllm_test_case.py +0 -170
  150. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  152. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  153. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  154. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  155. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  156. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
@@ -1,170 +0,0 @@
1
- from typing import List, Optional, Dict, Union
2
- from urllib.parse import urlparse, unquote
3
- from dataclasses import dataclass, field
4
- from enum import Enum
5
- import mimetypes
6
- import base64
7
- import os
8
-
9
- from deepeval.test_case import ToolCall
10
-
11
-
12
@dataclass
class MLLMImage:
    """An image given either by URL / local path or by inline Base64 data.

    Exactly one of ``url`` and ``dataBase64`` must be supplied (non-empty).

    Attributes:
        dataBase64: Base64 text of the image bytes. Filled in automatically
            when ``url`` resolves to an existing local file; reset to
            ``None`` for a non-local ``url``.
        mimeType: Required together with ``dataBase64``. For a local file it
            is guessed from the path, falling back to
            ``"application/octet-stream"``; reset to ``None`` for a
            non-local ``url``.
        url: A filesystem path, a ``file://`` URI, or any other URL string.
        local: Whether ``url`` resolves to an existing local path. Computed
            when omitted; when supplied it must agree with the computed
            value. Left untouched when the image is built from Base64 data.
        filename: Base name of the local file; reset to ``None`` for a
            non-local ``url``.

    Raises:
        ValueError: If both or neither of ``url`` / ``dataBase64`` are given,
            or ``dataBase64`` is given without ``mimeType``.
        AssertionError: If an explicit ``local`` contradicts what the
            filesystem check found for ``url``.
        OSError: If the local file cannot be opened or read.
    """

    dataBase64: Optional[str] = None
    mimeType: Optional[str] = None
    url: Optional[str] = None
    local: Optional[bool] = None
    filename: Optional[str] = None

    def __post_init__(self):
        if self.url and self.dataBase64:
            raise ValueError(
                "You cannot provide both 'url' and 'dataBase64' at the same time when creating an MLLMImage."
            )

        if not self.url and not self.dataBase64:
            raise ValueError(
                "You must provide either a 'url' or both 'dataBase64' and 'mimeType' to create an MLLMImage."
            )

        # Branch on truthiness, exactly like the two guards above, so an
        # empty-string `dataBase64` passed alongside a real `url` does not
        # shadow the URL.
        if self.dataBase64:
            if self.mimeType is None:
                raise ValueError(
                    "mimeType must be provided when initializing from Base64 data."
                )
            return

        is_local = self.is_local_path(self.url)
        if self.local is None:
            self.local = is_local
        elif self.local != is_local:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type is kept for
            # callers that already catch AssertionError.
            raise AssertionError("Local path mismatch")

        if not self.local:
            # Non-local URL: nothing is fetched, derived fields are cleared.
            self.filename = None
            self.mimeType = None
            self.dataBase64 = None
            return

        # Local file: derive filename, MIME type and Base64 data from disk.
        path = self.process_url(self.url)
        self.filename = os.path.basename(path)
        self.mimeType = (
            mimetypes.guess_type(path)[0] or "application/octet-stream"
        )
        with open(path, "rb") as f:
            raw = f.read()
        self.dataBase64 = base64.b64encode(raw).decode("ascii")

    @staticmethod
    def _file_uri_to_path(url: str) -> Optional[str]:
        """Return the unquoted path of a ``file://`` URI, else ``None``."""
        parsed = urlparse(url)
        if parsed.scheme != "file":
            return None
        # Keep a network location (e.g. file://host/share/x) as a
        # `//host/...` prefix instead of dropping it.
        raw_path = (
            f"//{parsed.netloc}{parsed.path}"
            if parsed.netloc
            else parsed.path
        )
        return unquote(raw_path)

    @staticmethod
    def process_url(url: str) -> str:
        """Return a filesystem path for ``url`` when it is a ``file://`` URI.

        An existing path, or any non-``file`` URL, is returned unchanged.
        """
        if os.path.exists(url):
            return url
        path = MLLMImage._file_uri_to_path(url)
        return url if path is None else path

    @staticmethod
    def is_local_path(url: str) -> bool:
        """Return True if ``url`` is an existing path or ``file://`` URI."""
        if os.path.exists(url):
            return True
        path = MLLMImage._file_uri_to_path(url)
        return path is not None and os.path.exists(path)

    def as_data_uri(self) -> Optional[str]:
        """Return the image as a data URI string, if Base64 data is available."""
        if not self.dataBase64 or not self.mimeType:
            return None
        return f"data:{self.mimeType};base64,{self.dataBase64}"
94
-
95
-
96
class MLLMTestCaseParams(Enum):
    """Symbolic names for the parameters of a multimodal test case.

    Each member's value is the name of the identically spelled
    ``MLLMTestCase`` field.
    """

    # What went into and came out of the model.
    INPUT = "input"
    ACTUAL_OUTPUT = "actual_output"
    EXPECTED_OUTPUT = "expected_output"

    # Context fields.
    CONTEXT = "context"
    RETRIEVAL_CONTEXT = "retrieval_context"

    # Tool-call fields.
    TOOLS_CALLED = "tools_called"
    EXPECTED_TOOLS = "expected_tools"
104
-
105
-
106
@dataclass
class MLLMTestCase:
    """A single multimodal test case mixing text and ``MLLMImage`` items.

    ``input`` and ``actual_output`` are required; every other field defaults
    to ``None``. Construction validates only the optional list fields:

    * ``expected_output``, ``context`` and ``retrieval_context`` must each be
      ``None`` or a list whose items are ``str`` or ``MLLMImage``.
    * ``tools_called`` and ``expected_tools`` must each be ``None`` or a list
      of ``ToolCall``.

    ``input`` and ``actual_output`` are not type-checked at construction.

    Raises:
        TypeError: If any validated field has the wrong shape.
    """

    input: List[Union[str, MLLMImage]]
    actual_output: List[Union[str, MLLMImage]]
    expected_output: Optional[List[Union[str, MLLMImage]]] = None
    context: Optional[List[Union[str, MLLMImage]]] = None
    retrieval_context: Optional[List[Union[str, MLLMImage]]] = None
    additional_metadata: Optional[Dict] = None
    comments: Optional[str] = None
    tools_called: Optional[List[ToolCall]] = None
    expected_tools: Optional[List[ToolCall]] = None
    token_cost: Optional[float] = None
    completion_time: Optional[float] = None
    name: Optional[str] = field(default=None)
    # Dataset bookkeeping fields, excluded from the generated repr.
    _dataset_rank: Optional[int] = field(default=None, repr=False)
    _dataset_alias: Optional[str] = field(default=None, repr=False)
    _dataset_id: Optional[str] = field(default=None, repr=False)

    def __post_init__(self):
        # Checks run in field-declaration order, so the first offending
        # field is the one reported.
        text_or_image = (str, MLLMImage)
        self._ensure_optional_list_of(
            self.expected_output,
            text_or_image,
            "'expected_output' must be None or a list of strings or MLLMImage instances",
        )
        self._ensure_optional_list_of(
            self.context,
            text_or_image,
            "'context' must be None or a list of strings or MLLMImage instances",
        )
        self._ensure_optional_list_of(
            self.retrieval_context,
            text_or_image,
            "'retrieval_context' must be None or a list of strings or MLLMImage instances",
        )
        self._ensure_optional_list_of(
            self.tools_called,
            ToolCall,
            "'tools_called' must be None or a list of `ToolCall`",
        )
        self._ensure_optional_list_of(
            self.expected_tools,
            ToolCall,
            "'expected_tools' must be None or a list of `ToolCall`",
        )

    @staticmethod
    def _ensure_optional_list_of(value, item_types, message):
        """Raise ``TypeError(message)`` unless ``value`` is None or a list of ``item_types``."""
        if value is None:
            return
        if not isinstance(value, list) or not all(
            isinstance(item, item_types) for item in value
        ):
            raise TypeError(message)