deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
  3. deepeval/cli/main.py +42 -0
  4. deepeval/confident/api.py +1 -0
  5. deepeval/config/settings.py +22 -4
  6. deepeval/constants.py +8 -1
  7. deepeval/dataset/dataset.py +2 -11
  8. deepeval/dataset/utils.py +1 -1
  9. deepeval/errors.py +20 -2
  10. deepeval/evaluate/evaluate.py +5 -1
  11. deepeval/evaluate/execute.py +811 -248
  12. deepeval/evaluate/types.py +1 -0
  13. deepeval/evaluate/utils.py +33 -119
  14. deepeval/integrations/crewai/__init__.py +7 -1
  15. deepeval/integrations/crewai/handler.py +1 -1
  16. deepeval/integrations/crewai/subs.py +51 -0
  17. deepeval/integrations/crewai/tool.py +71 -0
  18. deepeval/integrations/crewai/wrapper.py +45 -5
  19. deepeval/integrations/llama_index/__init__.py +0 -4
  20. deepeval/integrations/llama_index/handler.py +20 -21
  21. deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
  22. deepeval/metrics/__init__.py +13 -0
  23. deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
  24. deepeval/metrics/api.py +281 -0
  25. deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
  26. deepeval/metrics/base_metric.py +1 -0
  27. deepeval/metrics/bias/bias.py +12 -3
  28. deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
  29. deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
  30. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
  31. deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
  32. deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
  33. deepeval/metrics/conversational_dag/nodes.py +12 -4
  34. deepeval/metrics/conversational_g_eval/__init__.py +3 -0
  35. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
  36. deepeval/metrics/dag/dag.py +12 -0
  37. deepeval/metrics/dag/nodes.py +12 -4
  38. deepeval/metrics/dag/schema.py +1 -1
  39. deepeval/metrics/dag/templates.py +2 -2
  40. deepeval/metrics/faithfulness/faithfulness.py +12 -1
  41. deepeval/metrics/g_eval/g_eval.py +11 -0
  42. deepeval/metrics/goal_accuracy/__init__.py +1 -0
  43. deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
  44. deepeval/metrics/goal_accuracy/schema.py +17 -0
  45. deepeval/metrics/goal_accuracy/template.py +235 -0
  46. deepeval/metrics/hallucination/hallucination.py +20 -9
  47. deepeval/metrics/indicator.py +8 -2
  48. deepeval/metrics/json_correctness/json_correctness.py +12 -1
  49. deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
  50. deepeval/metrics/mcp/mcp_task_completion.py +20 -2
  51. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
  52. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
  53. deepeval/metrics/misuse/misuse.py +12 -1
  54. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
  55. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
  56. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
  57. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
  58. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
  59. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
  60. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
  61. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
  62. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
  63. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
  64. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
  65. deepeval/metrics/non_advice/non_advice.py +12 -0
  66. deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
  67. deepeval/metrics/plan_adherence/__init__.py +1 -0
  68. deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
  69. deepeval/metrics/plan_adherence/schema.py +11 -0
  70. deepeval/metrics/plan_adherence/template.py +170 -0
  71. deepeval/metrics/plan_quality/__init__.py +1 -0
  72. deepeval/metrics/plan_quality/plan_quality.py +292 -0
  73. deepeval/metrics/plan_quality/schema.py +11 -0
  74. deepeval/metrics/plan_quality/template.py +101 -0
  75. deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
  76. deepeval/metrics/role_adherence/role_adherence.py +12 -0
  77. deepeval/metrics/role_violation/role_violation.py +12 -0
  78. deepeval/metrics/step_efficiency/__init__.py +1 -0
  79. deepeval/metrics/step_efficiency/schema.py +11 -0
  80. deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
  81. deepeval/metrics/step_efficiency/template.py +256 -0
  82. deepeval/metrics/summarization/summarization.py +12 -1
  83. deepeval/metrics/task_completion/task_completion.py +4 -0
  84. deepeval/metrics/tool_correctness/schema.py +6 -0
  85. deepeval/metrics/tool_correctness/template.py +88 -0
  86. deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
  87. deepeval/metrics/tool_use/__init__.py +1 -0
  88. deepeval/metrics/tool_use/schema.py +19 -0
  89. deepeval/metrics/tool_use/template.py +220 -0
  90. deepeval/metrics/tool_use/tool_use.py +458 -0
  91. deepeval/metrics/topic_adherence/__init__.py +1 -0
  92. deepeval/metrics/topic_adherence/schema.py +16 -0
  93. deepeval/metrics/topic_adherence/template.py +162 -0
  94. deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
  95. deepeval/metrics/toxicity/toxicity.py +12 -0
  96. deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
  97. deepeval/models/embedding_models/azure_embedding_model.py +37 -36
  98. deepeval/models/embedding_models/local_embedding_model.py +30 -32
  99. deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
  100. deepeval/models/embedding_models/openai_embedding_model.py +22 -31
  101. deepeval/models/llms/grok_model.py +1 -1
  102. deepeval/models/llms/openai_model.py +2 -0
  103. deepeval/openai/__init__.py +14 -32
  104. deepeval/openai/extractors.py +85 -50
  105. deepeval/openai/patch.py +258 -167
  106. deepeval/openai/types.py +20 -0
  107. deepeval/openai/utils.py +205 -56
  108. deepeval/prompt/__init__.py +19 -1
  109. deepeval/prompt/api.py +160 -0
  110. deepeval/prompt/prompt.py +245 -62
  111. deepeval/prompt/utils.py +186 -15
  112. deepeval/synthesizer/chunking/context_generator.py +209 -152
  113. deepeval/synthesizer/chunking/doc_chunker.py +46 -12
  114. deepeval/synthesizer/synthesizer.py +19 -15
  115. deepeval/test_case/api.py +131 -0
  116. deepeval/test_case/llm_test_case.py +6 -2
  117. deepeval/test_run/__init__.py +1 -0
  118. deepeval/test_run/hyperparameters.py +47 -8
  119. deepeval/test_run/test_run.py +292 -206
  120. deepeval/tracing/__init__.py +2 -1
  121. deepeval/tracing/api.py +3 -1
  122. deepeval/tracing/otel/exporter.py +3 -4
  123. deepeval/tracing/otel/utils.py +24 -5
  124. deepeval/tracing/trace_context.py +89 -5
  125. deepeval/tracing/tracing.py +74 -3
  126. deepeval/tracing/types.py +20 -2
  127. deepeval/tracing/utils.py +8 -0
  128. deepeval/utils.py +21 -0
  129. {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
  130. {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
  131. deepeval/integrations/llama_index/agent/patched.py +0 -68
  132. {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
  133. {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
  134. {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/prompt/utils.py CHANGED
@@ -1,39 +1,80 @@
1
1
  import re
2
+ import uuid
2
3
  from jinja2 import Template
4
+ from typing import Any, Dict, Type, Optional, List, Match
5
+ from pydantic import BaseModel, create_model
3
6
 
4
- from deepeval.prompt.api import PromptInterpolationType
7
+ from deepeval.prompt.api import (
8
+ PromptInterpolationType,
9
+ OutputSchema,
10
+ SchemaDataType,
11
+ OutputSchemaField,
12
+ )
5
13
 
14
+ ###################################
15
+ # Interpolation
16
+ ###################################
6
17
 
7
def _substitute_placeholders(
    pattern: str, text: str, variables: Dict[str, Any]
) -> str:
    """Replace every match of ``pattern`` in ``text`` with a variable's value.

    Shared implementation for the regex-based interpolation styles below, so
    the lookup and the missing-variable error live in exactly one place.

    Args:
        pattern: regex whose group 1 captures the variable name.
        text: template text to scan.
        variables: mapping of variable name to value; values are rendered
            with ``str()``.

    Returns:
        ``text`` with each placeholder replaced. Anything the pattern does
        not match is left untouched.

    Raises:
        KeyError: if a placeholder names a variable missing from
            ``variables``.
    """

    def replace_match(match: Match[str]) -> str:
        var_name = match.group(1)
        if var_name in variables:
            # A callable replacement is inserted literally by re.sub, so
            # backslashes in the value are not treated as group references.
            return str(variables[var_name])
        # Raise error for missing variables to maintain consistency
        raise KeyError(f"Missing variable in template: {var_name}")

    return re.sub(pattern, replace_match, text)


def interpolate_mustache(text: str, **kwargs: Any) -> str:
    """Interpolate using Mustache format: {{variable}}

    Only ``{{name}}`` with no inner whitespace is recognised; raises
    ``KeyError`` when ``name`` is not among ``kwargs``.
    """
    return _substitute_placeholders(
        r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", text, kwargs
    )


def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
    """Interpolate using Mustache with space format: {{ variable }}

    Exactly one space is required on each side of the name; raises
    ``KeyError`` when the name is not among ``kwargs``.
    """
    return _substitute_placeholders(
        r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", text, kwargs
    )


def interpolate_fstring(text: str, **kwargs: Any) -> str:
    """Interpolate using F-string format: {variable}

    Only braces that wrap a bare identifier are substituted, so other brace
    content (for example JSON such as ``{"k": 1}``) passes through unchanged.
    Doubled braces are not an escape: the inner ``{name}`` of ``{{name}}`` is
    still substituted. Raises ``KeyError`` when the name is not among
    ``kwargs``.
    """
    return _substitute_placeholders(
        r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", text, kwargs
    )


def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
    """Interpolate using Dollar Brackets format: ${variable}

    Raises ``KeyError`` when the name is not among ``kwargs``.
    """
    return _substitute_placeholders(
        r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", text, kwargs
    )
28
69
 
29
70
 
30
def interpolate_jinja(text: str, **kwargs: Any) -> str:
    """Render ``text`` as a Jinja2 template with ``kwargs`` as its context."""
    return Template(text).render(**kwargs)
33
74
 
34
75
 
35
76
  def interpolate_text(
36
- interpolation_type: PromptInterpolationType, text: str, **kwargs
77
+ interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
37
78
  ) -> str:
38
79
  """Apply the appropriate interpolation method based on the type"""
39
80
  if interpolation_type == PromptInterpolationType.MUSTACHE:
@@ -47,4 +88,134 @@ def interpolate_text(
47
88
  elif interpolation_type == PromptInterpolationType.JINJA:
48
89
  return interpolate_jinja(text, **kwargs)
49
90
 
50
- raise ValueError(f"Unsupported interpolation type: {interpolation_type}")
91
+
92
+ ###################################
93
+ # Output Schema Deconstruction
94
+ ###################################
95
+
96
# Maps each schema data-type value to the Python type used when building
# pydantic models. OBJECT maps to ``dict`` here; the model builders below
# check for OBJECT before consulting this map and build a nested model.
schema_type_map: Dict[str, Any] = {
    data_type.value: python_type
    for data_type, python_type in (
        (SchemaDataType.STRING, str),
        (SchemaDataType.INTEGER, int),
        (SchemaDataType.FLOAT, float),
        (SchemaDataType.BOOLEAN, bool),
        (SchemaDataType.NULL, type(None)),
        (SchemaDataType.OBJECT, dict),
    )
}
104
+
105
+
106
def construct_nested_base_model(
    parent: OutputSchemaField,
    parent_id_map: Dict[Optional[str], List[OutputSchemaField]],
    model_name: str,
) -> Type[BaseModel]:
    """Build a pydantic model for an OBJECT field from its child fields.

    Args:
        parent: the OBJECT field whose children become the model's fields.
        parent_id_map: schema fields grouped by their ``parent_id``.
        model_name: class name given to the generated model.

    Returns:
        A model class produced by ``create_model``. It has no fields when
        ``parent.id`` has no entry in ``parent_id_map``.
    """
    child_fields: Dict[str, tuple] = {}
    for child in parent_id_map.get(parent.id, []):
        # Key by name, falling back to the id when the name is falsy.
        field_key = child.name or child.id
        # ``type`` may be an enum member or already its raw value.
        child_type = getattr(child.type, "value", child.type)
        if child_type == SchemaDataType.OBJECT.value:
            # Name the nested model with the same fallback key, so an unnamed
            # object child never reaches create_model with a falsy name.
            python_type = construct_nested_base_model(
                child, parent_id_map, field_key
            )
        else:
            # Unknown type values degrade to ``Any`` rather than failing.
            python_type = schema_type_map.get(child_type, Any)
        # ``...`` marks the field as required; otherwise it defaults to None.
        default = ... if child.required else None
        child_fields[field_key] = (python_type, default)
    return create_model(model_name, **child_fields)
125
+
126
+
127
def construct_base_model(
    schema: Optional[OutputSchema] = None,
) -> Optional[Type[BaseModel]]:
    """Build a pydantic model class from a flat ``OutputSchema``.

    The schema stores fields as a flat list linked by ``parent_id``. Fields
    without a parent become the root model's fields, and OBJECT fields are
    expanded into nested models via ``construct_nested_base_model``.

    Args:
        schema: the schema to convert.

    Returns:
        ``None`` when ``schema`` is falsy, a field-less model named
        ``schema.name`` when it has no fields, otherwise the generated root
        model.
    """
    if not schema:
        return None
    if not schema.fields:
        return create_model(schema.name)

    # Group fields by parent; a falsy parent_id is normalised to None (root).
    parent_id_map: Dict[Optional[str], List[OutputSchemaField]] = {}
    for field in schema.fields:
        parent_id_map.setdefault(field.parent_id or None, []).append(field)

    root_fields: Dict[str, tuple] = {}
    for field in parent_id_map.get(None, []):
        # Same name-or-id fallback the nested builder uses for its children.
        field_key = field.name or field.id
        # ``type`` may be an enum member or already its raw value.
        field_type = getattr(field.type, "value", field.type)
        if field_type == SchemaDataType.OBJECT.value:
            python_type = construct_nested_base_model(
                field, parent_id_map, field_key
            )
        else:
            # Unknown type values degrade to ``Any`` rather than failing.
            python_type = schema_type_map.get(field_type, Any)
        # ``...`` marks the field as required; otherwise it defaults to None.
        default = ... if field.required else None
        root_fields[field_key] = (python_type, default)

    return create_model(schema.name, **root_fields)
157
+
158
+
159
+ ###################################
160
+ # Output Schema Construction
161
+ ###################################
162
+
163
+
164
def _process_model(
    model_class: Type[BaseModel],
    parent_id: Optional[str] = None,
) -> List[OutputSchemaField]:
    """Flatten a pydantic model into a list of ``OutputSchemaField`` entries.

    Each model field gets a fresh UUID. A field whose annotation is a
    ``BaseModel`` subclass is emitted as an OBJECT field, followed directly
    by that model's own fields with ``parent_id`` set to the OBJECT field's
    id.

    Args:
        model_class: the pydantic model to flatten.
        parent_id: id of the enclosing OBJECT field, or ``None`` at the root.

    Returns:
        The fields in declaration order, each parent before its children.

    Raises:
        ValueError: if a field is annotated as bare ``list`` or ``dict``.
    """
    # Local imports: these names are not in the module's import block.
    import types
    from typing import Union, get_args, get_origin

    # ``X | None`` (3.10+) reports types.UnionType as its origin;
    # ``Optional[X]`` reports typing.Union.
    union_origins = (Union, getattr(types, "UnionType", Union))
    scalar_types = (
        (str, "STRING"),
        (int, "INTEGER"),
        (float, "FLOAT"),
        (bool, "BOOLEAN"),
    )

    fields: List[OutputSchemaField] = []
    for field_name, field_info in model_class.model_fields.items():
        field_id = str(uuid.uuid4())
        annotation = field_info.annotation

        # Classify Optional[X] by X instead of falling through to STRING.
        if get_origin(annotation) in union_origins:
            non_none = [
                arg for arg in get_args(annotation) if arg is not type(None)
            ]
            if len(non_none) == 1:
                annotation = non_none[0]

        if annotation == list:
            raise ValueError("Unsupported structured output: list")
        if annotation == dict:
            raise ValueError("Unsupported structured output: dict")

        # issubclass also catches indirect BaseModel subclasses; it raises
        # TypeError for non-class annotations such as generic aliases.
        try:
            is_nested_model = annotation is not BaseModel and issubclass(
                annotation, BaseModel
            )
        except TypeError:
            is_nested_model = False

        if is_nested_model:
            field_type = "OBJECT"
        else:
            # Anything that is not a known scalar is reported as STRING.
            field_type = next(
                (
                    name
                    for python_type, name in scalar_types
                    if annotation == python_type
                ),
                "STRING",
            )

        fields.append(
            OutputSchemaField(
                id=field_id,
                name=field_name,
                type=field_type,
                # pydantic v2 stores PydanticUndefined (never Ellipsis) for
                # fields without a default, so ask the FieldInfo directly.
                required=field_info.is_required(),
                parent_id=parent_id,
            )
        )
        if is_nested_model:
            fields.extend(_process_model(annotation, field_id))
    return fields
213
+
214
+
215
def construct_output_schema(
    base_model_class: Optional[Type[BaseModel]] = None,
) -> Optional[OutputSchema]:
    """Convert a pydantic model class into an ``OutputSchema``.

    Returns ``None`` when no model class is given; otherwise the schema is
    named after the class and holds the flattened fields produced by
    ``_process_model``.
    """
    if base_model_class is None:
        return None
    return OutputSchema(
        name=base_model_class.__name__,
        fields=_process_model(base_model_class),
    )