deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
- deepeval/cli/main.py +42 -0
- deepeval/confident/api.py +1 -0
- deepeval/config/settings.py +22 -4
- deepeval/constants.py +8 -1
- deepeval/dataset/dataset.py +2 -11
- deepeval/dataset/utils.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/evaluate.py +5 -1
- deepeval/evaluate/execute.py +811 -248
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +33 -119
- deepeval/integrations/crewai/__init__.py +7 -1
- deepeval/integrations/crewai/handler.py +1 -1
- deepeval/integrations/crewai/subs.py +51 -0
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/crewai/wrapper.py +45 -5
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
- deepeval/metrics/api.py +281 -0
- deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/bias/bias.py +12 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
- deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
- deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
- deepeval/metrics/conversational_dag/nodes.py +12 -4
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
- deepeval/metrics/dag/dag.py +12 -0
- deepeval/metrics/dag/nodes.py +12 -4
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +12 -1
- deepeval/metrics/g_eval/g_eval.py +11 -0
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +20 -9
- deepeval/metrics/indicator.py +8 -2
- deepeval/metrics/json_correctness/json_correctness.py +12 -1
- deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +20 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
- deepeval/metrics/misuse/misuse.py +12 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
- deepeval/metrics/non_advice/non_advice.py +12 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
- deepeval/metrics/role_adherence/role_adherence.py +12 -0
- deepeval/metrics/role_violation/role_violation.py +12 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/summarization/summarization.py +12 -1
- deepeval/metrics/task_completion/task_completion.py +4 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/metrics/toxicity/toxicity.py +12 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/openai_model.py +2 -0
- deepeval/openai/__init__.py +14 -32
- deepeval/openai/extractors.py +85 -50
- deepeval/openai/patch.py +258 -167
- deepeval/openai/types.py +20 -0
- deepeval/openai/utils.py +205 -56
- deepeval/prompt/__init__.py +19 -1
- deepeval/prompt/api.py +160 -0
- deepeval/prompt/prompt.py +245 -62
- deepeval/prompt/utils.py +186 -15
- deepeval/synthesizer/chunking/context_generator.py +209 -152
- deepeval/synthesizer/chunking/doc_chunker.py +46 -12
- deepeval/synthesizer/synthesizer.py +19 -15
- deepeval/test_case/api.py +131 -0
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/__init__.py +1 -0
- deepeval/test_run/hyperparameters.py +47 -8
- deepeval/test_run/test_run.py +292 -206
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/api.py +3 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +24 -5
- deepeval/tracing/trace_context.py +89 -5
- deepeval/tracing/tracing.py +74 -3
- deepeval/tracing/types.py +20 -2
- deepeval/tracing/utils.py +8 -0
- deepeval/utils.py +21 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/prompt/utils.py
CHANGED
|
@@ -1,39 +1,80 @@
|
|
|
1
1
|
import re
|
|
2
|
+
import uuid
|
|
2
3
|
from jinja2 import Template
|
|
4
|
+
from typing import Any, Dict, Type, Optional, List, Match
|
|
5
|
+
from pydantic import BaseModel, create_model
|
|
3
6
|
|
|
4
|
-
from deepeval.prompt.api import
|
|
7
|
+
from deepeval.prompt.api import (
|
|
8
|
+
PromptInterpolationType,
|
|
9
|
+
OutputSchema,
|
|
10
|
+
SchemaDataType,
|
|
11
|
+
OutputSchemaField,
|
|
12
|
+
)
|
|
5
13
|
|
|
14
|
+
###################################
|
|
15
|
+
# Interpolation
|
|
16
|
+
###################################
|
|
6
17
|
|
|
7
|
-
|
|
18
|
+
|
|
19
|
+
# Every placeholder syntax wraps the same identifier shape: a letter or
# underscore followed by letters, digits or underscores (captured as group 1).
_VARIABLE_NAME = r"([a-zA-Z_][a-zA-Z0-9_]*)"

# Compiled once at import time instead of on every interpolation call.
_MUSTACHE_PATTERN = re.compile(r"\{\{" + _VARIABLE_NAME + r"\}\}")
_MUSTACHE_WITH_SPACE_PATTERN = re.compile(r"\{\{ " + _VARIABLE_NAME + r" \}\}")
_FSTRING_PATTERN = re.compile(r"\{" + _VARIABLE_NAME + r"\}")
_DOLLAR_BRACKETS_PATTERN = re.compile(r"\$\{" + _VARIABLE_NAME + r"\}")


def _substitute_placeholders(
    pattern: "re.Pattern[str]", text: str, variables: Dict[str, Any]
) -> str:
    """Replace every match of ``pattern`` in ``text`` with its variable value.

    Args:
        pattern: Compiled placeholder pattern whose group 1 is the variable name.
        text: Template text to interpolate.
        variables: Mapping of variable name to value; values are converted
            with ``str()`` before insertion.

    Returns:
        ``text`` with every matched placeholder substituted. Text that does
        not match ``pattern`` is left untouched.

    Raises:
        KeyError: If a matched placeholder names a variable that is not in
            ``variables``.
    """

    def replace_match(match: Match[str]) -> str:
        var_name = match.group(1)
        if var_name in variables:
            # A callable replacement is inserted literally, so backslashes
            # in the value are not interpreted as group references.
            return str(variables[var_name])
        # Raise error for missing variables to maintain consistency
        raise KeyError(f"Missing variable in template: {var_name}")

    return pattern.sub(replace_match, text)


def interpolate_mustache(text: str, **kwargs: Any) -> str:
    """Interpolate using Mustache format: {{variable}}

    Raises:
        KeyError: If a placeholder names a variable missing from ``kwargs``.
    """
    return _substitute_placeholders(_MUSTACHE_PATTERN, text, kwargs)


def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
    """Interpolate using Mustache with space format: {{ variable }}

    Exactly one space is expected on each side of the variable name.

    Raises:
        KeyError: If a placeholder names a variable missing from ``kwargs``.
    """
    return _substitute_placeholders(_MUSTACHE_WITH_SPACE_PATTERN, text, kwargs)


def interpolate_fstring(text: str, **kwargs: Any) -> str:
    """Interpolate using F-string format: {variable}

    Only ``{identifier}`` is treated as a placeholder, so braces that wrap
    anything else (for example JSON such as ``{"key": 1}``) are left as-is.

    NOTE(review): the pattern also matches the inner braces of ``{{name}}``
    and the ``{name}`` part of ``${name}``; confirm that is intended.

    Raises:
        KeyError: If a placeholder names a variable missing from ``kwargs``.
    """
    return _substitute_placeholders(_FSTRING_PATTERN, text, kwargs)


def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
    """Interpolate using Dollar Brackets format: ${variable}

    Raises:
        KeyError: If a placeholder names a variable missing from ``kwargs``.
    """
    return _substitute_placeholders(_DOLLAR_BRACKETS_PATTERN, text, kwargs)
|
|
28
69
|
|
|
29
70
|
|
|
30
|
-
def interpolate_jinja(text: str, **kwargs) -> str:
|
|
71
|
+
def interpolate_jinja(text: str, **kwargs: Any) -> str:
    """Interpolate using Jinja format by rendering ``text`` as a template.

    ``kwargs`` are passed to the template as its rendering context.
    """
    return Template(text).render(**kwargs)
|
|
33
74
|
|
|
34
75
|
|
|
35
76
|
def interpolate_text(
|
|
36
|
-
interpolation_type: PromptInterpolationType, text: str, **kwargs
|
|
77
|
+
interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
|
|
37
78
|
) -> str:
|
|
38
79
|
"""Apply the appropriate interpolation method based on the type"""
|
|
39
80
|
if interpolation_type == PromptInterpolationType.MUSTACHE:
|
|
@@ -47,4 +88,134 @@ def interpolate_text(
|
|
|
47
88
|
elif interpolation_type == PromptInterpolationType.JINJA:
|
|
48
89
|
return interpolate_jinja(text, **kwargs)
|
|
49
90
|
|
|
50
|
-
|
|
91
|
+
|
|
92
|
+
###################################
|
|
93
|
+
# Output Schema Deconstruction
|
|
94
|
+
###################################
|
|
95
|
+
|
|
96
|
+
# Maps the string value of each SchemaDataType member to the Python type used
# when a pydantic model is rebuilt from an output schema. OBJECT maps to a
# plain dict here; the model builders below replace it with a nested model.
schema_type_map: Dict[str, Any] = {
    data_type.value: python_type
    for data_type, python_type in (
        (SchemaDataType.STRING, str),
        (SchemaDataType.INTEGER, int),
        (SchemaDataType.FLOAT, float),
        (SchemaDataType.BOOLEAN, bool),
        (SchemaDataType.NULL, type(None)),
        (SchemaDataType.OBJECT, dict),
    )
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def construct_nested_base_model(
    parent: OutputSchemaField,
    parent_id_map: Dict[Optional[str], List[OutputSchemaField]],
    model_name: str,
) -> Type[BaseModel]:
    """Build a pydantic model for an OBJECT field from its direct children.

    Children that are themselves OBJECT fields are built recursively.

    Args:
        parent: The OBJECT field whose children become the model's fields.
        parent_id_map: Schema fields grouped by their ``parent_id``.
        model_name: Name given to the generated model class.

    Returns:
        A model class created with ``create_model``. It has no fields when
        ``parent`` has no entry in ``parent_id_map``.
    """
    child_fields: Dict[str, tuple] = {}
    for child in parent_id_map.get(parent.id, []):
        # One fallback for both the field key and the nested model name, so
        # a child without a name never yields a falsy model name.
        child_name = child.name or child.id
        # ``type`` may be an enum member or already its raw value.
        child_type = getattr(child.type, "value", child.type)
        if child_type == SchemaDataType.OBJECT.value:
            python_type = construct_nested_base_model(
                child, parent_id_map, child_name
            )
        else:
            # Unknown type values are accepted as ``Any``.
            python_type = schema_type_map.get(child_type, Any)
        # ``...`` marks the field as required; optional fields default to None.
        default = ... if child.required else None
        child_fields[child_name] = (python_type, default)
    return create_model(model_name, **child_fields)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def construct_base_model(
    schema: Optional[OutputSchema] = None,
) -> Optional[Type[BaseModel]]:
    """Rebuild a pydantic model class from an ``OutputSchema``.

    Args:
        schema: The schema to convert. Its ``fields`` are a flat list linked
            into a tree through ``parent_id``.

    Returns:
        ``None`` when ``schema`` is missing; otherwise a model class named
        ``schema.name``. The model has no fields when the schema has none.
    """
    if not schema:
        return None
    if not schema.fields:
        return create_model(schema.name)

    # Group fields by parent; a falsy parent_id is normalised to None, which
    # is the key for the root-level fields.
    parent_id_map: Dict[Optional[str], List[OutputSchemaField]] = {}
    for field in schema.fields:
        parent_id_map.setdefault(field.parent_id or None, []).append(field)

    root_fields: Dict[str, tuple] = {}
    for field in parent_id_map.get(None, []):
        # Same fallback as the nested builder, so a field without a name is
        # keyed by its id instead of by None.
        field_name = field.name or field.id
        # ``type`` may be an enum member or already its raw value.
        field_type = getattr(field.type, "value", field.type)
        if field_type == SchemaDataType.OBJECT.value:
            python_type = construct_nested_base_model(
                field, parent_id_map, field_name
            )
        else:
            # Unknown type values are accepted as ``Any``.
            python_type = schema_type_map.get(field_type, Any)
        # ``...`` marks the field as required; optional fields default to None.
        default = ... if field.required else None
        root_fields[field_name] = (python_type, default)

    return create_model(schema.name, **root_fields)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
###################################
|
|
160
|
+
# Output Schema Construction
|
|
161
|
+
###################################
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _is_model_class(annotation: Any) -> bool:
    """Return True when ``annotation`` is a ``BaseModel`` subclass at any depth."""
    try:
        return isinstance(annotation, type) and issubclass(
            annotation, BaseModel
        )
    except TypeError:
        # Some generic aliases pass the isinstance check but are rejected by
        # issubclass; they are not model classes.
        return False


def _process_model(
    model_class: Type[BaseModel],
    parent_id: Optional[str] = None,
) -> List[OutputSchemaField]:
    """Flatten a pydantic model into a list of ``OutputSchemaField`` records.

    Each field gets a fresh UUID as its id. A field whose annotation is a
    ``BaseModel`` subclass is emitted as an OBJECT field, immediately
    followed by that model's own fields with ``parent_id`` set to its id.

    Args:
        model_class: The pydantic model to flatten.
        parent_id: Id of the enclosing OBJECT field, or ``None`` at the root.

    Returns:
        The fields in declaration order, nested fields following their parent.

    Raises:
        ValueError: If a field is annotated with bare ``list`` or ``dict``.
    """
    scalar_type_names = (
        (str, "STRING"),
        (int, "INTEGER"),
        (float, "FLOAT"),
        (bool, "BOOLEAN"),
    )
    fields: List[OutputSchemaField] = []
    for field_name, field_info in model_class.model_fields.items():
        annotation = field_info.annotation
        if annotation == list:
            raise ValueError("Unsupported structured output: list")
        if annotation == dict:
            raise ValueError("Unsupported structured output: dict")

        # issubclass (not a ``__bases__`` lookup) so that models inheriting
        # from an intermediate base class are still recognised as objects.
        is_nested_model = _is_model_class(annotation)
        if is_nested_model:
            field_type = "OBJECT"
        else:
            # NOTE(review): any other annotation (e.g. Optional[int],
            # List[str]) falls back to "STRING", as before.
            field_type = next(
                (
                    type_name
                    for python_type, type_name in scalar_type_names
                    if annotation == python_type
                ),
                "STRING",
            )

        field_id = str(uuid.uuid4())
        fields.append(
            OutputSchemaField(
                id=field_id,
                name=field_name,
                type=field_type,
                # pydantic v2 stores PydanticUndefined (not Ellipsis) as the
                # default of a required field, so ``default is ...`` never
                # matched; is_required() is the supported check.
                required=field_info.is_required(),
                parent_id=parent_id,
            )
        )
        if is_nested_model:
            fields.extend(_process_model(annotation, field_id))
    return fields
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def construct_output_schema(
    base_model_class: Optional[Type[BaseModel]] = None,
) -> Optional[OutputSchema]:
    """Convert a pydantic model class into an ``OutputSchema``.

    Returns ``None`` when no model class is given; otherwise a schema named
    after the class and holding the fields produced by ``_process_model``.
    """
    if base_model_class is not None:
        return OutputSchema(
            fields=_process_model(base_model_class),
            name=base_model_class.__name__,
        )
    return None
|