deepeval-3.6.7-py3-none-any.whl → deepeval-3.6.8-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/execute.py +725 -217
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +13 -3
- deepeval/integrations/crewai/__init__.py +2 -1
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +8 -8
- deepeval/metrics/mcp/mcp_task_completion.py +7 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/task_completion/task_completion.py +1 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/openai/extractors.py +61 -16
- deepeval/openai/patch.py +8 -12
- deepeval/openai/types.py +1 -1
- deepeval/openai/utils.py +108 -1
- deepeval/prompt/prompt.py +1 -0
- deepeval/prompt/utils.py +43 -14
- deepeval/synthesizer/synthesizer.py +11 -10
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/test_run.py +190 -207
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +23 -4
- deepeval/tracing/trace_context.py +53 -38
- deepeval/tracing/tracing.py +23 -0
- deepeval/tracing/types.py +16 -14
- deepeval/utils.py +21 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/RECORD +75 -53
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- deepeval/tracing/message_types/__init__.py +0 -10
- deepeval/tracing/message_types/base.py +0 -6
- deepeval/tracing/message_types/messages.py +0 -14
- deepeval/tracing/message_types/tools.py +0 -18
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/prompt/utils.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import uuid
|
|
3
3
|
from jinja2 import Template
|
|
4
|
-
from typing import Any, Dict, Type, Optional, List
|
|
4
|
+
from typing import Any, Dict, Type, Optional, List, Match
|
|
5
5
|
from pydantic import BaseModel, create_model
|
|
6
6
|
|
|
7
7
|
from deepeval.prompt.api import (
|
|
@@ -16,36 +16,65 @@ from deepeval.prompt.api import (
|
|
|
16
16
|
###################################
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def interpolate_mustache(text: str, **kwargs) -> str:
|
|
19
|
+
def interpolate_mustache(text: str, **kwargs: Any) -> str:
|
|
20
20
|
"""Interpolate using Mustache format: {{variable}}"""
|
|
21
|
-
formatted_template = re.sub(r"\{\{(\w+)\}\}", r"{\1}", text)
|
|
22
|
-
return formatted_template.format(**kwargs)
|
|
23
21
|
|
|
22
|
+
def replace_match(match: Match[str]) -> str:
|
|
23
|
+
var_name = match.group(1)
|
|
24
|
+
if var_name in kwargs:
|
|
25
|
+
return str(kwargs[var_name])
|
|
26
|
+
# Raise error for missing variables to maintain consistency
|
|
27
|
+
raise KeyError(f"Missing variable in template: {var_name}")
|
|
24
28
|
|
|
25
|
-
|
|
29
|
+
return re.sub(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", replace_match, text)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
|
|
26
33
|
"""Interpolate using Mustache with space format: {{ variable }}"""
|
|
27
|
-
|
|
28
|
-
|
|
34
|
+
|
|
35
|
+
def replace_match(match: Match[str]) -> str:
|
|
36
|
+
var_name = match.group(1)
|
|
37
|
+
if var_name in kwargs:
|
|
38
|
+
return str(kwargs[var_name])
|
|
39
|
+
# Raise error for missing variables to maintain consistency
|
|
40
|
+
raise KeyError(f"Missing variable in template: {var_name}")
|
|
41
|
+
|
|
42
|
+
return re.sub(r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", replace_match, text)
|
|
29
43
|
|
|
30
44
|
|
|
31
|
-
def interpolate_fstring(text: str, **kwargs) -> str:
|
|
45
|
+
def interpolate_fstring(text: str, **kwargs: Any) -> str:
|
|
32
46
|
"""Interpolate using F-string format: {variable}"""
|
|
33
|
-
return text.format(**kwargs)
|
|
34
47
|
|
|
48
|
+
def replace_match(match: Match[str]) -> str:
|
|
49
|
+
var_name = match.group(1)
|
|
50
|
+
if var_name in kwargs:
|
|
51
|
+
return str(kwargs[var_name])
|
|
52
|
+
# Raise error for missing variables to maintain consistency
|
|
53
|
+
raise KeyError(f"Missing variable in template: {var_name}")
|
|
35
54
|
|
|
36
|
-
|
|
55
|
+
return re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
|
|
37
59
|
"""Interpolate using Dollar Brackets format: ${variable}"""
|
|
38
|
-
|
|
39
|
-
|
|
60
|
+
|
|
61
|
+
def replace_match(match: Match[str]) -> str:
|
|
62
|
+
var_name = match.group(1)
|
|
63
|
+
if var_name in kwargs:
|
|
64
|
+
return str(kwargs[var_name])
|
|
65
|
+
# Raise error for missing variables to maintain consistency
|
|
66
|
+
raise KeyError(f"Missing variable in template: {var_name}")
|
|
67
|
+
|
|
68
|
+
return re.sub(r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
|
|
40
69
|
|
|
41
70
|
|
|
42
|
-
def interpolate_jinja(text: str, **kwargs) -> str:
|
|
71
|
+
def interpolate_jinja(text: str, **kwargs: Any) -> str:
|
|
43
72
|
template = Template(text)
|
|
44
73
|
return template.render(**kwargs)
|
|
45
74
|
|
|
46
75
|
|
|
47
76
|
def interpolate_text(
|
|
48
|
-
interpolation_type: PromptInterpolationType, text: str, **kwargs
|
|
77
|
+
interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
|
|
49
78
|
) -> str:
|
|
50
79
|
"""Apply the appropriate interpolation method based on the type"""
|
|
51
80
|
if interpolation_type == PromptInterpolationType.MUSTACHE:
|
|
deepeval/synthesizer/synthesizer.py
CHANGED
|
@@ -555,7 +555,7 @@ class Synthesizer:
|
|
|
555
555
|
include_expected_output=include_expected_output,
|
|
556
556
|
max_goldens_per_context=max_goldens_per_context,
|
|
557
557
|
source_files=source_files,
|
|
558
|
-
|
|
558
|
+
context_index=index,
|
|
559
559
|
progress=progress,
|
|
560
560
|
pbar_id=pbar_id,
|
|
561
561
|
context_scores=_context_scores,
|
|
@@ -577,7 +577,7 @@ class Synthesizer:
|
|
|
577
577
|
include_expected_output: bool,
|
|
578
578
|
max_goldens_per_context: int,
|
|
579
579
|
source_files: Optional[List[str]],
|
|
580
|
-
|
|
580
|
+
context_index: int,
|
|
581
581
|
progress: Optional[Progress] = None,
|
|
582
582
|
pbar_id: Optional[int] = None,
|
|
583
583
|
context_scores: Optional[List[float]] = None,
|
|
@@ -599,7 +599,7 @@ class Synthesizer:
|
|
|
599
599
|
# Add pbars
|
|
600
600
|
pbar_generate_goldens_id = add_pbar(
|
|
601
601
|
progress,
|
|
602
|
-
f"\t⚡ Generating goldens from context #{
|
|
602
|
+
f"\t⚡ Generating goldens from context #{context_index}",
|
|
603
603
|
total=1 + max_goldens_per_context,
|
|
604
604
|
)
|
|
605
605
|
pbar_generate_inputs_id = add_pbar(
|
|
@@ -643,7 +643,7 @@ class Synthesizer:
|
|
|
643
643
|
|
|
644
644
|
# Helper function to process each input in parallel
|
|
645
645
|
async def process_input(
|
|
646
|
-
|
|
646
|
+
input_index: int,
|
|
647
647
|
data: SyntheticData,
|
|
648
648
|
progress: Optional[Progress] = None,
|
|
649
649
|
):
|
|
@@ -654,7 +654,7 @@ class Synthesizer:
|
|
|
654
654
|
num_evolutions=self.evolution_config.num_evolutions,
|
|
655
655
|
evolutions=self.evolution_config.evolutions,
|
|
656
656
|
progress=progress,
|
|
657
|
-
pbar_evolve_input_id=pbar_evolve_input_ids[
|
|
657
|
+
pbar_evolve_input_id=pbar_evolve_input_ids[input_index],
|
|
658
658
|
remove_pbar=False,
|
|
659
659
|
)
|
|
660
660
|
|
|
@@ -672,7 +672,7 @@ class Synthesizer:
|
|
|
672
672
|
)
|
|
673
673
|
evolved_input = res.input
|
|
674
674
|
update_pbar(
|
|
675
|
-
progress, pbar_evolve_input_ids[
|
|
675
|
+
progress, pbar_evolve_input_ids[input_index], remove=False
|
|
676
676
|
)
|
|
677
677
|
|
|
678
678
|
# Generate expected output
|
|
@@ -685,7 +685,7 @@ class Synthesizer:
|
|
|
685
685
|
)
|
|
686
686
|
expected_output = await self._a_generate(expected_output_prompt)
|
|
687
687
|
update_pbar(
|
|
688
|
-
progress, pbar_evolve_input_ids[
|
|
688
|
+
progress, pbar_evolve_input_ids[input_index], remove=False
|
|
689
689
|
)
|
|
690
690
|
|
|
691
691
|
# Create Golden
|
|
@@ -694,13 +694,14 @@ class Synthesizer:
|
|
|
694
694
|
context=context,
|
|
695
695
|
expected_output=expected_output,
|
|
696
696
|
source_file=(
|
|
697
|
-
source_files[
|
|
698
|
-
if source_files is not None
|
|
697
|
+
source_files[context_index]
|
|
698
|
+
if source_files is not None
|
|
699
|
+
and context_index < len(source_files)
|
|
699
700
|
else None
|
|
700
701
|
),
|
|
701
702
|
additional_metadata={
|
|
702
703
|
"evolutions": evolutions_used,
|
|
703
|
-
"synthetic_input_quality": scores[
|
|
704
|
+
"synthetic_input_quality": scores[input_index],
|
|
704
705
|
# "context_quality": (
|
|
705
706
|
# context_scores[data_index]
|
|
706
707
|
# if context_scores is not None
|
|
deepeval/test_case/llm_test_case.py
CHANGED
|
@@ -122,7 +122,9 @@ class ToolCall(BaseModel):
|
|
|
122
122
|
|
|
123
123
|
# Handle nested fields like input_parameters
|
|
124
124
|
if self.input_parameters:
|
|
125
|
-
formatted_input = json.dumps(
|
|
125
|
+
formatted_input = json.dumps(
|
|
126
|
+
self.input_parameters, indent=4, ensure_ascii=False
|
|
127
|
+
)
|
|
126
128
|
formatted_input = self._indent_nested_field(
|
|
127
129
|
"input_parameters", formatted_input
|
|
128
130
|
)
|
|
@@ -130,7 +132,9 @@ class ToolCall(BaseModel):
|
|
|
130
132
|
|
|
131
133
|
# Handle nested fields like output
|
|
132
134
|
if isinstance(self.output, dict):
|
|
133
|
-
formatted_output = json.dumps(
|
|
135
|
+
formatted_output = json.dumps(
|
|
136
|
+
self.output, indent=4, ensure_ascii=False
|
|
137
|
+
)
|
|
134
138
|
formatted_output = self._indent_nested_field(
|
|
135
139
|
"output", formatted_output
|
|
136
140
|
)
|