PyPI - deepeval - Versions diffs - 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl - Mend

deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

deepeval/_version.py +1 -1
deepeval/errors.py +20 -2
deepeval/evaluate/execute.py +725 -217
deepeval/evaluate/types.py +1 -0
deepeval/evaluate/utils.py +13 -3
deepeval/integrations/crewai/__init__.py +2 -1
deepeval/integrations/crewai/tool.py +71 -0
deepeval/integrations/llama_index/__init__.py +0 -4
deepeval/integrations/llama_index/handler.py +20 -21
deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
deepeval/metrics/__init__.py +13 -0
deepeval/metrics/base_metric.py +1 -0
deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
deepeval/metrics/conversational_g_eval/__init__.py +3 -0
deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
deepeval/metrics/dag/schema.py +1 -1
deepeval/metrics/dag/templates.py +2 -2
deepeval/metrics/goal_accuracy/__init__.py +1 -0
deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
deepeval/metrics/goal_accuracy/schema.py +17 -0
deepeval/metrics/goal_accuracy/template.py +235 -0
deepeval/metrics/hallucination/hallucination.py +8 -8
deepeval/metrics/mcp/mcp_task_completion.py +7 -2
deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
deepeval/metrics/plan_adherence/__init__.py +1 -0
deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
deepeval/metrics/plan_adherence/schema.py +11 -0
deepeval/metrics/plan_adherence/template.py +170 -0
deepeval/metrics/plan_quality/__init__.py +1 -0
deepeval/metrics/plan_quality/plan_quality.py +292 -0
deepeval/metrics/plan_quality/schema.py +11 -0
deepeval/metrics/plan_quality/template.py +101 -0
deepeval/metrics/step_efficiency/__init__.py +1 -0
deepeval/metrics/step_efficiency/schema.py +11 -0
deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
deepeval/metrics/step_efficiency/template.py +256 -0
deepeval/metrics/task_completion/task_completion.py +1 -0
deepeval/metrics/tool_correctness/schema.py +6 -0
deepeval/metrics/tool_correctness/template.py +88 -0
deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
deepeval/metrics/tool_use/__init__.py +1 -0
deepeval/metrics/tool_use/schema.py +19 -0
deepeval/metrics/tool_use/template.py +220 -0
deepeval/metrics/tool_use/tool_use.py +458 -0
deepeval/metrics/topic_adherence/__init__.py +1 -0
deepeval/metrics/topic_adherence/schema.py +16 -0
deepeval/metrics/topic_adherence/template.py +162 -0
deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
deepeval/models/embedding_models/azure_embedding_model.py +37 -36
deepeval/models/embedding_models/local_embedding_model.py +30 -32
deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
deepeval/models/embedding_models/openai_embedding_model.py +22 -31
deepeval/openai/extractors.py +61 -16
deepeval/openai/patch.py +8 -12
deepeval/openai/types.py +1 -1
deepeval/openai/utils.py +108 -1
deepeval/prompt/prompt.py +1 -0
deepeval/prompt/utils.py +43 -14
deepeval/synthesizer/synthesizer.py +11 -10
deepeval/test_case/llm_test_case.py +6 -2
deepeval/test_run/test_run.py +190 -207
deepeval/tracing/__init__.py +2 -1
deepeval/tracing/otel/exporter.py +3 -4
deepeval/tracing/otel/utils.py +23 -4
deepeval/tracing/trace_context.py +53 -38
deepeval/tracing/tracing.py +23 -0
deepeval/tracing/types.py +16 -14
deepeval/utils.py +21 -0
{deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
{deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/RECORD +75 -53
deepeval/integrations/llama_index/agent/patched.py +0 -68
deepeval/tracing/message_types/__init__.py +0 -10
deepeval/tracing/message_types/base.py +0 -6
deepeval/tracing/message_types/messages.py +0 -14
deepeval/tracing/message_types/tools.py +0 -18
{deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
{deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
{deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0

deepeval/prompt/utils.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import re
 import uuid
 from jinja2 import Template
-from typing import Any, Dict, Type, Optional, List
+from typing import Any, Dict, Type, Optional, List, Match
 from pydantic import BaseModel, create_model
 from deepeval.prompt.api import (
@@ -16,36 +16,65 @@ from deepeval.prompt.api import (
 ###################################
-def interpolate_mustache(text: str, **kwargs) -> str:
+def interpolate_mustache(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache format: {{variable}}"""
-    formatted_template = re.sub(r"\{\{(\w+)\}\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
-def interpolate_mustache_with_space(text: str, **kwargs) -> str:
+    return re.sub(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", replace_match, text)
+def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache with space format: {{ variable }}"""
-    formatted_template = re.sub(r"\{\{ (\w+) \}\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+    return re.sub(r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", replace_match, text)
-def interpolate_fstring(text: str, **kwargs) -> str:
+def interpolate_fstring(text: str, **kwargs: Any) -> str:
     """Interpolate using F-string format: {variable}"""
-    return text.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
-def interpolate_dollar_brackets(text: str, **kwargs) -> str:
+    return re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
+def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
     """Interpolate using Dollar Brackets format: ${variable}"""
-    formatted_template = re.sub(r"\$\{(\w+)\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+    return re.sub(r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
-def interpolate_jinja(text: str, **kwargs) -> str:
+def interpolate_jinja(text: str, **kwargs: Any) -> str:
     template = Template(text)
     return template.render(**kwargs)
 def interpolate_text(
-    interpolation_type: PromptInterpolationType, text: str, **kwargs
+    interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
 ) -> str:
     """Apply the appropriate interpolation method based on the type"""
     if interpolation_type == PromptInterpolationType.MUSTACHE:

deepeval/synthesizer/synthesizer.py CHANGED Viewed

@@ -555,7 +555,7 @@ class Synthesizer:
                     include_expected_output=include_expected_output,
                     max_goldens_per_context=max_goldens_per_context,
                     source_files=source_files,
-                    index=index,
+                    context_index=index,
                     progress=progress,
                     pbar_id=pbar_id,
                     context_scores=_context_scores,
@@ -577,7 +577,7 @@ class Synthesizer:
         include_expected_output: bool,
         max_goldens_per_context: int,
         source_files: Optional[List[str]],
-        index: int,
+        context_index: int,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
         context_scores: Optional[List[float]] = None,
@@ -599,7 +599,7 @@ class Synthesizer:
         # Add pbars
         pbar_generate_goldens_id = add_pbar(
             progress,
-            f"\t⚡ Generating goldens from context #{index}",
+            f"\t⚡ Generating goldens from context #{context_index}",
             total=1 + max_goldens_per_context,
         )
         pbar_generate_inputs_id = add_pbar(
@@ -643,7 +643,7 @@ class Synthesizer:
         # Helper function to process each input in parallel
         async def process_input(
-            index: int,
+            input_index: int,
             data: SyntheticData,
             progress: Optional[Progress] = None,
         ):
@@ -654,7 +654,7 @@ class Synthesizer:
                 num_evolutions=self.evolution_config.num_evolutions,
                 evolutions=self.evolution_config.evolutions,
                 progress=progress,
-                pbar_evolve_input_id=pbar_evolve_input_ids[index],
+                pbar_evolve_input_id=pbar_evolve_input_ids[input_index],
                 remove_pbar=False,
             )
@@ -672,7 +672,7 @@ class Synthesizer:
                 )
                 evolved_input = res.input
                 update_pbar(
-                    progress, pbar_evolve_input_ids[index], remove=False
+                    progress, pbar_evolve_input_ids[input_index], remove=False
                 )
             # Generate expected output
@@ -685,7 +685,7 @@ class Synthesizer:
                 )
                 expected_output = await self._a_generate(expected_output_prompt)
                 update_pbar(
-                    progress, pbar_evolve_input_ids[index], remove=False
+                    progress, pbar_evolve_input_ids[input_index], remove=False
                 )
             # Create Golden
@@ -694,13 +694,14 @@ class Synthesizer:
                 context=context,
                 expected_output=expected_output,
                 source_file=(
-                    source_files[index]
-                    if source_files is not None and index < len(source_files)
+                    source_files[context_index]
+                    if source_files is not None
+                    and context_index < len(source_files)
                     else None
                 ),
                 additional_metadata={
                     "evolutions": evolutions_used,
-                    "synthetic_input_quality": scores[index],
+                    "synthetic_input_quality": scores[input_index],
                     # "context_quality": (
                     #     context_scores[data_index]
                     #     if context_scores is not None

deepeval/test_case/llm_test_case.py CHANGED Viewed

@@ -122,7 +122,9 @@ class ToolCall(BaseModel):
         # Handle nested fields like input_parameters
         if self.input_parameters:
-            formatted_input = json.dumps(self.input_parameters, indent=4)
+            formatted_input = json.dumps(
+                self.input_parameters, indent=4, ensure_ascii=False
+            )
             formatted_input = self._indent_nested_field(
                 "input_parameters", formatted_input
             )
@@ -130,7 +132,9 @@ class ToolCall(BaseModel):
         # Handle nested fields like output
         if isinstance(self.output, dict):
-            formatted_output = json.dumps(self.output, indent=4)
+            formatted_output = json.dumps(
+                self.output, indent=4, ensure_ascii=False
+            )
             formatted_output = self._indent_nested_field(
                 "output", formatted_output
             )

deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl

deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl