PyPI - deepeval - Versions diffs - 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl - Mend

deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

deepeval/_version.py +1 -1
deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
deepeval/cli/main.py +42 -0
deepeval/confident/api.py +1 -0
deepeval/config/settings.py +22 -4
deepeval/constants.py +8 -1
deepeval/dataset/dataset.py +2 -11
deepeval/dataset/utils.py +1 -1
deepeval/errors.py +20 -2
deepeval/evaluate/evaluate.py +5 -1
deepeval/evaluate/execute.py +811 -248
deepeval/evaluate/types.py +1 -0
deepeval/evaluate/utils.py +33 -119
deepeval/integrations/crewai/__init__.py +7 -1
deepeval/integrations/crewai/handler.py +1 -1
deepeval/integrations/crewai/subs.py +51 -0
deepeval/integrations/crewai/tool.py +71 -0
deepeval/integrations/crewai/wrapper.py +45 -5
deepeval/integrations/llama_index/__init__.py +0 -4
deepeval/integrations/llama_index/handler.py +20 -21
deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
deepeval/metrics/__init__.py +13 -0
deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
deepeval/metrics/api.py +281 -0
deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
deepeval/metrics/base_metric.py +1 -0
deepeval/metrics/bias/bias.py +12 -3
deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
deepeval/metrics/conversational_dag/nodes.py +12 -4
deepeval/metrics/conversational_g_eval/__init__.py +3 -0
deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
deepeval/metrics/dag/dag.py +12 -0
deepeval/metrics/dag/nodes.py +12 -4
deepeval/metrics/dag/schema.py +1 -1
deepeval/metrics/dag/templates.py +2 -2
deepeval/metrics/faithfulness/faithfulness.py +12 -1
deepeval/metrics/g_eval/g_eval.py +11 -0
deepeval/metrics/goal_accuracy/__init__.py +1 -0
deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
deepeval/metrics/goal_accuracy/schema.py +17 -0
deepeval/metrics/goal_accuracy/template.py +235 -0
deepeval/metrics/hallucination/hallucination.py +20 -9
deepeval/metrics/indicator.py +8 -2
deepeval/metrics/json_correctness/json_correctness.py +12 -1
deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
deepeval/metrics/mcp/mcp_task_completion.py +20 -2
deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
deepeval/metrics/misuse/misuse.py +12 -1
deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
deepeval/metrics/non_advice/non_advice.py +12 -0
deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
deepeval/metrics/plan_adherence/__init__.py +1 -0
deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
deepeval/metrics/plan_adherence/schema.py +11 -0
deepeval/metrics/plan_adherence/template.py +170 -0
deepeval/metrics/plan_quality/__init__.py +1 -0
deepeval/metrics/plan_quality/plan_quality.py +292 -0
deepeval/metrics/plan_quality/schema.py +11 -0
deepeval/metrics/plan_quality/template.py +101 -0
deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
deepeval/metrics/role_adherence/role_adherence.py +12 -0
deepeval/metrics/role_violation/role_violation.py +12 -0
deepeval/metrics/step_efficiency/__init__.py +1 -0
deepeval/metrics/step_efficiency/schema.py +11 -0
deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
deepeval/metrics/step_efficiency/template.py +256 -0
deepeval/metrics/summarization/summarization.py +12 -1
deepeval/metrics/task_completion/task_completion.py +4 -0
deepeval/metrics/tool_correctness/schema.py +6 -0
deepeval/metrics/tool_correctness/template.py +88 -0
deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
deepeval/metrics/tool_use/__init__.py +1 -0
deepeval/metrics/tool_use/schema.py +19 -0
deepeval/metrics/tool_use/template.py +220 -0
deepeval/metrics/tool_use/tool_use.py +458 -0
deepeval/metrics/topic_adherence/__init__.py +1 -0
deepeval/metrics/topic_adherence/schema.py +16 -0
deepeval/metrics/topic_adherence/template.py +162 -0
deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
deepeval/metrics/toxicity/toxicity.py +12 -0
deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
deepeval/models/embedding_models/azure_embedding_model.py +37 -36
deepeval/models/embedding_models/local_embedding_model.py +30 -32
deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
deepeval/models/embedding_models/openai_embedding_model.py +22 -31
deepeval/models/llms/grok_model.py +1 -1
deepeval/models/llms/openai_model.py +2 -0
deepeval/openai/__init__.py +14 -32
deepeval/openai/extractors.py +85 -50
deepeval/openai/patch.py +258 -167
deepeval/openai/types.py +20 -0
deepeval/openai/utils.py +205 -56
deepeval/prompt/__init__.py +19 -1
deepeval/prompt/api.py +160 -0
deepeval/prompt/prompt.py +245 -62
deepeval/prompt/utils.py +186 -15
deepeval/synthesizer/chunking/context_generator.py +209 -152
deepeval/synthesizer/chunking/doc_chunker.py +46 -12
deepeval/synthesizer/synthesizer.py +19 -15
deepeval/test_case/api.py +131 -0
deepeval/test_case/llm_test_case.py +6 -2
deepeval/test_run/__init__.py +1 -0
deepeval/test_run/hyperparameters.py +47 -8
deepeval/test_run/test_run.py +292 -206
deepeval/tracing/__init__.py +2 -1
deepeval/tracing/api.py +3 -1
deepeval/tracing/otel/exporter.py +3 -4
deepeval/tracing/otel/utils.py +24 -5
deepeval/tracing/trace_context.py +89 -5
deepeval/tracing/tracing.py +74 -3
deepeval/tracing/types.py +20 -2
deepeval/tracing/utils.py +8 -0
deepeval/utils.py +21 -0
{deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
{deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
deepeval/integrations/llama_index/agent/patched.py +0 -68
{deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
{deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
{deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0

deepeval/prompt/utils.py CHANGED Viewed

@@ -1,39 +1,80 @@
 import re
+import uuid
 from jinja2 import Template
+from typing import Any, Dict, Type, Optional, List, Match
+from pydantic import BaseModel, create_model
-from deepeval.prompt.api import PromptInterpolationType
+from deepeval.prompt.api import (
+    PromptInterpolationType,
+    OutputSchema,
+    SchemaDataType,
+    OutputSchemaField,
+)
+###################################
+# Interpolation
+###################################
-def interpolate_mustache(text: str, **kwargs) -> str:
+def interpolate_mustache(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache format: {{variable}}"""
-    formatted_template = re.sub(r"\{\{(\w+)\}\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+    return re.sub(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", replace_match, text)
-def interpolate_mustache_with_space(text: str, **kwargs) -> str:
+def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache with space format: {{ variable }}"""
-    formatted_template = re.sub(r"\{\{ (\w+) \}\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
-def interpolate_fstring(text: str, **kwargs) -> str:
+    return re.sub(r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", replace_match, text)
+def interpolate_fstring(text: str, **kwargs: Any) -> str:
     """Interpolate using F-string format: {variable}"""
-    return text.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+    return re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
-def interpolate_dollar_brackets(text: str, **kwargs) -> str:
+def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
     """Interpolate using Dollar Brackets format: ${variable}"""
-    formatted_template = re.sub(r"\$\{(\w+)\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+    return re.sub(r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
-def interpolate_jinja(text: str, **kwargs) -> str:
+def interpolate_jinja(text: str, **kwargs: Any) -> str:
     template = Template(text)
     return template.render(**kwargs)
 def interpolate_text(
-    interpolation_type: PromptInterpolationType, text: str, **kwargs
+    interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
 ) -> str:
     """Apply the appropriate interpolation method based on the type"""
     if interpolation_type == PromptInterpolationType.MUSTACHE:
@@ -47,4 +88,134 @@ def interpolate_text(
     elif interpolation_type == PromptInterpolationType.JINJA:
         return interpolate_jinja(text, **kwargs)
-    raise ValueError(f"Unsupported interpolation type: {interpolation_type}")
+###################################
+# Output Schema Deconstruction
+###################################
+schema_type_map: Dict[str, Any] = {
+    SchemaDataType.STRING.value: str,
+    SchemaDataType.INTEGER.value: int,
+    SchemaDataType.FLOAT.value: float,
+    SchemaDataType.BOOLEAN.value: bool,
+    SchemaDataType.NULL.value: type(None),
+    SchemaDataType.OBJECT.value: dict,
+}
+def construct_nested_base_model(
+    parent: OutputSchemaField,
+    parent_id_map: Dict[Optional[str], List[OutputSchemaField]],
+    model_name: str,
+) -> Type[BaseModel]:
+    child_fields: Dict[str, tuple] = {}
+    for child in parent_id_map.get(parent.id, []):
+        child_type = (
+            child.type.value if hasattr(child.type, "value") else child.type
+        )
+        if child_type == SchemaDataType.OBJECT.value:
+            python_type = construct_nested_base_model(
+                child, parent_id_map, child.name
+            )
+        else:
+            python_type = schema_type_map.get(child_type, Any)
+        default = ... if child.required else None
+        child_fields[child.name or child.id] = (python_type, default)
+    return create_model(model_name, **child_fields)
+def construct_base_model(
+    schema: Optional[OutputSchema] = None,
+) -> Type[BaseModel]:
+    if not schema:
+        return None
+    if not schema.fields:
+        return create_model(schema.name)
+    parent_id_map: Dict[Optional[str], List[OutputSchemaField]] = {}
+    for field in schema.fields:
+        parent_id = field.parent_id or None
+        if parent_id_map.get(parent_id) is None:
+            parent_id_map[parent_id] = []
+        parent_id_map[parent_id].append(field)
+    root_fields: Dict[str, tuple] = {}
+    for field in parent_id_map.get(None, []):
+        field_type = (
+            field.type.value if hasattr(field.type, "value") else field.type
+        )
+        if field_type == SchemaDataType.OBJECT.value:
+            python_type = construct_nested_base_model(
+                field, parent_id_map, field.name
+            )
+        else:
+            python_type = schema_type_map.get(field_type, Any)
+        default = ... if field.required else None
+        root_fields[field.name] = (python_type, default)
+    return create_model(schema.name, **root_fields)
+###################################
+# Output Schema Construction
+###################################
+def _process_model(
+    model_class: Type[BaseModel],
+    parent_id: Optional[str] = None,
+) -> List[OutputSchemaField]:
+    fields = []
+    model_fields = model_class.model_fields
+    for field_name, field_info in model_fields.items():
+        field_id = str(uuid.uuid4())
+        annotation = field_info.annotation
+        field_type = "STRING"
+        if annotation == str:
+            field_type = "STRING"
+        elif annotation == int:
+            field_type = "INTEGER"
+        elif annotation == float:
+            field_type = "FLOAT"
+        elif annotation == bool:
+            field_type = "BOOLEAN"
+        elif annotation == list:
+            raise ValueError("Unsupported structured output: list")
+        elif annotation == dict:
+            raise ValueError("Unsupported structured output: dict")
+        elif (
+            hasattr(annotation, "__bases__")
+            and BaseModel in annotation.__bases__
+        ):
+            field_type = "OBJECT"
+            parent_field = OutputSchemaField(
+                id=field_id,
+                name=field_name,
+                type=field_type,
+                required=field_info.default is ...,
+                parent_id=parent_id,
+            )
+            fields.append(parent_field)
+            nested_fields = _process_model(annotation, field_id)
+            fields.extend(nested_fields)
+            continue
+        required = field_info.default is ...
+        fields.append(
+            OutputSchemaField(
+                id=field_id,
+                name=field_name,
+                type=field_type,
+                required=required,
+                parent_id=parent_id,
+            )
+        )
+    return fields
+def construct_output_schema(
+    base_model_class: Optional[Type[BaseModel]] = None,
+) -> Optional[OutputSchema]:
+    if base_model_class is None:
+        return None
+    all_fields = _process_model(base_model_class)
+    return OutputSchema(fields=all_fields, name=base_model_class.__name__)

deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl

deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl