deepeval 3.6.7__py3-none-any.whl → 3.6.9__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +104 -36
- deepeval/config/utils.py +5 -0
- deepeval/dataset/dataset.py +162 -30
- deepeval/dataset/utils.py +41 -13
- deepeval/errors.py +20 -2
- deepeval/evaluate/execute.py +1662 -688
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +13 -3
- deepeval/integrations/crewai/__init__.py +2 -1
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +8 -8
- deepeval/metrics/indicator.py +21 -1
- deepeval/metrics/mcp/mcp_task_completion.py +7 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/task_completion/task_completion.py +1 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/amazon_bedrock_model.py +20 -17
- deepeval/models/llms/openai_model.py +10 -1
- deepeval/models/retry_policy.py +103 -20
- deepeval/openai/extractors.py +61 -16
- deepeval/openai/patch.py +8 -12
- deepeval/openai/types.py +1 -1
- deepeval/openai/utils.py +108 -1
- deepeval/prompt/prompt.py +1 -0
- deepeval/prompt/utils.py +43 -14
- deepeval/simulator/conversation_simulator.py +25 -18
- deepeval/synthesizer/chunking/context_generator.py +9 -1
- deepeval/synthesizer/synthesizer.py +11 -10
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/test_run.py +190 -207
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +23 -4
- deepeval/tracing/trace_context.py +53 -38
- deepeval/tracing/tracing.py +23 -0
- deepeval/tracing/types.py +16 -14
- deepeval/utils.py +21 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/METADATA +1 -1
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/RECORD +85 -63
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- deepeval/tracing/message_types/__init__.py +0 -10
- deepeval/tracing/message_types/base.py +0 -6
- deepeval/tracing/message_types/messages.py +0 -14
- deepeval/tracing/message_types/tools.py +0 -18
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/WHEEL +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/entry_points.txt +0 -0
deepeval/openai/extractors.py
CHANGED

```diff
@@ -4,17 +4,26 @@ from typing import Any, Union, Dict
 from openai.types.responses import Response

 from deepeval.test_case.llm_test_case import ToolCall
-from deepeval.openai.utils import
+from deepeval.openai.utils import (
+    render_response_input,
+    stringify_multimodal_content,
+    render_messages,
+)
 from deepeval.openai.types import InputParameters, OutputParameters
+from deepeval.tracing.types import Message


-
+# guarding against errors to be compatible with legacy APIs
+def safe_extract_input_parameters(
     is_completion: bool, kwargs: Dict[str, Any]
 ) -> InputParameters:
-
-
-
-
+    try:
+        if is_completion:
+            return extract_input_parameters_from_completion(kwargs)
+        else:
+            return extract_input_parameters_from_response(kwargs)
+    except:
+        return InputParameters(model="NA")


 def extract_input_parameters_from_completion(
@@ -43,6 +52,9 @@ def extract_input_parameters_from_completion(
     if len(user_messages) > 0:
         input_arg = user_messages[0]

+    # render messages
+    messages = render_messages(messages)
+
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_arg),
@@ -64,7 +76,24 @@ def extract_input_parameters_from_response(
         if tools is not None
         else None
     )
-    messages =
+    messages = []
+    if isinstance(input_payload, list):
+        messages = render_response_input(input_payload)
+    elif isinstance(input_payload, str):
+        messages = [
+            {
+                "role": "user",
+                "content": input_payload,
+            }
+        ]
+    if instructions:
+        messages.insert(
+            0,
+            {
+                "role": "system",
+                "content": instructions,
+            },
+        )
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_payload),
@@ -75,19 +104,24 @@ def extract_input_parameters_from_response(
     )


-def
+def safe_extract_output_parameters(
     is_completion: bool,
     response: Union[ChatCompletion, ParsedChatCompletion, Response],
     input_parameters: InputParameters,
 ) -> OutputParameters:
-
-
-
-
-
-
-
-
+
+    # guarding against errors to be compatible with legacy APIs
+    try:
+        if is_completion:
+            return extract_output_parameters_from_completion(
+                response, input_parameters
+            )
+        else:
+            return extract_output_parameters_from_response(
+                response, input_parameters
+            )
+    except:
+        return OutputParameters()


 def extract_output_parameters_from_completion(
@@ -113,6 +147,12 @@ def extract_output_parameters_from_completion(
         )
     )

+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls
+
     return OutputParameters(
         output=output,
         prompt_tokens=prompt_tokens,
@@ -144,6 +184,11 @@ def extract_output_parameters_from_response(
                 description=tool_descriptions.get(tool_call.name),
             )
         )
+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls

     return OutputParameters(
         output=output,
```
deepeval/openai/patch.py
CHANGED

```diff
@@ -3,8 +3,8 @@ from functools import wraps


 from deepeval.openai.extractors import (
-
-
+    safe_extract_output_parameters,
+    safe_extract_input_parameters,
     InputParameters,
     OutputParameters,
 )
@@ -16,7 +16,6 @@ from deepeval.tracing.context import (
 )
 from deepeval.tracing import observe
 from deepeval.tracing.trace_context import current_llm_context
-from deepeval.openai.utils import create_child_tool_spans

 # Store original methods for safety and potential unpatching
 _ORIGINAL_METHODS = {}
@@ -123,7 +122,7 @@ def _patch_async_openai_client_method(
 ):
     @wraps(orig_method)
     async def patched_async_openai_method(*args, **kwargs):
-        input_parameters: InputParameters =
+        input_parameters: InputParameters = safe_extract_input_parameters(
             is_completion_method, kwargs
         )
@@ -137,7 +136,7 @@ def _patch_async_openai_client_method(
         )
         async def llm_generation(*args, **kwargs):
             response = await orig_method(*args, **kwargs)
-            output_parameters =
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -162,7 +161,7 @@ def _patch_sync_openai_client_method(
 ):
     @wraps(orig_method)
     def patched_sync_openai_method(*args, **kwargs):
-        input_parameters: InputParameters =
+        input_parameters: InputParameters = safe_extract_input_parameters(
            is_completion_method, kwargs
         )
@@ -176,7 +175,7 @@ def _patch_sync_openai_client_method(
         )
         def llm_generation(*args, **kwargs):
             response = orig_method(*args, **kwargs)
-            output_parameters =
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -205,8 +204,8 @@ def _update_all_attributes(
 ):
     """Update span and trace attributes with input/output parameters."""
     update_current_span(
-        input=input_parameters.
-        output=output_parameters.output or
+        input=input_parameters.messages,
+        output=output_parameters.output or output_parameters.tools_called,
         tools_called=output_parameters.tools_called,
         # attributes to be added
         expected_output=expected_output,
@@ -223,9 +222,6 @@ def _update_all_attributes(
         prompt=llm_context.prompt,
     )

-    if output_parameters.tools_called:
-        create_child_tool_spans(output_parameters)
-
     __update_input_and_output_of_current_trace(
         input_parameters, output_parameters
     )
```
deepeval/openai/types.py
CHANGED

```diff
@@ -14,7 +14,7 @@ class InputParameters(BaseModel):


 class OutputParameters(BaseModel):
-    output: Optional[
+    output: Optional[Any] = None
     prompt_tokens: Optional[int] = None
     completion_tokens: Optional[int] = None
     tools_called: Optional[List[ToolCall]] = None
```
deepeval/openai/utils.py
CHANGED

```diff
@@ -1,6 +1,10 @@
 import json
 import uuid
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Iterable
+
+from openai.types.chat.chat_completion_message_param import (
+    ChatCompletionMessageParam,
+)

 from deepeval.tracing.types import ToolSpan, TraceSpanStatus
 from deepeval.tracing.context import current_span_context
@@ -126,3 +130,106 @@ def stringify_multimodal_content(content: Any) -> str:

     # unknown dicts and types returned as shortened JSON
     return _compact_dump(content)
+
+
+def render_messages(
+    messages: Iterable[ChatCompletionMessageParam],
+) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for message in messages:
+        role = message.get("role")
+        content = message.get("content")
+        if role == "assistant" and message.get("tool_calls"):
+            tool_calls = message.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tool_call in tool_calls:
+                    # Extract type - either "function" or "custom"
+                    tool_type = tool_call.get("type", "function")
+
+                    # Extract name and arguments based on type
+                    if tool_type == "function":
+                        function_data = tool_call.get("function", {})
+                        name = function_data.get("name", "")
+                        arguments = function_data.get("arguments", "")
+                    elif tool_type == "custom":
+                        custom_data = tool_call.get("custom", {})
+                        name = custom_data.get("name", "")
+                        arguments = custom_data.get("input", "")
+                    else:
+                        name = ""
+                        arguments = ""
+
+                    messages_list.append(
+                        {
+                            "id": tool_call.get("id", ""),
+                            "call_id": tool_call.get(
+                                "id", ""
+                            ),  # OpenAI uses 'id', not 'call_id'
+                            "name": name,
+                            "type": tool_type,
+                            "arguments": json.loads(arguments),
+                        }
+                    )
+
+        elif role == "tool":
+            messages_list.append(
+                {
+                    "call_id": message.get("tool_call_id", ""),
+                    "type": role,  # "tool"
+                    "output": message.get("content", {}),
+                }
+            )
+        else:
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": content,
+                }
+            )
+
+    return messages_list
+
+
+def render_response_input(input: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for item in input:
+        type = item.get("type")
+        role = item.get("role")
+
+        if type == "message":
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": item.get("content"),
+                }
+            )
+        else:
+            messages_list.append(item)
+
+    return messages_list
+
+
+def _render_content(content: Dict[str, Any], indent: int = 0) -> str:
+    """
+    Renders a dictionary as a formatted string with indentation for nested structures.
+    """
+    if not content:
+        return ""
+
+    lines = []
+    prefix = " " * indent
+
+    for key, value in content.items():
+        if isinstance(value, dict):
+            lines.append(f"{prefix}{key}:")
+            lines.append(_render_content(value, indent + 1))
+        elif isinstance(value, list):
+            lines.append(f"{prefix}{key}: {_compact_dump(value)}")
+        else:
+            lines.append(f"{prefix}{key}: {value}")
+
+    return "\n".join(lines)
```
deepeval/prompt/prompt.py
CHANGED

```diff
@@ -202,6 +202,7 @@ class Prompt:
                 "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
             )

+            print("@@@@@")
             return interpolate_text(interpolation_type, text_template, **kwargs)

         elif prompt_type == PromptType.LIST:
```
deepeval/prompt/utils.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import re
 import uuid
 from jinja2 import Template
-from typing import Any, Dict, Type, Optional, List
+from typing import Any, Dict, Type, Optional, List, Match
 from pydantic import BaseModel, create_model

 from deepeval.prompt.api import (
@@ -16,36 +16,65 @@ from deepeval.prompt.api import (
 ###################################


-def interpolate_mustache(text: str, **kwargs) -> str:
+def interpolate_mustache(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache format: {{variable}}"""
-    formatted_template = re.sub(r"\{\{(\w+)\}\}", r"{\1}", text)
-    return formatted_template.format(**kwargs)
+
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+
+    return re.sub(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", replace_match, text)


-
+def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
     """Interpolate using Mustache with space format: {{ variable }}"""
-
-
+
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+
+    return re.sub(r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", replace_match, text)


-def interpolate_fstring(text: str, **kwargs) -> str:
+def interpolate_fstring(text: str, **kwargs: Any) -> str:
     """Interpolate using F-string format: {variable}"""
-    return text.format(**kwargs)
+
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")

-
+    return re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
+
+
+def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
     """Interpolate using Dollar Brackets format: ${variable}"""
-
-
+
+    def replace_match(match: Match[str]) -> str:
+        var_name = match.group(1)
+        if var_name in kwargs:
+            return str(kwargs[var_name])
+        # Raise error for missing variables to maintain consistency
+        raise KeyError(f"Missing variable in template: {var_name}")
+
+    return re.sub(r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)


-def interpolate_jinja(text: str, **kwargs) -> str:
+def interpolate_jinja(text: str, **kwargs: Any) -> str:
     template = Template(text)
     return template.render(**kwargs)


 def interpolate_text(
-    interpolation_type: PromptInterpolationType, text: str, **kwargs
+    interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
 ) -> str:
     """Apply the appropriate interpolation method based on the type"""
     if interpolation_type == PromptInterpolationType.MUSTACHE:
```
deepeval/simulator/conversation_simulator.py
CHANGED

```diff
@@ -35,7 +35,6 @@ class ConversationSimulator:
         self,
         model_callback: Callable[[str], str],
         simulator_model: Optional[Union[str, DeepEvalBaseLLM]] = None,
-        opening_message: Optional[str] = None,
         max_concurrent: int = 5,
         async_mode: bool = True,
         language: str = "English",
@@ -45,7 +44,6 @@ class ConversationSimulator:
         self.is_callback_async = inspect.iscoroutinefunction(
             self.model_callback
         )
-        self.opening_message = opening_message
         self.semaphore = asyncio.Semaphore(max_concurrent)
         self.async_mode = async_mode
         self.language = language
@@ -68,6 +66,9 @@ class ConversationSimulator:
         self,
         conversational_goldens: List[ConversationalGolden],
         max_user_simulations: int = 10,
+        on_simulation_complete: Optional[
+            Callable[[ConversationalTestCase, int], None]
+        ] = None,
     ) -> List[ConversationalTestCase]:
         self.simulation_cost = 0 if self.using_native_model else None
@@ -87,6 +88,7 @@ class ConversationSimulator:
                 self._a_simulate(
                     conversational_goldens=conversational_goldens,
                     max_user_simulations=max_user_simulations,
+                    on_simulation_complete=on_simulation_complete,
                     progress=progress,
                     pbar_id=pbar_id,
                 )
@@ -103,6 +105,7 @@ class ConversationSimulator:
                     index=conversation_index,
                     progress=progress,
                     pbar_id=pbar_id,
+                    on_simulation_complete=on_simulation_complete,
                 )
             )
             conversational_test_cases.append(conversational_test_case)
@@ -115,6 +118,9 @@ class ConversationSimulator:
         self,
         conversational_goldens: List[ConversationalGolden],
         max_user_simulations: int,
+        on_simulation_complete: Optional[
+            Callable[[ConversationalTestCase, int], None]
+        ] = None,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
     ) -> List[ConversationalTestCase]:
@@ -131,6 +137,7 @@ class ConversationSimulator:
                 index=conversation_index,
                 progress=progress,
                 pbar_id=pbar_id,
+                on_simulation_complete=on_simulation_complete,
             )

         tasks = [
@@ -150,6 +157,9 @@ class ConversationSimulator:
         index: int,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
+        on_simulation_complete: Optional[
+            Callable[[ConversationalTestCase, int], None]
+        ] = None,
     ) -> ConversationalTestCase:
         simulation_counter = 0
         if max_user_simulations <= 0:
@@ -166,8 +176,6 @@ class ConversationSimulator:
         user_input = None
         thread_id = str(uuid.uuid4())
         turns: List[Turn] = []
-        if self.opening_message and golden.turns is None:
-            turns.append(Turn(role="assistant", content=self.opening_message))

         if golden.turns is not None:
             turns.extend(golden.turns)
@@ -187,11 +195,7 @@ class ConversationSimulator:
             if simulation_counter >= max_user_simulations:
                 update_pbar(progress, pbar_max_user_simluations_id)
                 break
-            if len(turns) == 0
-                len(turns) == 1
-                and self.opening_message
-                and golden.turns is None
-            ):
+            if len(turns) == 0:
                 # Generate first user input
                 user_input = self.generate_first_user_input(golden)
                 turns.append(Turn(role="user", content=user_input))
@@ -225,7 +229,7 @@ class ConversationSimulator:
             turns.append(turn)

         update_pbar(progress, pbar_id)
-
+        conversational_test_case = ConversationalTestCase(
             turns=turns,
             scenario=golden.scenario,
             expected_outcome=golden.expected_outcome,
@@ -241,6 +245,9 @@ class ConversationSimulator:
             _dataset_alias=golden._dataset_alias,
             _dataset_id=golden._dataset_id,
         )
+        if on_simulation_complete:
+            on_simulation_complete(conversational_test_case, index)
+        return conversational_test_case

     async def _a_simulate_single_conversation(
         self,
@@ -249,6 +256,9 @@ class ConversationSimulator:
         index: Optional[int] = None,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
+        on_simulation_complete: Optional[
+            Callable[[ConversationalTestCase, int], None]
+        ] = None,
     ) -> ConversationalTestCase:
         simulation_counter = 0
         if max_user_simulations <= 0:
@@ -265,8 +275,6 @@ class ConversationSimulator:
         user_input = None
         thread_id = str(uuid.uuid4())
         turns: List[Turn] = []
-        if self.opening_message and golden.turns is None:
-            turns.append(Turn(role="assistant", content=self.opening_message))

         if golden.turns is not None:
             turns.extend(golden.turns)
@@ -286,11 +294,7 @@ class ConversationSimulator:
             if simulation_counter >= max_user_simulations:
                 update_pbar(progress, pbar_max_user_simluations_id)
                 break
-            if len(turns) == 0
-                len(turns) == 1
-                and self.opening_message
-                and golden.turns is None
-            ):
+            if len(turns) == 0:
                 # Generate first user input
                 user_input = await self.a_generate_first_user_input(golden)
                 turns.append(Turn(role="user", content=user_input))
@@ -324,7 +328,7 @@ class ConversationSimulator:
             turns.append(turn)

         update_pbar(progress, pbar_id)
-
+        conversational_test_case = ConversationalTestCase(
             turns=turns,
             scenario=golden.scenario,
             expected_outcome=golden.expected_outcome,
@@ -340,6 +344,9 @@ class ConversationSimulator:
             _dataset_alias=golden._dataset_alias,
             _dataset_id=golden._dataset_id,
         )
+        if on_simulation_complete:
+            on_simulation_complete(conversational_test_case, index)
+        return conversational_test_case

 ############################################
 ### Generate User Inputs ###################
```
deepeval/synthesizer/chunking/context_generator.py
CHANGED

```diff
@@ -249,8 +249,16 @@ class ContextGenerator:

             except Exception as exc:
                 # record and continue with other docs
+                show_trace = bool(get_settings().DEEPEVAL_LOG_STACK_TRACES)
+                exc_info = (
+                    (type(exc), exc, getattr(exc, "__traceback__", None))
+                    if show_trace
+                    else None
+                )
                 logger.exception(
-                    "Document pipeline failed for %s",
+                    "Document pipeline failed for %s",
+                    path,
+                    exc_info=exc_info,
                 )
             finally:
                 # drop the collection asap to avoid too many open collections
```
deepeval/synthesizer/synthesizer.py
CHANGED

```diff
@@ -555,7 +555,7 @@ class Synthesizer:
                 include_expected_output=include_expected_output,
                 max_goldens_per_context=max_goldens_per_context,
                 source_files=source_files,
-
+                context_index=index,
                 progress=progress,
                 pbar_id=pbar_id,
                 context_scores=_context_scores,
@@ -577,7 +577,7 @@ class Synthesizer:
         include_expected_output: bool,
         max_goldens_per_context: int,
         source_files: Optional[List[str]],
-
+        context_index: int,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
         context_scores: Optional[List[float]] = None,
@@ -599,7 +599,7 @@ class Synthesizer:
         # Add pbars
         pbar_generate_goldens_id = add_pbar(
             progress,
-            f"\t⚡ Generating goldens from context #{
+            f"\t⚡ Generating goldens from context #{context_index}",
             total=1 + max_goldens_per_context,
         )
         pbar_generate_inputs_id = add_pbar(
@@ -643,7 +643,7 @@ class Synthesizer:

         # Helper function to process each input in parallel
         async def process_input(
-
+            input_index: int,
             data: SyntheticData,
             progress: Optional[Progress] = None,
         ):
@@ -654,7 +654,7 @@ class Synthesizer:
                 num_evolutions=self.evolution_config.num_evolutions,
                 evolutions=self.evolution_config.evolutions,
                 progress=progress,
-                pbar_evolve_input_id=pbar_evolve_input_ids[
+                pbar_evolve_input_id=pbar_evolve_input_ids[input_index],
                 remove_pbar=False,
             )
@@ -672,7 +672,7 @@ class Synthesizer:
             )
             evolved_input = res.input
             update_pbar(
-                progress, pbar_evolve_input_ids[
+                progress, pbar_evolve_input_ids[input_index], remove=False
             )
@@ -685,7 +685,7 @@ class Synthesizer:
             )
             expected_output = await self._a_generate(expected_output_prompt)
             update_pbar(
-                progress, pbar_evolve_input_ids[
+                progress, pbar_evolve_input_ids[input_index], remove=False
             )

             # Create Golden
@@ -694,13 +694,14 @@ class Synthesizer:
                 context=context,
                 expected_output=expected_output,
                 source_file=(
-                    source_files[
-                    if source_files is not None
+                    source_files[context_index]
+                    if source_files is not None
+                    and context_index < len(source_files)
                     else None
                 ),
                 additional_metadata={
                     "evolutions": evolutions_used,
-                    "synthetic_input_quality": scores[
+                    "synthetic_input_quality": scores[input_index],
                     # "context_quality": (
                     #     context_scores[data_index]
                     #     if context_scores is not None
```