deepeval 3.7.4.tar.gz → 3.7.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.7.4 → deepeval-3.7.5}/PKG-INFO +1 -4
- deepeval-3.7.5/deepeval/_version.py +1 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/golden.py +54 -2
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/evaluate.py +16 -8
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/execute.py +70 -26
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/utils.py +26 -22
- deepeval-3.7.5/deepeval/integrations/pydantic_ai/agent.py +38 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/__init__.py +14 -12
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval-3.7.5/deepeval/metrics/answer_relevancy/template.py +206 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/base_metric.py +2 -5
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval-3.7.5/deepeval/metrics/contextual_precision/template.py +133 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval-3.7.5/deepeval/metrics/contextual_recall/template.py +126 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval-3.7.5/deepeval/metrics/contextual_relevancy/template.py +106 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/templates.py +2 -2
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/faithfulness.py +70 -27
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval-3.7.5/deepeval/metrics/faithfulness/template.py +225 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/utils.py +2 -2
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/indicator.py +4 -4
- deepeval-3.7.5/deepeval/metrics/multimodal_metrics/__init__.py +6 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval-3.7.5/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +133 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/ragas.py +3 -3
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy}/schema.py +7 -1
- deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.5/deepeval/metrics/turn_faithfulness}/schema.py +11 -3
- deepeval-3.7.5/deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval-3.7.5/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/utils.py +39 -58
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/__init__.py +0 -12
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/base_model.py +16 -38
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/__init__.py +7 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/azure_embedding_model.py +52 -28
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/local_embedding_model.py +18 -14
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/openai_embedding_model.py +40 -21
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/amazon_bedrock_model.py +1 -2
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/anthropic_model.py +44 -23
- {deepeval-3.7.4/deepeval/models/mlllms → deepeval-3.7.5/deepeval/models/llms}/azure_model.py +111 -70
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/deepseek_model.py +18 -13
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/gemini_model.py +129 -43
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/grok_model.py +18 -13
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/kimi_model.py +18 -13
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/litellm_model.py +42 -22
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/local_model.py +12 -7
- {deepeval-3.7.4/deepeval/models/mlllms → deepeval-3.7.5/deepeval/models/llms}/ollama_model.py +85 -44
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/openai_model.py +137 -41
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/portkey_model.py +24 -7
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/utils.py +5 -3
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/retry_policy.py +17 -14
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/utils.py +46 -1
- deepeval-3.7.5/deepeval/optimizer/__init__.py +5 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/base.py +29 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval-3.7.4/deepeval/optimization/copro/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/copro/copro.py +112 -113
- deepeval-3.7.5/deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval-3.7.4/deepeval/optimization/gepa/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/gepa/gepa.py +175 -115
- deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval-3.7.5/deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval-3.7.4/deepeval/optimization/simba/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/simba/simba.py +128 -112
- {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer}/configs.py +5 -8
- deepeval-3.7.4/deepeval/optimization/policies/selection.py → deepeval-3.7.5/deepeval/optimizer/policies.py +63 -2
- deepeval-3.7.5/deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval-3.7.5/deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval-3.7.5/deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval-3.7.5/deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval-3.7.5/deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval-3.7.5/deepeval/optimizer/scorer/base.py +86 -0
- deepeval-3.7.5/deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval-3.7.5/deepeval/optimizer/scorer/utils.py +30 -0
- deepeval-3.7.5/deepeval/optimizer/types.py +148 -0
- {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer}/utils.py +47 -165
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/prompt.py +5 -9
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/__init__.py +1 -3
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/api.py +12 -10
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/conversational_test_case.py +19 -1
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/llm_test_case.py +152 -1
- deepeval-3.7.5/deepeval/test_case/utils.py +20 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/api.py +15 -14
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/test_run.py +3 -3
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/patchers.py +9 -4
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/tracing.py +2 -2
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/utils.py +65 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/pyproject.toml +4 -4
- deepeval-3.7.4/deepeval/_version.py +0 -1
- deepeval-3.7.4/deepeval/integrations/pydantic_ai/agent.py +0 -21
- deepeval-3.7.4/deepeval/metrics/answer_relevancy/template.py +0 -110
- deepeval-3.7.4/deepeval/metrics/contextual_precision/template.py +0 -84
- deepeval-3.7.4/deepeval/metrics/contextual_recall/template.py +0 -75
- deepeval-3.7.4/deepeval/metrics/contextual_relevancy/template.py +0 -77
- deepeval-3.7.4/deepeval/metrics/faithfulness/template.py +0 -140
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/__init__.py +0 -24
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval-3.7.4/deepeval/models/llms/azure_model.py +0 -299
- deepeval-3.7.4/deepeval/models/llms/ollama_model.py +0 -114
- deepeval-3.7.4/deepeval/models/mlllms/__init__.py +0 -4
- deepeval-3.7.4/deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval-3.7.4/deepeval/models/mlllms/openai_model.py +0 -309
- deepeval-3.7.4/deepeval/optimization/__init__.py +0 -13
- deepeval-3.7.4/deepeval/optimization/adapters/__init__.py +0 -2
- deepeval-3.7.4/deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval-3.7.4/deepeval/optimization/aggregates.py +0 -14
- deepeval-3.7.4/deepeval/optimization/copro/configs.py +0 -31
- deepeval-3.7.4/deepeval/optimization/gepa/__init__.py +0 -7
- deepeval-3.7.4/deepeval/optimization/gepa/configs.py +0 -115
- deepeval-3.7.4/deepeval/optimization/miprov2/configs.py +0 -134
- deepeval-3.7.4/deepeval/optimization/miprov2/loop.py +0 -785
- deepeval-3.7.4/deepeval/optimization/mutations/__init__.py +0 -0
- deepeval-3.7.4/deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval-3.7.4/deepeval/optimization/policies/__init__.py +0 -16
- deepeval-3.7.4/deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval-3.7.4/deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval-3.7.4/deepeval/optimization/simba/__init__.py +0 -0
- deepeval-3.7.4/deepeval/optimization/simba/configs.py +0 -33
- deepeval-3.7.4/deepeval/optimization/types.py +0 -361
- deepeval-3.7.4/deepeval/plugins/__init__.py +0 -0
- deepeval-3.7.4/deepeval/synthesizer/chunking/__init__.py +0 -0
- deepeval-3.7.4/deepeval/test_case/mllm_test_case.py +0 -170
- deepeval-3.7.4/deepeval/test_case/utils.py +0 -24
- {deepeval-3.7.4 → deepeval-3.7.5}/LICENSE.md +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/README.md +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/main.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/server.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/test.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/logging.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/settings.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/constants.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/contextvars.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/errors.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/key_handler.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy → deepeval-3.7.5/deepeval/metrics/multimodal_metrics/multimodal_g_eval}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision → deepeval-3.7.5/deepeval/metrics/multimodal_metrics/text_to_image}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall → deepeval-3.7.5/deepeval/metrics/pattern_match}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.5/deepeval/metrics/prompt_alignment}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.5/deepeval/metrics/role_adherence}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval → deepeval-3.7.5/deepeval/metrics/task_completion}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness → deepeval-3.7.5/deepeval/metrics/tool_correctness}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/text_to_image → deepeval-3.7.5/deepeval/metrics/turn_contextual_precision}/__init__.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/pattern_match → deepeval-3.7.5/deepeval/metrics/turn_contextual_recall}/__init__.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/prompt_alignment → deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy}/__init__.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/role_adherence → deepeval-3.7.5/deepeval/metrics/turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/task_completion → deepeval-3.7.5/deepeval/metrics/turn_relevancy}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/tool_correctness → deepeval-3.7.5/deepeval/model_integrations}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/patch.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4/deepeval/metrics/turn_relevancy → deepeval-3.7.5/deepeval/plugins}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/progress_context.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/py.typed +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/singleton.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.7.4/deepeval/model_integrations → deepeval-3.7.5/deepeval/synthesizer/chunking}/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/telemetry.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/context.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/types.py +0 -0
- {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/utils.py +0 -0
{deepeval-3.7.4 → deepeval-3.7.5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.7.4
+Version: 3.7.5
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -13,13 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: aiohttp
-Requires-Dist: anthropic
 Requires-Dist: click (>=8.0.0,<8.3.0)
-Requires-Dist: google-genai (>=1.9.0,<2.0.0)
 Requires-Dist: grpcio (>=1.67.1,<2.0.0)
 Requires-Dist: jinja2
 Requires-Dist: nest_asyncio
-Requires-Dist: ollama
 Requires-Dist: openai
 Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)
deepeval-3.7.5/deepeval/_version.py
@@ -0,0 +1 @@
+__version__: str = "3.7.5"
{deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/golden.py
@@ -1,6 +1,6 @@
-from pydantic import BaseModel, Field, PrivateAttr
+from pydantic import BaseModel, Field, PrivateAttr, model_validator
 from typing import Optional, Dict, List
-from deepeval.test_case import ToolCall, Turn
+from deepeval.test_case import ToolCall, Turn, MLLMImage


 class Golden(BaseModel):
@@ -32,10 +32,40 @@ class Golden(BaseModel):
     custom_column_key_values: Optional[Dict[str, str]] = Field(
         default=None, serialization_alias="customColumnKeyValues"
     )
+    multimodal: bool = Field(False, exclude=True)
     _dataset_rank: Optional[int] = PrivateAttr(default=None)
     _dataset_alias: Optional[str] = PrivateAttr(default=None)
     _dataset_id: Optional[str] = PrivateAttr(default=None)

+    @model_validator(mode="after")
+    def set_is_multimodal(self):
+        import re
+
+        if self.multimodal is True:
+            return self
+
+        pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
+        self.multimodal = (
+            any(
+                [
+                    (
+                        re.search(pattern, self.input) is not None
+                        if self.input
+                        else False
+                    ),
+                    (
+                        re.search(pattern, self.actual_output) is not None
+                        if self.actual_output
+                        else False
+                    ),
+                ]
+            )
+            if isinstance(self.input, str)
+            else self.multimodal
+        )
+
+        return self
+

 class ConversationalGolden(BaseModel):
     scenario: str
@@ -55,6 +85,28 @@ class ConversationalGolden(BaseModel):
         default=None, serialization_alias="customColumnKeyValues"
     )
     turns: Optional[List[Turn]] = Field(default=None)
+    multimodal: bool = Field(False, exclude=True)
     _dataset_rank: Optional[int] = PrivateAttr(default=None)
     _dataset_alias: Optional[str] = PrivateAttr(default=None)
     _dataset_id: Optional[str] = PrivateAttr(default=None)
+
+    @model_validator(mode="after")
+    def set_is_multimodal(self):
+        import re
+
+        if self.multimodal is True:
+            return self
+
+        pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
+        self.multimodal = (
+            any(
+                [
+                    re.search(pattern, turn.content) is not None
+                    for turn in self.turns
+                ]
+            )
+            if self.turns
+            else self.multimodal
+        )
+
+        return self
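Note: the two validators above auto-flag a golden as multimodal whenever one of its text fields contains a [DEEPEVAL:IMAGE:...] marker. A minimal sketch of that behavior, assuming Golden is imported from deepeval.dataset and constructed with the input/actual_output fields the validator reads (the image URL is made up):

# Illustrative sketch, not from the package: exercises the
# set_is_multimodal validator added above.
from deepeval.dataset import Golden

golden = Golden(
    input="Describe this: [DEEPEVAL:IMAGE:https://example.com/cat.png]",
    actual_output="A cat on a windowsill.",
)
print(golden.multimodal)  # True: the input matches the DEEPEVAL:IMAGE pattern

plain = Golden(input="What is 2 + 2?", actual_output="4")
print(plain.multimodal)  # False: no image marker in any text field

Passing multimodal=True explicitly short-circuits the detection, per the early return in the validator.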
{deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/evaluate.py
@@ -54,7 +54,6 @@ from deepeval.metrics.indicator import (
 from deepeval.test_case import (
     LLMTestCase,
     ConversationalTestCase,
-    MLLMTestCase,
 )
 from deepeval.test_run import (
     global_test_run_manager,
@@ -71,9 +70,7 @@ from deepeval.evaluate.execute import (


 def assert_test(
-    test_case: Optional[
-        Union[LLMTestCase, ConversationalTestCase, MLLMTestCase]
-    ] = None,
+    test_case: Optional[Union[LLMTestCase, ConversationalTestCase]] = None,
     metrics: Optional[
         Union[
             List[BaseMetric],
@@ -175,7 +172,7 @@ def assert_test(
         try:
             if not metric_data.success:
                 failed_metrics_data.append(metric_data)
-        except:
+        except Exception:
             failed_metrics_data.append(metric_data)

     failed_metrics_str = ", ".join(
@@ -188,9 +185,7 @@ def assert_test(


 def evaluate(
-    test_cases: Union[
-        List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
-    ],
+    test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
     metrics: Optional[
         Union[
             List[BaseMetric],
@@ -272,6 +267,19 @@ def evaluate(
     test_run.hyperparameters = process_hyperparameters(hyperparameters)
     test_run.prompts = process_prompts(hyperparameters)
     global_test_run_manager.save_test_run(TEMP_FILE_PATH)
+
+    # In CLI mode (`deepeval test run`), the CLI owns finalization and will
+    # call `wrap_up_test_run()` once after pytest finishes. Finalizing here
+    # as well would double finalize the run and consequently result in
+    # duplicate uploads / local saves and temp file races, so only
+    # do it when we're NOT in CLI mode.
+    if get_is_running_deepeval():
+        return EvaluationResult(
+            test_results=test_results,
+            confident_link=None,
+            test_run_id=None,
+        )
+
     res = global_test_run_manager.wrap_up_test_run(
         run_duration, display_table=False
     )
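For context on the new early return: when evaluate() is called directly rather than under `deepeval test run`, it still wraps up the run inline and returns the full result as before. A minimal sketch of that direct call path (the metric and test case arguments are illustrative):

# Illustrative sketch: calling evaluate() outside the CLI finalizes the
# run here; under the CLI, the early return above defers finalization to
# the CLI's single wrap_up_test_run() call.
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

result = evaluate(
    test_cases=[LLMTestCase(input="Hi", actual_output="Hello!")],
    metrics=[AnswerRelevancyMetric()],
)
print(result.test_results)  # populated in both CLI and non-CLI mode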
@@ -58,6 +58,13 @@ from deepeval.metrics import (
|
|
|
58
58
|
BaseConversationalMetric,
|
|
59
59
|
BaseMultimodalMetric,
|
|
60
60
|
TaskCompletionMetric,
|
|
61
|
+
# RAG metrics that support both single-turn and multimodal
|
|
62
|
+
ContextualPrecisionMetric,
|
|
63
|
+
ContextualRecallMetric,
|
|
64
|
+
ContextualRelevancyMetric,
|
|
65
|
+
AnswerRelevancyMetric,
|
|
66
|
+
FaithfulnessMetric,
|
|
67
|
+
ToolCorrectnessMetric,
|
|
61
68
|
)
|
|
62
69
|
from deepeval.metrics.indicator import (
|
|
63
70
|
measure_metrics_with_indicator,
|
|
@@ -70,7 +77,6 @@ from deepeval.models.retry_policy import (
 from deepeval.test_case import (
     LLMTestCase,
     ConversationalTestCase,
-    MLLMTestCase,
 )
 from deepeval.test_case.api import create_api_test_case
 from deepeval.test_run import (
@@ -110,6 +116,15 @@ from deepeval.test_run.hyperparameters import (
 
 logger = logging.getLogger(__name__)
 
+MLLM_SUPPORTED_METRICS = [
+    ContextualPrecisionMetric,
+    ContextualRecallMetric,
+    ContextualRelevancyMetric,
+    AnswerRelevancyMetric,
+    FaithfulnessMetric,
+    ToolCorrectnessMetric,
+]
+
 
 def _skip_metrics_for_error(
     span: Optional[BaseSpan] = None,
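Membership in this registry is later checked with `type(metric) in MLLM_SUPPORTED_METRICS`, i.e. by exact class rather than `isinstance`, so a subclass of one of these metrics would not be auto-routed to the multimodal path. A toy illustration of the difference:

```python
class AnswerRelevancyMetric:  # stand-in for the real metric class
    pass

class MyCustomRelevancy(AnswerRelevancyMetric):  # user subclass
    pass

MLLM_SUPPORTED = [AnswerRelevancyMetric]

base, custom = AnswerRelevancyMetric(), MyCustomRelevancy()
assert type(base) in MLLM_SUPPORTED                 # exact class: routed
assert type(custom) not in MLLM_SUPPORTED           # subclass: not routed
assert isinstance(custom, AnswerRelevancyMetric)    # isinstance would match
```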
@@ -263,9 +278,7 @@ async def _await_with_outer_deadline(obj, *args, timeout: float, **kwargs):
 
 
 def execute_test_cases(
-    test_cases: Union[
-        List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
-    ],
+    test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
     metrics: Union[
         List[BaseMetric],
         List[BaseConversationalMetric],
@@ -307,6 +320,8 @@ def execute_test_cases(
             metric.async_mode = False
         if isinstance(metric, BaseMetric):
             llm_metrics.append(metric)
+            if type(metric) in MLLM_SUPPORTED_METRICS:
+                mllm_metrics.append(metric)
         elif isinstance(metric, BaseConversationalMetric):
             conversational_metrics.append(metric)
         elif isinstance(metric, BaseMultimodalMetric):
@@ -325,12 +340,12 @@ def execute_test_cases(
     )
     for i, test_case in enumerate(test_cases):
         # skip what we know we won't run
-        if isinstance(test_case, LLMTestCase):
+        if isinstance(test_case, LLMTestCase) and not test_case.multimodal:
             if not llm_metrics:
                 update_pbar(progress, pbar_id)
                 continue
             per_case_total = len(llm_metrics)
-        elif isinstance(test_case, MLLMTestCase):
+        elif isinstance(test_case, LLMTestCase) and test_case.multimodal:
             if not mllm_metrics:
                 update_pbar(progress, pbar_id)
                 continue
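With `MLLMTestCase` removed, one `LLMTestCase` type serves both paths and its `multimodal` flag decides which metric list applies. A condensed sketch of the dispatch (stand-in classes; the real ones live in `deepeval.test_case`):

```python
from dataclasses import dataclass, field
from typing import List

@dataclass
class LLMTestCase:  # stand-in
    input: str
    multimodal: bool = False

@dataclass
class ConversationalTestCase:  # stand-in
    turns: List[str] = field(default_factory=list)

def metrics_for(test_case, llm_metrics, mllm_metrics, conversational_metrics):
    # Mirrors the branching above: split LLM cases on the multimodal flag.
    if isinstance(test_case, LLMTestCase) and not test_case.multimodal:
        return llm_metrics
    if isinstance(test_case, LLMTestCase) and test_case.multimodal:
        return mllm_metrics
    return conversational_metrics

assert metrics_for(LLMTestCase("q"), ["llm"], ["mllm"], ["conv"]) == ["llm"]
assert metrics_for(LLMTestCase("q", multimodal=True), ["llm"], ["mllm"], ["conv"]) == ["mllm"]
assert metrics_for(ConversationalTestCase(), ["llm"], ["mllm"], ["conv"]) == ["conv"]
```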
@@ -349,10 +364,16 @@ def execute_test_cases(
 
         metrics_for_case = (
             llm_metrics
-            if isinstance(test_case, LLMTestCase)
+            if (
+                isinstance(test_case, LLMTestCase)
+                and not test_case.multimodal
+            )
             else (
                 mllm_metrics
-                if isinstance(test_case, MLLMTestCase)
+                if (
+                    isinstance(test_case, LLMTestCase)
+                    and test_case.multimodal
+                )
                 else conversational_metrics
             )
         )
@@ -360,10 +381,16 @@ def execute_test_cases(
             test_case=test_case,
             index=(
                 llm_test_case_count + 1
-                if isinstance(test_case, LLMTestCase)
+                if (
+                    isinstance(test_case, LLMTestCase)
+                    and not test_case.multimodal
+                )
                 else (
                     mllm_test_case_count + 1
-                    if isinstance(test_case, MLLMTestCase)
+                    if (
+                        isinstance(test_case, LLMTestCase)
+                        and test_case.multimodal
+                    )
                     else conversational_test_case_count + 1
                 )
             ),
@@ -383,7 +410,10 @@ def execute_test_cases(
         for metric in metrics:
             metric.error = None  # Reset metric error
 
-        if isinstance(test_case, LLMTestCase):
+        if (
+            isinstance(test_case, LLMTestCase)
+            and not test_case.multimodal
+        ):
             llm_test_case_count += 1
             cached_test_case = None
             if cache_config.use_cache:
@@ -436,7 +466,10 @@ def execute_test_cases(
             update_pbar(progress, pbar_test_case_id)
 
         # No caching and not sending test cases to Confident AI for multimodal metrics yet
-        elif isinstance(test_case, MLLMTestCase):
+        elif (
+            isinstance(test_case, LLMTestCase)
+            and test_case.multimodal
+        ):
             mllm_test_case_count += 1
             for metric in mllm_metrics:
                 current_index = index_of[id(metric)]
@@ -560,9 +593,7 @@ def execute_test_cases(
 
 
 async def a_execute_test_cases(
-    test_cases: Union[
-        List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
-    ],
+    test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
     metrics: Union[
         List[BaseMetric],
         List[BaseConversationalMetric],
@@ -605,6 +636,8 @@ async def a_execute_test_cases(
     for metric in metrics:
         if isinstance(metric, BaseMetric):
             llm_metrics.append(metric)
+            if type(metric) in MLLM_SUPPORTED_METRICS:
+                mllm_metrics.append(metric)
         elif isinstance(metric, BaseMultimodalMetric):
             mllm_metrics.append(metric)
         elif isinstance(metric, BaseConversationalMetric):
@@ -613,7 +646,7 @@ async def a_execute_test_cases(
     llm_test_case_counter = -1
     mllm_test_case_counter = -1
     conversational_test_case_counter = -1
-    test_results: List[Union[TestResult, MLLMTestCase]] = []
+    test_results: List[Union[TestResult, LLMTestCase]] = []
     tasks = []
 
     if display_config.show_indicator and _use_bar_indicator:
@@ -632,7 +665,10 @@ async def a_execute_test_cases(
     with progress:
         for test_case in test_cases:
             with capture_evaluation_run("test case"):
-                if isinstance(test_case, LLMTestCase):
+                if (
+                    isinstance(test_case, LLMTestCase)
+                    and not test_case.multimodal
+                ):
                     if len(llm_metrics) == 0:
                         update_pbar(progress, pbar_id)
                         continue
@@ -660,7 +696,10 @@ async def a_execute_test_cases(
                     )
                     tasks.append(asyncio.create_task(task))
 
-                elif isinstance(test_case, MLLMTestCase):
+                elif (
+                    isinstance(test_case, LLMTestCase)
+                    and test_case.multimodal
+                ):
                     mllm_test_case_counter += 1
                     copied_multimodal_metrics: List[
                         BaseMultimodalMetric
@@ -724,7 +763,10 @@ async def a_execute_test_cases(
     else:
         for test_case in test_cases:
             with capture_evaluation_run("test case"):
-                if isinstance(test_case, LLMTestCase):
+                if (
+                    isinstance(test_case, LLMTestCase)
+                    and not test_case.multimodal
+                ):
                     if len(llm_metrics) == 0:
                         continue
                     llm_test_case_counter += 1
@@ -772,7 +814,9 @@ async def a_execute_test_cases(
                     )
                     tasks.append(asyncio.create_task((task)))
 
-                elif isinstance(test_case, MLLMTestCase):
+                elif (
+                    isinstance(test_case, LLMTestCase) and test_case.multimodal
+                ):
                     mllm_test_case_counter += 1
                     copied_multimodal_metrics: List[BaseMultimodalMetric] = (
                         copy_metrics(mllm_metrics)
@@ -815,7 +859,7 @@ async def _a_execute_llm_test_cases(
     metrics: List[BaseMetric],
     test_case: LLMTestCase,
     test_run_manager: TestRunManager,
-    test_results: List[Union[TestResult, MLLMTestCase]],
+    test_results: List[Union[TestResult, LLMTestCase]],
     count: int,
     test_run: TestRun,
     ignore_errors: bool,
@@ -934,9 +978,9 @@ async def _a_execute_llm_test_cases(
 
 async def _a_execute_mllm_test_cases(
     metrics: List[BaseMultimodalMetric],
-    test_case: MLLMTestCase,
+    test_case: LLMTestCase,
     test_run_manager: TestRunManager,
-    test_results: List[Union[TestResult, MLLMTestCase]],
+    test_results: List[Union[TestResult, LLMTestCase]],
     count: int,
     ignore_errors: bool,
     skip_on_missing_params: bool,
@@ -1013,7 +1057,7 @@ async def _a_execute_conversational_test_cases(
     ],
     test_case: ConversationalTestCase,
     test_run_manager: TestRunManager,
-    test_results: List[Union[TestResult, MLLMTestCase]],
+    test_results: List[Union[TestResult, LLMTestCase]],
     count: int,
     ignore_errors: bool,
     skip_on_missing_params: bool,
@@ -1776,7 +1820,7 @@ async def a_execute_agentic_test_cases(
 async def _a_execute_agentic_test_case(
     golden: Golden,
     test_run_manager: TestRunManager,
-    test_results: List[Union[TestResult, MLLMTestCase]],
+    test_results: List[Union[TestResult, LLMTestCase]],
     count: int,
     verbose_mode: Optional[bool],
     ignore_errors: bool,
@@ -3205,7 +3249,7 @@ async def _evaluate_test_case_pairs(
 
 def _execute_metric(
     metric: BaseMetric,
-    test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
+    test_case: Union[LLMTestCase, ConversationalTestCase],
     show_metric_indicator: bool,
     in_component: bool,
     error_config: ErrorConfig,
@@ -16,7 +16,6 @@ from deepeval.metrics import (
 from deepeval.test_case import (
     LLMTestCase,
     ConversationalTestCase,
-    MLLMTestCase,
 )
 from deepeval.test_run import (
     LLMApiTestCase,
@@ -129,17 +128,14 @@ def create_test_result(
             turns=api_test_case.turns,
         )
     else:
-        multimodal = (
-            api_test_case.multimodal_input is not None
-            and api_test_case.multimodal_input_actual_output is not None
-        )
+        multimodal = api_test_case.images_mapping
         if multimodal:
             return TestResult(
                 name=name,
                 success=api_test_case.success,
                 metrics_data=api_test_case.metrics_data,
-                input=api_test_case.multimodal_input,
-                actual_output=api_test_case.multimodal_input_actual_output,
+                input=api_test_case.input,
+                actual_output=api_test_case.actual_output,
                 conversational=False,
                 multimodal=True,
                 additional_metadata=api_test_case.additional_metadata,
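`create_test_result` now derives the multimodal flag from a single truthy `images_mapping` field rather than the removed `multimodal_input` pair, and reuses the ordinary `input`/`actual_output` fields. A reduced sketch of the check (`ApiTestCase` abbreviates the real API test case shape):

```python
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class ApiTestCase:  # abbreviated stand-in
    input: str
    actual_output: str
    images_mapping: Optional[Dict[str, str]] = None  # marker -> image source

def is_multimodal(case: ApiTestCase) -> bool:
    # A non-empty mapping marks the result multimodal; None or {} does not.
    return bool(case.images_mapping)

assert is_multimodal(ApiTestCase("q", "a", {"[DEEPEVAL:IMAGE:1]": "cat.png"}))
assert not is_multimodal(ApiTestCase("q", "a"))
assert not is_multimodal(ApiTestCase("q", "a", {}))
```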
@@ -222,9 +218,9 @@ def validate_assert_test_inputs(
     )
 
     if test_case and metrics:
-        if isinstance(test_case, LLMTestCase) and not all(
-            isinstance(metric, BaseMetric) for metric in metrics
-        ):
+        if (
+            isinstance(test_case, LLMTestCase) and not test_case.multimodal
+        ) and not all(isinstance(metric, BaseMetric) for metric in metrics):
             raise ValueError(
                 "All 'metrics' for an 'LLMTestCase' must be instances of 'BaseMetric' only."
             )
@@ -234,11 +230,17 @@ def validate_assert_test_inputs(
             raise ValueError(
                 "All 'metrics' for an 'ConversationalTestCase' must be instances of 'BaseConversationalMetric' only."
             )
-        if isinstance(test_case, MLLMTestCase) and not all(
-            isinstance(metric, BaseMultimodalMetric) for metric in metrics
+        if (
+            isinstance(test_case, LLMTestCase) and test_case.multimodal
+        ) and not all(
+            (
+                isinstance(metric, BaseMultimodalMetric)
+                or isinstance(metric, BaseMetric)
+            )
+            for metric in metrics
         ):
             raise ValueError(
-                "All 'metrics' for an 'MLLMTestCase' must be instances of 'BaseMultimodalMetric' only."
+                "All 'metrics' for multi-modal LLMTestCase must be instances of 'BaseMultimodalMetric' only."
             )
 
     if not ((golden and observed_callback) or (test_case and metrics)):
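The relaxed predicate lets a multimodal `LLMTestCase` mix plain `BaseMetric` instances (the RAG metrics that now accept images) with dedicated `BaseMultimodalMetric` instances. A reduced sketch of the acceptance check:

```python
class BaseMetric:  # stand-ins for deepeval's metric base classes
    pass

class BaseMultimodalMetric:
    pass

def metrics_valid_for_multimodal(metrics) -> bool:
    # Each metric must be a single-turn metric (now image-capable) or a
    # dedicated multimodal metric; anything else is rejected.
    return all(isinstance(m, (BaseMultimodalMetric, BaseMetric)) for m in metrics)

assert metrics_valid_for_multimodal([BaseMetric(), BaseMultimodalMetric()])
assert not metrics_valid_for_multimodal([BaseMetric(), object()])
```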
@@ -251,9 +253,7 @@ def validate_evaluate_inputs(
     goldens: Optional[List] = None,
     observed_callback: Optional[Callable] = None,
     test_cases: Optional[
-        Union[
-            List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
-        ]
+        Union[List[LLMTestCase], List[ConversationalTestCase]]
     ] = None,
     metrics: Optional[
         Union[
@@ -292,9 +292,10 @@ def validate_evaluate_inputs(
     if test_cases and metrics:
         for test_case in test_cases:
             for metric in metrics:
-                if isinstance(test_case, LLMTestCase) and not isinstance(
-                    metric, BaseMetric
-                ):
+                if (
+                    isinstance(test_case, LLMTestCase)
+                    and not test_case.multimodal
+                ) and not isinstance(metric, BaseMetric):
                     raise ValueError(
                         f"Metric {metric.__name__} is not a valid metric for LLMTestCase."
                     )
@@ -305,11 +306,14 @@ def validate_evaluate_inputs(
                     raise ValueError(
                         f"Metric {metric.__name__} is not a valid metric for ConversationalTestCase."
                     )
-                if isinstance(test_case, MLLMTestCase) and not isinstance(
-                    metric, BaseMultimodalMetric
+                if (
+                    isinstance(test_case, LLMTestCase) and test_case.multimodal
+                ) and not (
+                    isinstance(metric, BaseMultimodalMetric)
+                    or isinstance(metric, BaseMetric)
                 ):
                     raise ValueError(
-                        f"Metric {metric.__name__} is not a valid metric for MLLMTestCase."
+                        f"Metric {metric.__name__} is not a valid metric for multi-modal LLMTestCase."
                     )
 
 
@@ -0,0 +1,38 @@
+import warnings
+from typing import TYPE_CHECKING, Any
+
+try:
+    from pydantic_ai.agent import Agent as _BaseAgent
+
+    is_pydantic_ai_installed = True
+except ImportError:
+    is_pydantic_ai_installed = False
+
+    class _BaseAgent:
+        """Dummy fallback so imports don't crash when pydantic-ai is missing."""
+
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            # No-op: for compatibility
+            pass
+
+
+if TYPE_CHECKING:
+    # For type checkers: use the real Agent if available.
+    from pydantic_ai.agent import Agent  # type: ignore[unused-ignore]
+else:
+    # At runtime we always have some base: real Agent or our dummy.
+    # This is just to avoid blow-ups.
+    Agent = _BaseAgent
+
+
+class DeepEvalPydanticAIAgent(Agent):
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "instrument_pydantic_ai is deprecated and will be removed in a future version. "
+            "Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        super().__init__(*args, **kwargs)
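The new `agent.py` keeps the old entry point importable but emits a `DeprecationWarning` on construction, otherwise behaving exactly like `pydantic_ai.Agent`. A hypothetical usage snippet (requires `pip install pydantic-ai`; the model id is illustrative):

```python
import warnings

from deepeval.integrations.pydantic_ai.agent import DeepEvalPydanticAIAgent

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    agent = DeepEvalPydanticAIAgent("openai:gpt-4o-mini")  # illustrative model id
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```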
@@ -1,40 +1,58 @@
+from __future__ import annotations
+
 import json
 import logging
 import os
 from time import perf_counter
-from typing import Any, List, Optional
+from typing import Any, List, Optional, TYPE_CHECKING
 
 from deepeval.config.settings import get_settings
 from deepeval.confident.api import get_confident_api_key
 from deepeval.metrics.base_metric import BaseMetric
 from deepeval.prompt import Prompt
 from deepeval.tracing.context import current_trace_context
-from deepeval.tracing.types import Trace
-from deepeval.tracing.otel.utils import to_hex_string
-from deepeval.tracing.tracing import trace_manager
-from deepeval.tracing.otel.utils import normalize_pydantic_ai_messages
 from deepeval.tracing.otel.exporter import ConfidentSpanExporter
-
+from deepeval.tracing.otel.test_exporter import test_exporter
+from deepeval.tracing.otel.utils import (
+    normalize_pydantic_ai_messages,
+    to_hex_string,
+)
+from deepeval.tracing.perf_epoch_bridge import init_clock_bridge
+from deepeval.tracing.tracing import trace_manager
+from deepeval.tracing.types import (
+    AgentSpan,
+    Trace,
+    TraceSpanStatus,
+    ToolCall,
+)
 
 logger = logging.getLogger(__name__)
 
-
 try:
-
-    from opentelemetry.sdk.trace import
+    # Optional dependencies
+    from opentelemetry.sdk.trace import (
+        ReadableSpan as _ReadableSpan,
+        SpanProcessor as _SpanProcessor,
+        TracerProvider,
+    )
     from opentelemetry.sdk.trace.export import BatchSpanProcessor
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
         OTLPSpanExporter,
     )
-    from
+    from pydantic_ai.models.instrumented import (
+        InstrumentationSettings as _BaseInstrumentationSettings,
+    )
 
     dependency_installed = True
 except ImportError as e:
+    dependency_installed = False
+
+    # Preserve previous behavior: only log when verbose mode is enabled.
     if get_settings().DEEPEVAL_VERBOSE_MODE:
         if isinstance(e, ModuleNotFoundError):
             logger.warning(
                 "Optional tracing dependency not installed: %s",
-                e,
+                getattr(e, "name", repr(e)),
                 stacklevel=2,
             )
         else:
@@ -43,26 +61,47 @@ except ImportError as e:
                 e,
                 stacklevel=2,
             )
-
+
+    # Dummy fallbacks so imports and class definitions don't crash when
+    # optional deps are missing. Actual use is still guarded by
+    # is_dependency_installed().
+    class _BaseInstrumentationSettings:
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            pass
+
+    class _SpanProcessor:
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            pass
+
+        def on_start(self, span: Any, parent_context: Any) -> None:
+            pass
+
+        def on_end(self, span: Any) -> None:
+            pass
+
+    class _ReadableSpan:
+        pass
 
 
-def is_dependency_installed():
+def is_dependency_installed() -> bool:
     if not dependency_installed:
         raise ImportError(
-            "Dependencies are not installed. Please install it with
+            "Dependencies are not installed. Please install it with "
+            "`pip install pydantic-ai opentelemetry-sdk "
+            "opentelemetry-exporter-otlp-proto-http`."
         )
     return True
 
 
-
-
-from
-from
-
-
-
-
-
+if TYPE_CHECKING:
+    # For type checkers, use real types
+    from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+    from pydantic_ai.models.instrumented import InstrumentationSettings
+else:
+    # At runtime we always have something to subclass / annotate with
+    InstrumentationSettings = _BaseInstrumentationSettings
+    SpanProcessor = _SpanProcessor
+    ReadableSpan = _ReadableSpan
 
 # OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
 OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
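The `TYPE_CHECKING` split gives static checkers the real OpenTelemetry and pydantic-ai types while the runtime falls back to inert dummies when the extras are missing. The same pattern in isolation, using `rich` as an arbitrary optional dependency for illustration:

```python
from typing import TYPE_CHECKING, Any

try:
    from rich.console import Console as _Console  # optional dependency
    dependency_installed = True
except ImportError:
    dependency_installed = False

    class _Console:  # inert fallback so importing this module never fails
        def __init__(self, *args: Any, **kwargs: Any) -> None:
            pass

if TYPE_CHECKING:
    from rich.console import Console  # type checkers see the real type
else:
    Console = _Console  # runtime always has something to instantiate

def require_dependency() -> bool:
    # Call sites that actually need the dependency fail loudly here.
    if not dependency_installed:
        raise ImportError("Install with `pip install rich`.")
    return True
```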