deepeval 3.6.7__py3-none-any.whl → 3.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +104 -36
- deepeval/config/utils.py +5 -0
- deepeval/dataset/dataset.py +162 -30
- deepeval/dataset/utils.py +41 -13
- deepeval/errors.py +20 -2
- deepeval/evaluate/execute.py +1662 -688
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +13 -3
- deepeval/integrations/crewai/__init__.py +2 -1
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +8 -8
- deepeval/metrics/indicator.py +21 -1
- deepeval/metrics/mcp/mcp_task_completion.py +7 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/task_completion/task_completion.py +1 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/amazon_bedrock_model.py +20 -17
- deepeval/models/llms/openai_model.py +10 -1
- deepeval/models/retry_policy.py +103 -20
- deepeval/openai/extractors.py +61 -16
- deepeval/openai/patch.py +8 -12
- deepeval/openai/types.py +1 -1
- deepeval/openai/utils.py +108 -1
- deepeval/prompt/prompt.py +1 -0
- deepeval/prompt/utils.py +43 -14
- deepeval/simulator/conversation_simulator.py +25 -18
- deepeval/synthesizer/chunking/context_generator.py +9 -1
- deepeval/synthesizer/synthesizer.py +11 -10
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/test_run.py +190 -207
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +23 -4
- deepeval/tracing/trace_context.py +53 -38
- deepeval/tracing/tracing.py +23 -0
- deepeval/tracing/types.py +16 -14
- deepeval/utils.py +21 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/METADATA +1 -1
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/RECORD +85 -63
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- deepeval/tracing/message_types/__init__.py +0 -10
- deepeval/tracing/message_types/base.py +0 -6
- deepeval/tracing/message_types/messages.py +0 -14
- deepeval/tracing/message_types/tools.py +0 -18
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/WHEEL +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/local_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from openai import OpenAI, AsyncOpenAI
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
+            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
+        )
+        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
        )
         return [data.embedding for data in response.data]
 
@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
-        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
+        client_init_kwargs = dict(
             api_key=self.api_key,
             base_url=self.base_url,
-            **self._client_kwargs(),
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
-            # older OpenAI SDKs may not accept max_retries
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
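The net effect of this refactor: credentials, endpoint, model, and per-request options become explicit constructor arguments with the key file as fallback, and any remaining kwargs flow to the OpenAI client constructor. A minimal usage sketch, assuming an OpenAI-compatible local server; the URL, key, and model name below are placeholders, not values from this diff:

from deepeval.models.embedding_models.local_embedding_model import LocalEmbeddingModel

embedder = LocalEmbeddingModel(
    api_key="not-needed",                   # placeholder; falls back to the key file if omitted
    base_url="http://localhost:8080/v1",    # hypothetical OpenAI-compatible endpoint
    model="nomic-embed-text",               # hypothetical model name
    generation_kwargs={"dimensions": 512},  # forwarded into embeddings.create(...); honored only if the server supports it
    timeout=30,                             # extra kwargs go to the OpenAI client constructor
)
vector = embedder.embed_text("hello world")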
deepeval/models/embedding_models/ollama_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from ollama import Client, AsyncClient
-from typing import List
+from typing import List, Optional, Dict
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.host = host or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.args = args
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         return self._build_client(AsyncClient)
 
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        return cls(host=self.host, **self.client_kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
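Same pattern for Ollama: `host` and `model` are explicit (still falling back to the LOCAL_EMBEDDING_* key-file values), `generation_kwargs` is forwarded to `embed(...)`, and any other kwargs reach the `Client`/`AsyncClient` constructor. A sketch with placeholder values:

from deepeval.models.embedding_models.ollama_embedding_model import OllamaEmbeddingModel

embedder = OllamaEmbeddingModel(
    model="nomic-embed-text",              # hypothetical Ollama embedding model
    host="http://localhost:11434",         # the default Ollama host, spelled out
    generation_kwargs={"truncate": True},  # forwarded into embed(...)
)
vectors = embedder.embed_texts(["first text", "second text"])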
deepeval/models/embedding_models/openai_embedding_model.py
CHANGED

@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"
 
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
+
     def __init__(
         self,
         model: Optional[str] = None,
-        _openai_api_key: Optional[str] = None,
-        **kwargs,
+        openai_api_key: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
     ):
-        model = model if model else default_openai_embedding_model
-        if model not in valid_openai_embedding_models:
+        self.openai_api_key = openai_api_key
+        self.model_name = model if model else default_openai_embedding_model
+        if self.model_name not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self._openai_api_key = _openai_api_key
-        self.model_name = model
-        self.kwargs = kwargs
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
-        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
-        leave their retry settings as is.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
-            api_key=self._openai_api_key,
-            **self._client_kwargs(),
+        client_init_kwargs = dict(
+            api_key=self.openai_api_key,
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
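For the hosted OpenAI variant, the constructor now takes `openai_api_key` directly and validates the model name eagerly, so an unsupported name raises `ValueError` at construction time rather than on the first embedding call. A sketch; the key is a placeholder, and when it is omitted the OpenAI client falls back to the `OPENAI_API_KEY` environment variable:

from deepeval.models.embedding_models.openai_embedding_model import OpenAIEmbeddingModel

embedder = OpenAIEmbeddingModel(
    model="text-embedding-3-small",         # must be in valid_openai_embedding_models
    openai_api_key="sk-placeholder",        # placeholder key
    generation_kwargs={"dimensions": 256},  # forwarded into embeddings.create(...)
)
vector = embedder.embed_text("hello world")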
deepeval/models/llms/amazon_bedrock_model.py
CHANGED

@@ -76,23 +76,26 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
-        [previous method body, old lines 79-95, not captured in this view]
+        try:
+            payload = self.get_converse_request_body(prompt)
+            client = await self._ensure_client()
+            response = await client.converse(
+                modelId=self.model_id,
+                messages=payload["messages"],
+                inferenceConfig=payload["inferenceConfig"],
+            )
+            message = response["output"]["message"]["content"][0]["text"]
+            cost = self.calculate_cost(
+                response["usage"]["inputTokens"],
+                response["usage"]["outputTokens"],
+            )
+            if schema is None:
+                return message, cost
+            else:
+                json_output = trim_and_load_json(message)
+                return schema.model_validate(json_output), cost
+        finally:
+            await self.close()
 
     ###############################################
     # Client management
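The rewritten `a_generate` always releases the Bedrock client in a `finally` block and returns a `(result, cost)` tuple: raw text when `schema` is omitted, a validated Pydantic instance otherwise. A consumption sketch; the constructor arguments shown are assumptions for illustration, not the documented signature:

import asyncio
from pydantic import BaseModel
from deepeval.models.llms.amazon_bedrock_model import AmazonBedrockModel

class Verdict(BaseModel):
    verdict: str

async def main():
    model = AmazonBedrockModel(model_id="anthropic.claude-3-haiku-20240307-v1:0")  # hypothetical args
    text, cost = await model.a_generate("Say hi")  # schema=None -> (str, float)
    parsed, cost = await model.a_generate(
        'Answer as JSON with a "verdict" key.', schema=Verdict
    )  # schema given -> (Verdict, float), via trim_and_load_json + model_validate

asyncio.run(main())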
deepeval/models/llms/openai_model.py
CHANGED

@@ -8,6 +8,7 @@ from openai import (
     AsyncOpenAI,
 )
 
+from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
@@ -209,6 +210,11 @@ models_requiring_temperature_1 = [
 ]
 
 
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
 class GPTModel(DeepEvalBaseLLM):
     def __init__(
         self,
@@ -387,7 +393,6 @@ class GPTModel(DeepEvalBaseLLM):
         )
         return schema.model_validate(json_output), cost
 
-        client: AsyncOpenAI
         completion = await client.chat.completions.create(
             model=self.model_name,
             messages=[{"role": "user", "content": prompt}],
@@ -501,9 +506,13 @@ class GPTModel(DeepEvalBaseLLM):
         kwargs = dict(self.kwargs or {})
         if not sdk_retries_for(PS.OPENAI):
             kwargs["max_retries"] = 0
+
+        if not kwargs.get("timeout"):
+            kwargs["timeout"] = _request_timeout_seconds()
         return kwargs
 
     def _build_client(self, cls):
+
         kw = dict(
             api_key=self._openai_api_key,
             base_url=self.base_url,
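The `_client_kwargs` change gives every OpenAI client a request timeout with a simple precedence: an explicit `timeout=` kwarg wins, then a positive `DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS`, then a 30-second default. A standalone sketch of that resolution order; `resolve_timeout` is illustrative, not part of the library:

from typing import Optional

def resolve_timeout(explicit: Optional[float], setting: Optional[float]) -> float:
    # Mirrors the _client_kwargs + _request_timeout_seconds logic above.
    if explicit:  # caller-supplied client kwarg wins
        return explicit
    configured = float(setting or 0)
    return configured if configured > 0 else 30.0  # fall back to 30s

assert resolve_timeout(10.0, 5.0) == 10.0   # explicit kwarg wins
assert resolve_timeout(None, 5.0) == 5.0    # configured setting used next
assert resolve_timeout(None, None) == 30.0  # default when nothing is set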
deepeval/models/retry_policy.py
CHANGED

@@ -39,6 +39,7 @@ import itertools
 import functools
 import threading
 import logging
+import time
 
 from dataclasses import dataclass, field
 from typing import Callable, Iterable, Mapping, Optional, Sequence, Tuple, Union
@@ -52,6 +53,7 @@ from tenacity import (
 )
 from tenacity.stop import stop_base
 from tenacity.wait import wait_base
+from contextvars import ContextVar, copy_context
 
 from deepeval.constants import (
     ProviderSlug as PS,
@@ -65,6 +67,81 @@ Provider = Union[str, PS]
 _MAX_TIMEOUT_THREADS = get_settings().DEEPEVAL_TIMEOUT_THREAD_LIMIT
 _TIMEOUT_SEMA = threading.BoundedSemaphore(_MAX_TIMEOUT_THREADS)
 _WORKER_ID = itertools.count(1)
+_OUTER_DEADLINE = ContextVar("deepeval_outer_deadline", default=None)
+
+
+def set_outer_deadline(seconds: float | None):
+    """Set (or clear) the outer task time budget.
+
+    Stores a deadline in a local context variable so nested code
+    can cooperatively respect a shared budget. Always pair this with
+    `reset_outer_deadline(token)` in a `finally` block.
+
+    Args:
+        seconds: Number of seconds from now to set as the deadline. If `None`,
+            `0`, or a non-positive value is provided, the deadline is cleared.
+
+    Returns:
+        contextvars.Token: The token returned by the underlying ContextVar `.set()`
+            call, which must be passed to `reset_outer_deadline` to restore the
+            previous value.
+    """
+    if seconds and seconds > 0:
+        return _OUTER_DEADLINE.set(time.monotonic() + seconds)
+    return _OUTER_DEADLINE.set(None)
+
+
+def reset_outer_deadline(token):
+    """Restore the previous outer deadline set by `set_outer_deadline`.
+
+    This should be called in a `finally` block to ensure the deadline
+    is restored even if an exception occurs.
+
+    Args:
+        token: The `contextvars.Token` returned by `set_outer_deadline`.
+    """
+    if token is not None:
+        _OUTER_DEADLINE.reset(token)
+
+
+def _remaining_budget() -> float | None:
+    dl = _OUTER_DEADLINE.get()
+    if dl is None:
+        return None
+    return max(0.0, dl - time.monotonic())
+
+
+def _is_budget_spent() -> bool:
+    rem = _remaining_budget()
+    return rem is not None and rem <= 0.0
+
+
+def resolve_effective_attempt_timeout():
+    """Resolve the timeout to use for a single provider attempt.
+
+    Combines the configured per-attempt timeout with any remaining outer budget:
+    - If `DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS` is `0` or `None`, returns `0`;
+      callers should skip `asyncio.wait_for` in this case and rely on the outer cap.
+    - If positive and an outer deadline is present, returns
+      `min(per_attempt, remaining_budget)`.
+    - If positive and no outer deadline is present, returns `per_attempt`.
+
+    Returns:
+        float: Seconds to use for the inner per-attempt timeout. `0` means
+            disable inner timeout and rely on the outer budget instead.
+    """
+    per_attempt = float(
+        get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0
+    )
+    # 0 or None disable the inner wait_for; rely on the outer task cap for timeouts instead.
+    if per_attempt <= 0:
+        return 0
+    # With a positive per-attempt value, use up to the remaining outer budget.
+    rem = _remaining_budget()
+    if rem is not None:
+        return max(0.0, min(per_attempt, rem))
+    return per_attempt
+
 
 # --------------------------
 # Policy description
@@ -399,9 +476,10 @@ def make_after_log(slug: str):
         if not _logger.isEnabledFor(after_level):
             return
 
+        show_trace = bool(get_settings().DEEPEVAL_LOG_STACK_TRACES)
         exc_info = (
             (type(exc), exc, getattr(exc, "__traceback__", None))
-            if get_settings().DEEPEVAL_LOG_STACK_TRACES
+            if show_trace
             else None
         )
 
@@ -416,7 +494,7 @@ def make_after_log(slug: str):
     return _after
 
 
-def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
+def _make_timeout_error(timeout_seconds: float) -> asyncio.TimeoutError:
     settings = get_settings()
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(
@@ -427,12 +505,12 @@ def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
         )
     msg = (
         f"call timed out after {timeout_seconds:g}s (per attempt). "
-        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS (0 disables) or reduce work per attempt."
+        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE (None disables) or reduce work per attempt."
     )
-    return TimeoutError(msg)
+    return asyncio.TimeoutError(msg)
 
 
-def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
+def run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     """
     Run a synchronous callable with a soft timeout enforced by a helper thread,
     with a global cap on concurrent timeout-workers.
@@ -499,9 +577,11 @@ def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     done = threading.Event()
     result = {"value": None, "exc": None}
 
+    context = copy_context()
+
     def target():
         try:
-            result["value"] = func(*args, **kwargs)
+            result["value"] = context.run(func, *args, **kwargs)
         except BaseException as e:
             result["exc"] = e
         finally:
@@ -562,37 +642,40 @@ def create_retry_decorator(provider: Provider):
 
         @functools.wraps(func)
         async def attempt(*args, **kwargs):
-            [3 removed lines not captured in this view]
+            if _is_budget_spent():
+                raise _make_timeout_error(0)
+
+            per_attempt_timeout = resolve_effective_attempt_timeout()
+
             coro = func(*args, **kwargs)
-            if …
+            if per_attempt_timeout > 0:
                 try:
-                    return await asyncio.wait_for(coro, …)
-                except asyncio.TimeoutError as e:
+                    return await asyncio.wait_for(coro, per_attempt_timeout)
+                except (asyncio.TimeoutError, TimeoutError) as e:
                     if (
                         logger.isEnabledFor(logging.DEBUG)
                         and get_settings().DEEPEVAL_VERBOSE_MODE is True
                     ):
                         logger.debug(
                             "async timeout after %.3fs (active_threads=%d, tasks=%d)",
-                            …,
+                            per_attempt_timeout,
                             threading.active_count(),
                             len(asyncio.all_tasks()),
                         )
-                    raise _make_timeout_error(…)
+                    raise _make_timeout_error(per_attempt_timeout) from e
             return await coro
 
         return base_retry(attempt)
 
         @functools.wraps(func)
        def attempt(*args, **kwargs):
-            [6 removed lines not captured in this view]
+            if _is_budget_spent():
+                raise _make_timeout_error(0)
+
+            per_attempt_timeout = resolve_effective_attempt_timeout()
+            if per_attempt_timeout > 0:
+                return run_sync_with_timeout(
+                    func, per_attempt_timeout, *args, **kwargs
                 )
             return func(*args, **kwargs)
 
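Taken together, these additions form a cooperative budget protocol: `set_outer_deadline` stores a monotonic deadline in a `ContextVar`, each retry attempt clamps its per-attempt timeout to the remaining budget (or refuses to start once the budget is spent), and `copy_context()` is what lets the sync path's helper thread see the caller's deadline. A self-contained sketch of the same pattern, using standalone names rather than the library's private ones:

import threading
import time
from contextvars import ContextVar, copy_context

_DEADLINE: ContextVar = ContextVar("deadline", default=None)

def remaining() -> float | None:
    dl = _DEADLINE.get()
    return None if dl is None else max(0.0, dl - time.monotonic())

def effective_timeout(per_attempt: float) -> float:
    # Clamp the per-attempt timeout to whatever outer budget is left.
    rem = remaining()
    return per_attempt if rem is None else max(0.0, min(per_attempt, rem))

token = _DEADLINE.set(time.monotonic() + 10.0)  # open a 10s budget
try:
    print(effective_timeout(30.0))  # ~10.0: clamped by the outer budget
    # A worker thread only sees the deadline if it runs in a copied context:
    ctx = copy_context()
    t = threading.Thread(target=lambda: ctx.run(lambda: print(remaining())))
    t.start()
    t.join()
finally:
    _DEADLINE.reset(token)  # always restore, as the docstrings require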