deepeval 3.6.7__py3-none-any.whl → 3.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +104 -36
  3. deepeval/config/utils.py +5 -0
  4. deepeval/dataset/dataset.py +162 -30
  5. deepeval/dataset/utils.py +41 -13
  6. deepeval/errors.py +20 -2
  7. deepeval/evaluate/execute.py +1662 -688
  8. deepeval/evaluate/types.py +1 -0
  9. deepeval/evaluate/utils.py +13 -3
  10. deepeval/integrations/crewai/__init__.py +2 -1
  11. deepeval/integrations/crewai/tool.py +71 -0
  12. deepeval/integrations/llama_index/__init__.py +0 -4
  13. deepeval/integrations/llama_index/handler.py +20 -21
  14. deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
  15. deepeval/metrics/__init__.py +13 -0
  16. deepeval/metrics/base_metric.py +1 -0
  17. deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
  18. deepeval/metrics/conversational_g_eval/__init__.py +3 -0
  19. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
  20. deepeval/metrics/dag/schema.py +1 -1
  21. deepeval/metrics/dag/templates.py +2 -2
  22. deepeval/metrics/goal_accuracy/__init__.py +1 -0
  23. deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
  24. deepeval/metrics/goal_accuracy/schema.py +17 -0
  25. deepeval/metrics/goal_accuracy/template.py +235 -0
  26. deepeval/metrics/hallucination/hallucination.py +8 -8
  27. deepeval/metrics/indicator.py +21 -1
  28. deepeval/metrics/mcp/mcp_task_completion.py +7 -2
  29. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
  30. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
  31. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
  32. deepeval/metrics/plan_adherence/__init__.py +1 -0
  33. deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
  34. deepeval/metrics/plan_adherence/schema.py +11 -0
  35. deepeval/metrics/plan_adherence/template.py +170 -0
  36. deepeval/metrics/plan_quality/__init__.py +1 -0
  37. deepeval/metrics/plan_quality/plan_quality.py +292 -0
  38. deepeval/metrics/plan_quality/schema.py +11 -0
  39. deepeval/metrics/plan_quality/template.py +101 -0
  40. deepeval/metrics/step_efficiency/__init__.py +1 -0
  41. deepeval/metrics/step_efficiency/schema.py +11 -0
  42. deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
  43. deepeval/metrics/step_efficiency/template.py +256 -0
  44. deepeval/metrics/task_completion/task_completion.py +1 -0
  45. deepeval/metrics/tool_correctness/schema.py +6 -0
  46. deepeval/metrics/tool_correctness/template.py +88 -0
  47. deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
  48. deepeval/metrics/tool_use/__init__.py +1 -0
  49. deepeval/metrics/tool_use/schema.py +19 -0
  50. deepeval/metrics/tool_use/template.py +220 -0
  51. deepeval/metrics/tool_use/tool_use.py +458 -0
  52. deepeval/metrics/topic_adherence/__init__.py +1 -0
  53. deepeval/metrics/topic_adherence/schema.py +16 -0
  54. deepeval/metrics/topic_adherence/template.py +162 -0
  55. deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
  56. deepeval/models/embedding_models/azure_embedding_model.py +37 -36
  57. deepeval/models/embedding_models/local_embedding_model.py +30 -32
  58. deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
  59. deepeval/models/embedding_models/openai_embedding_model.py +22 -31
  60. deepeval/models/llms/amazon_bedrock_model.py +20 -17
  61. deepeval/models/llms/openai_model.py +10 -1
  62. deepeval/models/retry_policy.py +103 -20
  63. deepeval/openai/extractors.py +61 -16
  64. deepeval/openai/patch.py +8 -12
  65. deepeval/openai/types.py +1 -1
  66. deepeval/openai/utils.py +108 -1
  67. deepeval/prompt/prompt.py +1 -0
  68. deepeval/prompt/utils.py +43 -14
  69. deepeval/simulator/conversation_simulator.py +25 -18
  70. deepeval/synthesizer/chunking/context_generator.py +9 -1
  71. deepeval/synthesizer/synthesizer.py +11 -10
  72. deepeval/test_case/llm_test_case.py +6 -2
  73. deepeval/test_run/test_run.py +190 -207
  74. deepeval/tracing/__init__.py +2 -1
  75. deepeval/tracing/otel/exporter.py +3 -4
  76. deepeval/tracing/otel/utils.py +23 -4
  77. deepeval/tracing/trace_context.py +53 -38
  78. deepeval/tracing/tracing.py +23 -0
  79. deepeval/tracing/types.py +16 -14
  80. deepeval/utils.py +21 -0
  81. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/METADATA +1 -1
  82. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/RECORD +85 -63
  83. deepeval/integrations/llama_index/agent/patched.py +0 -68
  84. deepeval/tracing/message_types/__init__.py +0 -10
  85. deepeval/tracing/message_types/base.py +0 -6
  86. deepeval/tracing/message_types/messages.py +0 -14
  87. deepeval/tracing/message_types/tools.py +0 -18
  88. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/LICENSE.md +0 -0
  89. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/WHEEL +0 -0
  90. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/entry_points.txt +0 -0
--- a/deepeval/models/embedding_models/local_embedding_model.py
+++ b/deepeval/models/embedding_models/local_embedding_model.py
@@ -1,5 +1,5 @@
 from openai import OpenAI, AsyncOpenAI
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
+            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
+        )
+        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
        )
         return [data.embedding for data in response.data]
 
@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
-        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
+        client_init_kwargs = dict(
             api_key=self.api_key,
             base_url=self.base_url,
-            **self._client_kwargs(),
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
-            # Older OpenAI SDKs may not accept max_retries; drop and retry once.
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
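
Note: all three embedding models in this release converge on the same constructor shape: explicit connection arguments that fall back to the key file, a generation_kwargs dict splatted into every embeddings call, and leftover **client_kwargs forwarded to the underlying client. A minimal usage sketch against the new LocalEmbeddingModel surface; every argument value below is illustrative, and whether extras like dimensions are honored depends on the local server:

from deepeval.models.embedding_models.local_embedding_model import (
    LocalEmbeddingModel,
)

# Omitted arguments fall back to the key-file entries
# (LOCAL_EMBEDDING_API_KEY / LOCAL_EMBEDDING_BASE_URL / LOCAL_EMBEDDING_MODEL_NAME).
model = LocalEmbeddingModel(
    api_key="sk-anything",                  # illustrative
    base_url="http://localhost:8000/v1",    # illustrative
    model="nomic-embed-text",               # illustrative
    generation_kwargs={"dimensions": 512},  # splatted into embeddings.create(...)
    timeout=30,  # any remaining kwarg is passed to the OpenAI client constructor
)
vector = model.embed_text("hello world")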
--- a/deepeval/models/embedding_models/ollama_embedding_model.py
+++ b/deepeval/models/embedding_models/ollama_embedding_model.py
@@ -1,5 +1,5 @@
 from ollama import Client, AsyncClient
-from typing import List
+from typing import List, Optional, Dict
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.host = host or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        # TODO: This is not being used. Clean it up in consistency PR
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.args = args
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         return self._build_client(AsyncClient)
 
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        return cls(host=self.host, **self.client_kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
--- a/deepeval/models/embedding_models/openai_embedding_model.py
+++ b/deepeval/models/embedding_models/openai_embedding_model.py
@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"
 
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
+
     def __init__(
         self,
         model: Optional[str] = None,
-        _openai_api_key: Optional[str] = None,
-        **kwargs,
+        openai_api_key: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
     ):
-        model_name = model if model else default_openai_embedding_model
-        if model_name not in valid_openai_embedding_models:
+        self.openai_api_key = openai_api_key
+        self.model_name = model if model else default_openai_embedding_model
+        if self.model_name not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self._openai_api_key = _openai_api_key
-        self.model_name = model_name
-        self.kwargs = kwargs
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
-        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
-        leave their retry settings as is.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
-            api_key=self._openai_api_key,
-            **self._client_kwargs(),
+        client_init_kwargs = dict(
+            api_key=self.openai_api_key,
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
--- a/deepeval/models/llms/amazon_bedrock_model.py
+++ b/deepeval/models/llms/amazon_bedrock_model.py
@@ -76,23 +76,26 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
-        payload = self.get_converse_request_body(prompt)
-        client = await self._ensure_client()
-        response = await client.converse(
-            modelId=self.model_id,
-            messages=payload["messages"],
-            inferenceConfig=payload["inferenceConfig"],
-        )
-        message = response["output"]["message"]["content"][0]["text"]
-        cost = self.calculate_cost(
-            response["usage"]["inputTokens"],
-            response["usage"]["outputTokens"],
-        )
-        if schema is None:
-            return message, cost
-        else:
-            json_output = trim_and_load_json(message)
-            return schema.model_validate(json_output), cost
+        try:
+            payload = self.get_converse_request_body(prompt)
+            client = await self._ensure_client()
+            response = await client.converse(
+                modelId=self.model_id,
+                messages=payload["messages"],
+                inferenceConfig=payload["inferenceConfig"],
+            )
+            message = response["output"]["message"]["content"][0]["text"]
+            cost = self.calculate_cost(
+                response["usage"]["inputTokens"],
+                response["usage"]["outputTokens"],
+            )
+            if schema is None:
+                return message, cost
+            else:
+                json_output = trim_and_load_json(message)
+                return schema.model_validate(json_output), cost
+        finally:
+            await self.close()
 
     ###############################################
     # Client management
--- a/deepeval/models/llms/openai_model.py
+++ b/deepeval/models/llms/openai_model.py
@@ -8,6 +8,7 @@ from openai import (
     AsyncOpenAI,
 )
 
+from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
@@ -209,6 +210,11 @@ models_requiring_temperature_1 = [
 ]
 
 
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
 class GPTModel(DeepEvalBaseLLM):
     def __init__(
         self,
@@ -387,7 +393,6 @@ class GPTModel(DeepEvalBaseLLM):
             )
             return schema.model_validate(json_output), cost
 
-        client: AsyncOpenAI
         completion = await client.chat.completions.create(
             model=self.model_name,
             messages=[{"role": "user", "content": prompt}],
@@ -501,9 +506,13 @@ class GPTModel(DeepEvalBaseLLM):
         kwargs = dict(self.kwargs or {})
         if not sdk_retries_for(PS.OPENAI):
             kwargs["max_retries"] = 0
+
+        if not kwargs.get("timeout"):
+            kwargs["timeout"] = _request_timeout_seconds()
         return kwargs
 
     def _build_client(self, cls):
+
         kw = dict(
             api_key=self._openai_api_key,
             base_url=self.base_url,
--- a/deepeval/models/retry_policy.py
+++ b/deepeval/models/retry_policy.py
@@ -39,6 +39,7 @@ import itertools
 import functools
 import threading
 import logging
+import time
 
 from dataclasses import dataclass, field
 from typing import Callable, Iterable, Mapping, Optional, Sequence, Tuple, Union
@@ -52,6 +53,7 @@ from tenacity import (
 )
 from tenacity.stop import stop_base
 from tenacity.wait import wait_base
+from contextvars import ContextVar, copy_context
 
 from deepeval.constants import (
     ProviderSlug as PS,
@@ -65,6 +67,81 @@ Provider = Union[str, PS]
 _MAX_TIMEOUT_THREADS = get_settings().DEEPEVAL_TIMEOUT_THREAD_LIMIT
 _TIMEOUT_SEMA = threading.BoundedSemaphore(_MAX_TIMEOUT_THREADS)
 _WORKER_ID = itertools.count(1)
+_OUTER_DEADLINE = ContextVar("deepeval_outer_deadline", default=None)
+
+
+def set_outer_deadline(seconds: float | None):
+    """Set (or clear) the outer task time budget.
+
+    Stores a deadline in a local context variable so nested code
+    can cooperatively respect a shared budget. Always pair this with
+    `reset_outer_deadline(token)` in a `finally` block.
+
+    Args:
+        seconds: Number of seconds from now to set as the deadline. If `None`,
+            `0`, or a non-positive value is provided, the deadline is cleared.
+
+    Returns:
+        contextvars.Token: The token returned by the underlying ContextVar `.set()`
+            call, which must be passed to `reset_outer_deadline` to restore the
+            previous value.
+    """
+    if seconds and seconds > 0:
+        return _OUTER_DEADLINE.set(time.monotonic() + seconds)
+    return _OUTER_DEADLINE.set(None)
+
+
+def reset_outer_deadline(token):
+    """Restore the previous outer deadline set by `set_outer_deadline`.
+
+    This should be called in a `finally` block to ensure the deadline
+    is restored even if an exception occurs.
+
+    Args:
+        token: The `contextvars.Token` returned by `set_outer_deadline`.
+    """
+    if token is not None:
+        _OUTER_DEADLINE.reset(token)
+
+
+def _remaining_budget() -> float | None:
+    dl = _OUTER_DEADLINE.get()
+    if dl is None:
+        return None
+    return max(0.0, dl - time.monotonic())
+
+
+def _is_budget_spent() -> bool:
+    rem = _remaining_budget()
+    return rem is not None and rem <= 0.0
+
+
+def resolve_effective_attempt_timeout():
+    """Resolve the timeout to use for a single provider attempt.
+
+    Combines the configured per-attempt timeout with any remaining outer budget:
+    - If `DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS` is `0` or `None`, returns `0`;
+      callers should skip `asyncio.wait_for` in this case and rely on the outer cap.
+    - If positive and an outer deadline is present, returns
+      `min(per_attempt, remaining_budget)`.
+    - If positive and no outer deadline is present, returns `per_attempt`.
+
+    Returns:
+        float: Seconds to use for the inner per-attempt timeout. `0` means
+            disable inner timeout and rely on the outer budget instead.
+    """
+    per_attempt = float(
+        get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0
+    )
+    # 0 or None disable inner wait_for. That means rely on outer task cap for timeouts instead.
+    if per_attempt <= 0:
+        return 0
+    # If we do have a positive per-attempt, use up to remaining outer budget.
+    rem = _remaining_budget()
+    if rem is not None:
+        return max(0.0, min(per_attempt, rem))
+    return per_attempt
+
 
 # --------------------------
 # Policy description
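
The two public helpers above are meant to be paired around a unit of work, exactly as their docstrings prescribe. A minimal sketch (the wrapper and budget value are hypothetical, not deepeval API):

from deepeval.models.retry_policy import (
    set_outer_deadline,
    reset_outer_deadline,
)

def run_with_budget(task, budget_seconds=60.0):  # hypothetical wrapper
    token = set_outer_deadline(budget_seconds)  # start the shared budget
    try:
        # retry-decorated provider calls made in here see the remaining budget
        return task()
    finally:
        reset_outer_deadline(token)  # always restore the previous deadline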
@@ -399,9 +476,10 @@ def make_after_log(slug: str):
         if not _logger.isEnabledFor(after_level):
             return
 
+        show_trace = bool(get_settings().DEEPEVAL_LOG_STACK_TRACES)
         exc_info = (
             (type(exc), exc, getattr(exc, "__traceback__", None))
-            if after_level >= logging.ERROR
+            if show_trace
             else None
         )
 
@@ -416,7 +494,7 @@ def make_after_log(slug: str):
     return _after
 
 
-def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
+def _make_timeout_error(timeout_seconds: float) -> asyncio.TimeoutError:
     settings = get_settings()
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(
@@ -427,12 +505,12 @@ def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
         )
     msg = (
         f"call timed out after {timeout_seconds:g}s (per attempt). "
-        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS (0 disables) or reduce work per attempt."
+        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE (None disables) or reduce work per attempt."
     )
-    return TimeoutError(msg)
+    return asyncio.TimeoutError(msg)
 
 
-def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
+def run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     """
     Run a synchronous callable with a soft timeout enforced by a helper thread,
     with a global cap on concurrent timeout-workers.
@@ -499,9 +577,11 @@ def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     done = threading.Event()
     result = {"value": None, "exc": None}
 
+    context = copy_context()
+
     def target():
         try:
-            result["value"] = func(*args, **kwargs)
+            result["value"] = context.run(func, *args, **kwargs)
         except BaseException as e:
             result["exc"] = e
         finally:
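
The copy_context() change is what lets the outer-deadline ContextVar survive the hop into the timeout worker thread: a plain Thread target starts with an empty context, so the deadline would otherwise read as unset there. A self-contained illustration of the mechanism (demo code, not deepeval's):

import threading
from contextvars import ContextVar, copy_context

var = ContextVar("demo", default=None)
var.set("outer value")

seen = {}
ctx = copy_context()  # snapshot the caller's context, including `var`
worker = threading.Thread(target=lambda: seen.update(inside=ctx.run(var.get)))
worker.start()
worker.join()
assert seen["inside"] == "outer value"  # without ctx.run(...) this would be None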
@@ -562,37 +642,40 @@ def create_retry_decorator(provider: Provider):
 
            @functools.wraps(func)
            async def attempt(*args, **kwargs):
-                timeout_seconds = (
-                    get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0
-                )
+                if _is_budget_spent():
+                    raise _make_timeout_error(0)
+
+                per_attempt_timeout = resolve_effective_attempt_timeout()
+
                coro = func(*args, **kwargs)
-                if timeout_seconds > 0:
+                if per_attempt_timeout > 0:
                    try:
-                        return await asyncio.wait_for(coro, timeout_seconds)
-                    except asyncio.TimeoutError as e:
+                        return await asyncio.wait_for(coro, per_attempt_timeout)
+                    except (asyncio.TimeoutError, TimeoutError) as e:
                        if (
                            logger.isEnabledFor(logging.DEBUG)
                            and get_settings().DEEPEVAL_VERBOSE_MODE is True
                        ):
                            logger.debug(
                                "async timeout after %.3fs (active_threads=%d, tasks=%d)",
-                                timeout_seconds,
+                                per_attempt_timeout,
                                threading.active_count(),
                                len(asyncio.all_tasks()),
                            )
-                        raise _make_timeout_error(timeout_seconds) from e
+                        raise _make_timeout_error(per_attempt_timeout) from e
                return await coro
 
            return base_retry(attempt)
 
        @functools.wraps(func)
        def attempt(*args, **kwargs):
-            timeout_seconds = (
-                get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0
-            )
-            if timeout_seconds > 0:
-                return _run_sync_with_timeout(
-                    func, timeout_seconds, *args, **kwargs
+            if _is_budget_spent():
+                raise _make_timeout_error(0)
+
+            per_attempt_timeout = resolve_effective_attempt_timeout()
+            if per_attempt_timeout > 0:
+                return run_sync_with_timeout(
+                    func, per_attempt_timeout, *args, **kwargs
                )
            return func(*args, **kwargs)
 
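
Net effect of the decorator changes: an attempt is refused outright once the shared budget is spent, and otherwise each attempt is clamped to the time remaining. A rough sketch of the arithmetic (values are hypothetical; assumes DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS is configured to 30):

import time
from deepeval.models.retry_policy import (
    set_outer_deadline,
    reset_outer_deadline,
    resolve_effective_attempt_timeout,
)

token = set_outer_deadline(10.0)  # the whole task gets a 10s budget
try:
    time.sleep(4)  # pretend 4s of work has already happened
    # resolves to min(per_attempt=30, remaining~6), so roughly 6 here
    print(resolve_effective_attempt_timeout())
finally:
    reset_outer_deadline(token)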