deepeval 3.7.6__py3-none-any.whl → 3.7.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (57)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/main.py +2022 -759
  3. deepeval/cli/utils.py +208 -36
  4. deepeval/config/dotenv_handler.py +19 -0
  5. deepeval/config/settings.py +658 -262
  6. deepeval/config/utils.py +9 -1
  7. deepeval/dataset/test_run_tracer.py +4 -6
  8. deepeval/evaluate/execute.py +153 -94
  9. deepeval/integrations/pydantic_ai/instrumentator.py +4 -2
  10. deepeval/integrations/pydantic_ai/otel.py +5 -1
  11. deepeval/key_handler.py +121 -51
  12. deepeval/metrics/base_metric.py +9 -3
  13. deepeval/metrics/g_eval/g_eval.py +6 -1
  14. deepeval/metrics/indicator.py +8 -4
  15. deepeval/metrics/mcp/mcp_task_completion.py +15 -16
  16. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +15 -15
  17. deepeval/metrics/mcp/schema.py +4 -0
  18. deepeval/metrics/mcp/template.py +8 -1
  19. deepeval/metrics/prompt_alignment/prompt_alignment.py +6 -3
  20. deepeval/metrics/tool_use/schema.py +4 -0
  21. deepeval/metrics/tool_use/template.py +16 -2
  22. deepeval/metrics/tool_use/tool_use.py +30 -28
  23. deepeval/metrics/topic_adherence/schema.py +4 -0
  24. deepeval/metrics/topic_adherence/template.py +8 -1
  25. deepeval/metrics/topic_adherence/topic_adherence.py +15 -14
  26. deepeval/metrics/turn_contextual_precision/template.py +8 -1
  27. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +44 -86
  28. deepeval/metrics/turn_contextual_recall/template.py +8 -1
  29. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +44 -82
  30. deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
  31. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +48 -92
  32. deepeval/metrics/turn_faithfulness/template.py +8 -1
  33. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +76 -130
  34. deepeval/metrics/utils.py +16 -1
  35. deepeval/models/__init__.py +2 -0
  36. deepeval/models/llms/__init__.py +2 -0
  37. deepeval/models/llms/amazon_bedrock_model.py +5 -4
  38. deepeval/models/llms/anthropic_model.py +4 -3
  39. deepeval/models/llms/azure_model.py +4 -3
  40. deepeval/models/llms/deepseek_model.py +5 -8
  41. deepeval/models/llms/grok_model.py +5 -8
  42. deepeval/models/llms/kimi_model.py +5 -8
  43. deepeval/models/llms/litellm_model.py +2 -0
  44. deepeval/models/llms/local_model.py +1 -1
  45. deepeval/models/llms/openai_model.py +4 -3
  46. deepeval/models/retry_policy.py +10 -5
  47. deepeval/models/utils.py +1 -5
  48. deepeval/simulator/conversation_simulator.py +6 -2
  49. deepeval/simulator/template.py +3 -1
  50. deepeval/synthesizer/synthesizer.py +19 -17
  51. deepeval/test_run/test_run.py +6 -1
  52. deepeval/utils.py +26 -0
  53. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/METADATA +3 -3
  54. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/RECORD +57 -56
  55. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/LICENSE.md +0 -0
  56. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/WHEEL +0 -0
  57. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/entry_points.txt +0 -0
deepeval/config/utils.py CHANGED
@@ -1,7 +1,8 @@
 import json
 import os
 import re
-
+from dotenv import dotenv_values
+from pathlib import Path
 from typing import Any, Iterable, List, Optional
 
 
@@ -142,3 +143,10 @@ def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
 def constrain_between(value: float, lo: float, hi: float) -> float:
     """Return value constrained to the inclusive range [lo, hi]."""
     return min(max(value, lo), hi)
+
+
+def read_dotenv_file(path: Path) -> dict[str, str]:
+    if not path.exists():
+        return {}
+    values = dotenv_values(path)
+    return {key: value for key, value in values.items() if value is not None}
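Note: `dotenv_values` from python-dotenv maps keys declared without a value to `None`; the new `read_dotenv_file` helper filters those out and returns `{}` for a missing file. A quick illustration (the `.env` contents here are hypothetical):

    from pathlib import Path
    from deepeval.config.utils import read_dotenv_file

    # .env:
    #   OPENAI_API_KEY=sk-xxx
    #   BARE_FLAG            <- no "=", so dotenv_values maps it to None
    print(read_dotenv_file(Path(".env")))
    # {'OPENAI_API_KEY': 'sk-xxx'}  -- BARE_FLAG dropped; missing file -> {}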
deepeval/dataset/test_run_tracer.py CHANGED
@@ -5,6 +5,7 @@ from opentelemetry.trace import Tracer as OTelTracer
 from opentelemetry.sdk.trace import SpanProcessor
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from deepeval.config.settings import get_settings
 
 try:
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
@@ -26,11 +27,8 @@ def is_opentelemetry_available():
 
 from deepeval.confident.api import get_confident_api_key
 
-OTLP_ENDPOINT = (
-    os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
-    if os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
-    else "https://otel.confident-ai.com"
-)
+settings = get_settings()
+OTLP_ENDPOINT = str(settings.CONFIDENT_OTEL_URL)
 # OTLP_ENDPOINT = "http://127.0.0.1:4318"
 
 # Module-level globals to be imported and used by other code
@@ -67,7 +65,7 @@ def init_global_test_run_tracer(api_key: Optional[str] = None):
 
     provider = TracerProvider()
     exporter = OTLPSpanExporter(
-        endpoint=f"{OTLP_ENDPOINT}/v1/traces",
+        endpoint=f"{OTLP_ENDPOINT}v1/traces",
         headers={"x-confident-api-key": api_key},
     )
    provider.add_span_processor(RunIdSpanProcessor())
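Note: the endpoint is now sourced from `settings.CONFIDENT_OTEL_URL` instead of the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable, and the literal `/` was dropped from the f-string (`{OTLP_ENDPOINT}v1/traces`). That only works if `str(settings.CONFIDENT_OTEL_URL)` ends in a slash, which is what pydantic v2 URL types produce for a bare host; a sketch of that assumption (the exact field type is not shown in this diff):

    from pydantic import AnyHttpUrl, TypeAdapter

    url = TypeAdapter(AnyHttpUrl).validate_python("https://otel.confident-ai.com")
    print(str(url))            # https://otel.confident-ai.com/  (trailing slash added)
    print(f"{url}v1/traces")   # https://otel.confident-ai.com/v1/traces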
deepeval/evaluate/execute.py CHANGED
@@ -51,6 +51,10 @@ from deepeval.utils import (
     shorten,
     len_medium,
     format_error_text,
+    are_timeouts_disabled,
+    get_per_task_timeout_seconds,
+    get_gather_timeout_seconds,
+    get_gather_timeout,
 )
 from deepeval.telemetry import capture_evaluation_run
 from deepeval.metrics import (
@@ -109,6 +113,57 @@ from deepeval.test_run.hyperparameters import (
 logger = logging.getLogger(__name__)
 
 
+def _timeout_msg(action: str, seconds: float) -> str:
+    if are_timeouts_disabled():
+        return (
+            f"Timeout occurred while {action} "
+            "(DeepEval timeouts are disabled; this likely came from the model/provider SDK or network layer). "
+            "Set DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+        )
+    return (
+        f"Timed out after {seconds:.2f}s while {action}. "
+        "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
+        "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+    )
+
+
+def _log_gather_timeout(
+    logger,
+    *,
+    exc: Optional[BaseException] = None,
+    pending: Optional[int] = None,
+) -> None:
+    settings = get_settings()
+    if are_timeouts_disabled():
+        logger.warning(
+            "A task raised %s while waiting for gathered results; DeepEval gather/per-task timeouts are disabled%s. "
+            "This likely came from the model/provider SDK or network layer.",
+            type(exc).__name__ if exc else "TimeoutError",
+            f" (pending={pending})" if pending is not None else "",
+            exc_info=settings.DEEPEVAL_LOG_STACK_TRACES,
+        )
+    else:
+        if pending is not None:
+            logger.warning(
+                "Gather TIMEOUT after %.1fs; pending=%d tasks. "
+                "Some metrics may be marked as timed out. "
+                "To give tasks more time, consider increasing "
+                "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or "
+                "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE.",
+                get_gather_timeout_seconds(),
+                pending,
+            )
+
+        else:
+            logger.warning(
+                "gather TIMEOUT after %.1fs. Some metrics may be marked as timed out. "
+                "To give tasks more time, consider increasing "
+                "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or "
+                "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE.",
+                get_gather_timeout_seconds(),
+            )
+
+
 def _skip_metrics_for_error(
     span: Optional[BaseSpan] = None,
     trace: Optional[Trace] = None,
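Note: both branches of `_timeout_msg` are exercised by the call sites later in this file; for reference, the two message shapes it produces:

    # Timeouts enabled (default):
    _timeout_msg("evaluating metric", 300.0)
    # "Timed out after 300.00s while evaluating metric. Increase
    #  DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set
    #  DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."

    # Timeouts disabled (are_timeouts_disabled() is True):
    _timeout_msg("evaluating metric", 300.0)
    # "Timeout occurred while evaluating metric (DeepEval timeouts are disabled;
    #  this likely came from the model/provider SDK or network layer).
    #  Set DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."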
@@ -217,18 +272,6 @@ async def _snapshot_tasks():
     return {t for t in asyncio.all_tasks() if t is not cur}
 
 
-def _per_task_timeout() -> float:
-    return get_settings().DEEPEVAL_PER_TASK_TIMEOUT_SECONDS
-
-
-def _gather_timeout() -> float:
-    s = get_settings()
-    return (
-        s.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS
-        + s.DEEPEVAL_TASK_GATHER_BUFFER_SECONDS
-    )
-
-
 def filter_duplicate_results(
     main_result: TestResult, results: List[TestResult]
 ) -> List[TestResult]:
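Note: the removed private helpers move to `deepeval.utils` (see the import hunk above: `are_timeouts_disabled`, `get_per_task_timeout_seconds`, `get_gather_timeout_seconds`, `get_gather_timeout`). Their bodies are not part of this diff; a plausible shape, inferred from the removed code and from how the call sites below use them (`get_gather_timeout()` feeds `asyncio.wait_for`, where `None` means no deadline), would be:

    from typing import Optional
    from deepeval.config.settings import get_settings

    def are_timeouts_disabled() -> bool:
        return bool(get_settings().DEEPEVAL_DISABLE_TIMEOUTS)

    def get_per_task_timeout_seconds() -> float:
        return get_settings().DEEPEVAL_PER_TASK_TIMEOUT_SECONDS

    def get_gather_timeout_seconds() -> float:
        s = get_settings()
        return (
            s.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS
            + s.DEEPEVAL_TASK_GATHER_BUFFER_SECONDS
        )

    def get_gather_timeout() -> Optional[float]:
        # None would disable the gather deadline entirely (hypothesized).
        return None if are_timeouts_disabled() else get_gather_timeout_seconds()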
@@ -250,6 +293,10 @@ async def _await_with_outer_deadline(obj, *args, timeout: float, **kwargs):
             coro = obj
         else:
             coro = obj(*args, **kwargs)
+
+        if get_settings().DEEPEVAL_DISABLE_TIMEOUTS:
+            return await coro
+
         return await asyncio.wait_for(coro, timeout=timeout)
     finally:
         reset_outer_deadline(token)
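Note: with `DEEPEVAL_DISABLE_TIMEOUTS` set, the coroutine is awaited directly instead of through `asyncio.wait_for`, so DeepEval itself can no longer raise `TimeoutError`; any timeout that still surfaces comes from the provider SDK or the network stack. The behavioral difference in miniature:

    import asyncio

    async def slow():
        await asyncio.sleep(2)
        return "done"

    async def main():
        try:
            await asyncio.wait_for(slow(), timeout=1)  # wrapped: deadline applies
        except asyncio.TimeoutError:
            print("timed out")
        print(await slow())  # bare await: no deadline, prints "done" after 2s

    asyncio.run(main())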
@@ -350,7 +397,7 @@ def execute_test_cases(
     index_of = {id(m): i for i, m in enumerate(metrics_for_case)}
     current_index = -1
     start_time = time.perf_counter()
-    deadline_timeout = _per_task_timeout()
+    deadline_timeout = get_per_task_timeout_seconds()
     deadline_token = set_outer_deadline(deadline_timeout)
     new_cached_test_case: CachedTestCase = None
     try:
@@ -435,25 +482,20 @@ def execute_test_cases(
 
         run_sync_with_timeout(_run_case, deadline_timeout)
     except (asyncio.TimeoutError, TimeoutError):
-        msg = (
-            f"Timed out after {deadline_timeout:.2f}s while evaluating metric. "
-            "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
-            "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
-        )
-        for i, m in enumerate(metrics_for_case):
-            if getattr(m, "skipped", False):
+
+        msg = _timeout_msg("evaluating metric", deadline_timeout)
+        for i, metric in enumerate(metrics_for_case):
+            if metric.skipped:
                 continue
             # already finished or errored? leave it
-            if getattr(m, "success", None) is not None or getattr(
-                m, "error", None
-            ):
+            if metric.success is not None or metric.error is not None:
                 continue
             if i == current_index:
-                m.success = False
-                m.error = msg
+                metric.success = False
+                metric.error = msg
             elif i > current_index:
-                m.success = False
-                m.error = "Skipped due to case timeout."
+                metric.success = False
+                metric.error = "Skipped due to case timeout."
 
         if not error_config.ignore_errors:
             raise
@@ -478,12 +520,12 @@ def execute_test_cases(
             )
 
         # Attach MetricData for *all* metrics (finished or synthesized)
-        for i, m in enumerate(metrics_for_case):
-            if getattr(m, "skipped", False):
+        for i, metric in enumerate(metrics_for_case):
+            if metric.skipped:
                 continue
             if not emitted[i]:
                 api_test_case.update_metric_data(
-                    create_metric_data(m)
+                    create_metric_data(metric)
                 )
 
         elapsed = time.perf_counter() - start_time
@@ -536,9 +578,8 @@ async def a_execute_test_cases(
 
     async def execute_with_semaphore(func: Callable, *args, **kwargs):
         async with semaphore:
-            timeout = _per_task_timeout()
             return await _await_with_outer_deadline(
-                func, *args, timeout=timeout, **kwargs
+                func, *args, timeout=get_per_task_timeout_seconds(), **kwargs
             )
 
     global_test_run_cache_manager.disable_write_cache = (
@@ -636,17 +677,16 @@ async def a_execute_test_cases(
         try:
             await asyncio.wait_for(
                 asyncio.gather(*tasks),
-                timeout=_gather_timeout(),
+                timeout=get_gather_timeout(),
             )
-        except (asyncio.TimeoutError, TimeoutError):
+        except (asyncio.TimeoutError, TimeoutError) as e:
             for t in tasks:
                 if not t.done():
                     t.cancel()
             await asyncio.gather(*tasks, return_exceptions=True)
-            logging.getLogger("deepeval").error(
-                "Gather timed out after %.1fs. Some metrics may be marked as timed out.",
-                _gather_timeout(),
-            )
+
+            _log_gather_timeout(logger, exc=e)
+
             if not error_config.ignore_errors:
                 raise
 
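Note: the except-branch above follows asyncio's cancel-and-drain idiom: cancel the stragglers, then gather them again with `return_exceptions=True` so the resulting `CancelledError`s are consumed rather than left as never-retrieved exceptions. Stripped to its skeleton:

    import asyncio

    async def gather_with_deadline(tasks, timeout):
        try:
            await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout)
        except (asyncio.TimeoutError, TimeoutError):
            for t in tasks:
                if not t.done():
                    t.cancel()  # request cancellation of unfinished tasks
            # drain: await the cancelled tasks so their exceptions are retrieved
            await asyncio.gather(*tasks, return_exceptions=True)
            raise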
@@ -706,7 +746,7 @@ async def a_execute_test_cases(
         try:
             await asyncio.wait_for(
                 asyncio.gather(*tasks),
-                timeout=_gather_timeout(),
+                timeout=get_gather_timeout(),
             )
         except (asyncio.TimeoutError, TimeoutError):
             # Cancel any still-pending tasks and drain them
@@ -775,11 +815,18 @@ async def _a_execute_llm_test_cases(
             progress=progress,
         )
     except asyncio.CancelledError:
-        msg = (
-            "Timed out/cancelled while evaluating metric. "
-            "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
-            "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
-        )
+        if get_settings().DEEPEVAL_DISABLE_TIMEOUTS:
+            msg = (
+                "Cancelled while evaluating metric. "
+                "(DeepEval timeouts are disabled; this cancellation likely came from upstream orchestration or manual cancellation). "
+                "Set DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
+        else:
+            msg = (
+                "Timed out/cancelled while evaluating metric. "
+                "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
+                "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
         for m in metrics:
             if getattr(m, "skipped", False):
                 continue
@@ -885,11 +932,18 @@ async def _a_execute_conversational_test_cases(
         )
 
     except asyncio.CancelledError:
-        msg = (
-            "Timed out/cancelled while evaluating metric. "
-            "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
-            "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
-        )
+        if get_settings().DEEPEVAL_DISABLE_TIMEOUTS:
+            msg = (
+                "Cancelled while evaluating metric. "
+                "(DeepEval timeouts are disabled; this cancellation likely came from upstream orchestration or manual cancellation). "
+                "Set DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
+        else:
+            msg = (
+                "Timed out/cancelled while evaluating metric. "
+                "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
+                "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
         for m in metrics:
             if getattr(m, "skipped", False):
                 continue
@@ -999,7 +1053,7 @@ def execute_agentic_test_cases(
                 loop.run_until_complete(
                     _await_with_outer_deadline(
                         coro,
-                        timeout=_per_task_timeout(),
+                        timeout=get_per_task_timeout_seconds(),
                     )
                 )
             else:
@@ -1326,17 +1380,13 @@ def execute_agentic_test_cases(
 
         # run the golden with a timeout
         start_time = time.perf_counter()
-        deadline = _per_task_timeout()
+        deadline = get_per_task_timeout_seconds()
 
         try:
             run_sync_with_timeout(_run_golden, deadline)
         except (asyncio.TimeoutError, TimeoutError):
             # mark any not yet finished trace level and span level metrics as timed out.
-            msg = (
-                f"Timed out after {deadline:.2f}s while executing agentic test case. "
-                "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
-                "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
-            )
+            msg = _timeout_msg("executing agentic test case", deadline)
 
             if current_trace is not None:
                 # Trace-level metrics
@@ -1517,9 +1567,8 @@ async def a_execute_agentic_test_cases(
 
     async def execute_with_semaphore(func: Callable, *args, **kwargs):
        async with semaphore:
-            timeout = _per_task_timeout()
             return await _await_with_outer_deadline(
-                func, *args, timeout=timeout, **kwargs
+                func, *args, timeout=get_per_task_timeout_seconds(), **kwargs
             )
 
     test_run_manager = global_test_run_manager
@@ -1570,7 +1619,7 @@ async def a_execute_agentic_test_cases(
         try:
             await asyncio.wait_for(
                 asyncio.gather(*tasks),
-                timeout=_gather_timeout(),
+                timeout=get_gather_timeout(),
             )
         except (asyncio.TimeoutError, TimeoutError):
             # Cancel any still-pending tasks and drain them
@@ -1651,7 +1700,7 @@ async def _a_execute_agentic_test_case(
                 await _await_with_outer_deadline(
                     observed_callback,
                     golden.input,
-                    timeout=_per_task_timeout(),
+                    timeout=get_per_task_timeout_seconds(),
                 )
             else:
                 observed_callback(golden.input)
@@ -1745,7 +1794,7 @@ async def _a_execute_agentic_test_case(
         try:
             await asyncio.wait_for(
                 asyncio.gather(*child_tasks),
-                timeout=_gather_timeout(),
+                timeout=get_gather_timeout(),
             )
         except (asyncio.TimeoutError, TimeoutError):
             for t in child_tasks:
@@ -1768,11 +1817,18 @@ async def _a_execute_agentic_test_case(
         )
     except asyncio.CancelledError:
         # mark any unfinished metrics as cancelled
-        cancel_msg = (
-            "Timed out/cancelled while evaluating agentic test case. "
-            "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
-            "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
-        )
+        if get_settings().DEEPEVAL_DISABLE_TIMEOUTS:
+            cancel_msg = (
+                "Cancelled while evaluating agentic test case. "
+                "(DeepEval timeouts are disabled; this cancellation likely came from upstream orchestration or manual cancellation). "
+                "Set DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
+        else:
+            cancel_msg = (
+                "Timed out/cancelled while evaluating agentic test case. "
+                "Increase DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE or set "
+                "DEEPEVAL_LOG_STACK_TRACES=1 for full traceback."
+            )
 
         if trace_metrics:
             for m in trace_metrics:
@@ -2464,8 +2520,9 @@ def a_execute_agentic_test_cases_from_loop(
 
     async def execute_callback_with_semaphore(coroutine: Awaitable):
         async with semaphore:
-            timeout = _per_task_timeout()
-            return await _await_with_outer_deadline(coroutine, timeout=timeout)
+            return await _await_with_outer_deadline(
+                coroutine, timeout=get_per_task_timeout_seconds()
+            )
 
     def evaluate_test_cases(
         progress: Optional[Progress] = None,
@@ -2687,15 +2744,18 @@ def a_execute_agentic_test_cases_from_loop(
             loop.run_until_complete(
                 asyncio.wait_for(
                     asyncio.gather(*created_tasks, return_exceptions=True),
-                    timeout=_gather_timeout(),
+                    timeout=get_gather_timeout(),
                 )
             )
 
-        except (asyncio.TimeoutError, TimeoutError):
+        except (asyncio.TimeoutError, TimeoutError) as e:
             import traceback
 
+            settings = get_settings()
             pending = [t for t in created_tasks if not t.done()]
 
+            _log_gather_timeout(logger, exc=e, pending=len(pending))
+
             # Log the elapsed time for each task that was pending
             for t in pending:
                 meta = task_meta.get(t, {})
@@ -2703,26 +2763,27 @@ def a_execute_agentic_test_cases_from_loop(
                 elapsed_time = time.perf_counter() - start_time
 
                 # Determine if it was a per task or gather timeout based on task's elapsed time
-                if elapsed_time >= _per_task_timeout():
-                    timeout_type = "per-task"
+                if not settings.DEEPEVAL_DISABLE_TIMEOUTS:
+                    timeout_type = (
+                        "per-task"
+                        if elapsed_time >= get_per_task_timeout_seconds()
+                        else "gather"
+                    )
+                    logger.info(
+                        " - PENDING %s elapsed_time=%.2fs timeout_type=%s meta=%s",
+                        t.get_name(),
+                        elapsed_time,
+                        timeout_type,
+                        meta,
+                    )
                 else:
-                    timeout_type = "gather"
-
-                logger.warning(
-                    f"[deepeval] gather TIMEOUT after {_gather_timeout()}s; "
-                    f"pending={len(pending)} tasks. Timeout type: {timeout_type}. "
-                    f"To give tasks more time, consider increasing "
-                    f"DEEPEVAL_PER_TASK_TIMEOUT_SECONDS for longer task completion time or "
-                    f"DEEPEVAL_TASK_GATHER_BUFFER_SECONDS to allow more time for gathering results."
-                )
+                    logger.info(
+                        " - PENDING %s elapsed_time=%.2fs meta=%s",
+                        t.get_name(),
+                        elapsed_time,
+                        meta,
+                    )
 
-                # Log pending tasks and their stack traces
-                logger.info(
-                    " - PENDING %s elapsed_time=%.2fs meta=%s",
-                    t.get_name(),
-                    elapsed_time,
-                    meta,
-                )
                 if loop.get_debug() and get_settings().DEEPEVAL_DEBUG_ASYNC:
                     frames = t.get_stack(limit=6)
                     if frames:
@@ -2904,9 +2965,8 @@ async def _a_evaluate_traces(
 
     async def execute_evals_with_semaphore(func: Callable, *args, **kwargs):
         async with semaphore:
-            timeout = _per_task_timeout()
             return await _await_with_outer_deadline(
-                func, *args, timeout=timeout, **kwargs
+                func, *args, timeout=get_per_task_timeout_seconds(), **kwargs
             )
 
     eval_tasks = []
@@ -2954,7 +3014,7 @@ async def _a_evaluate_traces(
     try:
         await asyncio.wait_for(
             asyncio.gather(*eval_tasks),
-            timeout=_gather_timeout(),
+            timeout=get_gather_timeout(),
         )
     except (asyncio.TimeoutError, TimeoutError):
         for t in eval_tasks:
@@ -2984,9 +3044,8 @@ async def _evaluate_test_case_pairs(
 
     async def execute_with_semaphore(func: Callable, *args, **kwargs):
         async with semaphore:
-            timeout = _per_task_timeout()
             return await _await_with_outer_deadline(
-                func, *args, timeout=timeout, **kwargs
+                func, *args, timeout=get_per_task_timeout_seconds(), **kwargs
             )
 
     tasks = []
@@ -3024,7 +3083,7 @@ async def _evaluate_test_case_pairs(
     try:
         await asyncio.wait_for(
             asyncio.gather(*tasks),
-            timeout=_gather_timeout(),
+            timeout=get_gather_timeout(),
         )
     except (asyncio.TimeoutError, TimeoutError):
         # Cancel any still-pending tasks and drain them
deepeval/integrations/pydantic_ai/instrumentator.py CHANGED
@@ -27,6 +27,7 @@ from deepeval.tracing.types import (
 )
 
 logger = logging.getLogger(__name__)
+settings = get_settings()
 
 try:
     # Optional dependencies
@@ -48,7 +49,7 @@ except ImportError as e:
     dependency_installed = False
 
     # Preserve previous behavior: only log when verbose mode is enabled.
-    if get_settings().DEEPEVAL_VERBOSE_MODE:
+    if settings.DEEPEVAL_VERBOSE_MODE:
         if isinstance(e, ModuleNotFoundError):
             logger.warning(
                 "Optional tracing dependency not installed: %s",
@@ -104,7 +105,8 @@ else:
     ReadableSpan = _ReadableSpan
 
     # OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
-    OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
+    # OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
+    OTLP_ENDPOINT = str(settings.CONFIDENT_OTEL_URL) + "v1/traces"
     init_clock_bridge()  # initialize clock bridge for perf_counter() to epoch_nanos conversion
 
 
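Note: `settings = get_settings()` is now captured once at import time here, so `CONFIDENT_OTEL_URL` must be configured before this module is imported. Assuming the setting is environment-overridable like other settings read through `get_settings()` (not shown in this diff):

    import os
    os.environ["CONFIDENT_OTEL_URL"] = "http://127.0.0.1:4318/"  # set before import

    from deepeval.config.settings import get_settings
    print(str(get_settings().CONFIDENT_OTEL_URL) + "v1/traces")
    # http://127.0.0.1:4318/v1/traces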
deepeval/integrations/pydantic_ai/otel.py CHANGED
@@ -1,6 +1,7 @@
 import warnings
 from typing import Optional
 from deepeval.telemetry import capture_tracing_integration
+from deepeval.config.settings import get_settings
 
 try:
     from opentelemetry import trace
@@ -23,7 +24,10 @@ def is_opentelemetry_available():
     return True
 
 
-OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
+settings = get_settings()
+# OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
+
+OTLP_ENDPOINT = str(settings.CONFIDENT_OTEL_URL) + "v1/traces"
 
 
 def instrument_pydantic_ai(api_key: Optional[str] = None):
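Note: usage is unchanged by this diff; only the endpoint resolution moved to settings. A minimal call, assuming this block is deepeval/integrations/pydantic_ai/otel.py as the per-file stats suggest:

    from deepeval.integrations.pydantic_ai.otel import instrument_pydantic_ai

    # api_key is optional per the signature above; when omitted it presumably
    # falls back to the configured Confident AI key.
    instrument_pydantic_ai(api_key="<CONFIDENT_API_KEY>")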