deepeval 3.6.8__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (57)
  1. deepeval/_version.py +1 -1
  2. deepeval/anthropic/__init__.py +19 -0
  3. deepeval/anthropic/extractors.py +94 -0
  4. deepeval/anthropic/patch.py +169 -0
  5. deepeval/anthropic/utils.py +225 -0
  6. deepeval/benchmarks/drop/drop.py +40 -14
  7. deepeval/benchmarks/ifeval/ifeval.py +2 -2
  8. deepeval/confident/types.py +4 -2
  9. deepeval/config/settings.py +258 -47
  10. deepeval/config/settings_manager.py +4 -0
  11. deepeval/config/utils.py +5 -0
  12. deepeval/dataset/dataset.py +162 -30
  13. deepeval/dataset/utils.py +41 -13
  14. deepeval/evaluate/execute.py +1099 -633
  15. deepeval/integrations/crewai/handler.py +36 -0
  16. deepeval/integrations/langchain/callback.py +27 -2
  17. deepeval/integrations/llama_index/handler.py +58 -4
  18. deepeval/integrations/llama_index/utils.py +24 -0
  19. deepeval/metrics/__init__.py +5 -0
  20. deepeval/metrics/exact_match/__init__.py +0 -0
  21. deepeval/metrics/exact_match/exact_match.py +94 -0
  22. deepeval/metrics/indicator.py +21 -1
  23. deepeval/metrics/pattern_match/__init__.py +0 -0
  24. deepeval/metrics/pattern_match/pattern_match.py +103 -0
  25. deepeval/metrics/task_completion/task_completion.py +9 -2
  26. deepeval/model_integrations/__init__.py +0 -0
  27. deepeval/model_integrations/utils.py +116 -0
  28. deepeval/models/base_model.py +3 -1
  29. deepeval/models/llms/amazon_bedrock_model.py +20 -17
  30. deepeval/models/llms/openai_model.py +10 -1
  31. deepeval/models/retry_policy.py +103 -20
  32. deepeval/openai/__init__.py +3 -1
  33. deepeval/openai/extractors.py +2 -2
  34. deepeval/openai/utils.py +7 -31
  35. deepeval/prompt/api.py +11 -10
  36. deepeval/prompt/prompt.py +5 -4
  37. deepeval/simulator/conversation_simulator.py +25 -18
  38. deepeval/synthesizer/chunking/context_generator.py +9 -1
  39. deepeval/telemetry.py +3 -3
  40. deepeval/test_case/llm_test_case.py +3 -2
  41. deepeval/test_run/api.py +3 -2
  42. deepeval/test_run/cache.py +4 -3
  43. deepeval/test_run/test_run.py +24 -5
  44. deepeval/tracing/api.py +11 -10
  45. deepeval/tracing/otel/exporter.py +11 -0
  46. deepeval/tracing/patchers.py +102 -1
  47. deepeval/tracing/trace_context.py +13 -4
  48. deepeval/tracing/tracing.py +10 -1
  49. deepeval/tracing/types.py +8 -8
  50. deepeval/tracing/utils.py +9 -0
  51. deepeval/utils.py +44 -2
  52. {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/METADATA +2 -2
  53. {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/RECORD +57 -47
  54. /deepeval/{openai → model_integrations}/types.py +0 -0
  55. {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/LICENSE.md +0 -0
  56. {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/WHEEL +0 -0
  57. {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/entry_points.txt +0 -0
@@ -9,10 +9,13 @@ Central config for DeepEval.
9
9
  type coercion.
10
10
  """
11
11
 
12
+ import hashlib
13
+ import json
12
14
  import logging
13
15
  import math
14
16
  import os
15
17
  import re
18
+ import threading
16
19
 
17
20
  from dotenv import dotenv_values
18
21
  from pathlib import Path
@@ -22,6 +25,7 @@ from pydantic import (
22
25
  confloat,
23
26
  conint,
24
27
  field_validator,
28
+ model_validator,
25
29
  SecretStr,
26
30
  )
27
31
  from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -30,6 +34,7 @@ from typing import Any, Dict, List, Optional, NamedTuple
30
34
  from deepeval.config.utils import (
31
35
  parse_bool,
32
36
  coerce_to_list,
37
+ constrain_between,
33
38
  dedupe_preserve_order,
34
39
  )
35
40
  from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
@@ -38,6 +43,13 @@ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
38
43
  logger = logging.getLogger(__name__)
39
44
  _SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
40
45
 
46
+ # settings that were converted to computed fields with override counterparts
47
+ _DEPRECATED_TO_OVERRIDE = {
48
+ "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS": "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE",
49
+ "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS": "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE",
50
+ "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS": "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE",
51
+ }
52
+
41
53
 
42
54
  def _find_legacy_enum(env_key: str):
43
55
  from deepeval.key_handler import (
@@ -336,6 +348,7 @@ class Settings(BaseSettings):
336
348
  IGNORE_DEEPEVAL_ERRORS: Optional[bool] = None
337
349
  SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = None
338
350
  DEEPEVAL_VERBOSE_MODE: Optional[bool] = None
351
+ DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = None
339
352
  ENABLE_DEEPEVAL_CACHE: Optional[bool] = None
340
353
 
341
354
  CONFIDENT_TRACE_FLUSH: Optional[bool] = None
@@ -355,11 +368,19 @@ class Settings(BaseSettings):
355
368
  #
356
369
  MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
357
370
  MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
358
- # DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS: per-attempt timeout for provider calls enforced by our retry decorator.
359
- # This timeout interacts with retry policy and the task level budget (DEEPEVAL_PER_TASK_TIMEOUT_SECONDS) below.
360
- # If you leave this at 0/None, the computed outer budget defaults to 180s.
361
- DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS: Optional[confloat(ge=0)] = (
362
- None # per-attempt timeout. Set 0/None to disable
371
+ # DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
372
+ # Per-attempt timeout (seconds) for provider calls used by the retry policy.
373
+ # This is an OVERRIDE setting. The effective value you should rely on at runtime is
374
+ # the computed property: DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS.
375
+ #
376
+ # If this is None or 0 the DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS is computed from either:
377
+ # - DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: slice the outer budget
378
+ # across attempts after subtracting expected backoff and a small safety buffer
379
+ # - the default outer budget (180s) if no outer override is set.
380
+ #
381
+ # Tip: Set this OR the outer override, but generally not both
382
+ DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(gt=0)] = (
383
+ None
363
384
  )
364
385
 
365
386
  #
@@ -373,76 +394,115 @@ class Settings(BaseSettings):
373
394
  #
374
395
  DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = 128
375
396
  DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = 5.0
376
- # DEEPEVAL_PER_TASK_TIMEOUT_SECONDS is the outer time budget for one metric/task.
377
- # It is computed from per-attempt timeout + retries/backoff unless you explicitly override it.
378
- # - OVERRIDE = None or 0 -> auto compute as:
379
- # attempts * per_attempt_timeout + sum(backoff_sleeps) + ~jitter/2 per sleep + 1s safety
380
- # (If per_attempt_timeout is 0/None, the auto outer budget defaults to 180s.)
381
- # - OVERRIDE > 0 -> use that exact value. A warning is logged if it is likely too small
382
- # to permit the configured attempts/backoff.
397
+ # DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
398
+ # Outer time budget (seconds) for a single metric/test-case, including retries and backoff.
399
+ # This is an OVERRIDE setting. If None or 0 the DEEPEVAL_PER_TASK_TIMEOUT_SECONDS field is computed:
400
+ # attempts * per_attempt_timeout + expected_backoff + 1s safety
401
+ # (When neither override is set 180s is used.)
383
402
  #
384
- # Tip:
385
- # Most users only need to set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS and DEEPEVAL_RETRY_MAX_ATTEMPTS.
386
- # Leave the outer budget on auto unless you have very strict SLAs.
387
- DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[conint(ge=0)] = None
403
+ # If > 0, we use the value exactly and log a warning if it is likely too small
404
+ # to accommodate the configured attempts/backoff.
405
+ #
406
+ # usage:
407
+ # - set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE along with DEEPEVAL_RETRY_MAX_ATTEMPTS, or
408
+ # - set DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE alone.
409
+ DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = None
388
410
 
389
411
  # Buffer time for gathering results from all tasks, added to the longest task duration
390
412
  # Increase if many tasks are running concurrently
391
- DEEPEVAL_TASK_GATHER_BUFFER_SECONDS: confloat(ge=0) = 60
413
+ # DEEPEVAL_TASK_GATHER_BUFFER_SECONDS: confloat(ge=0) = (
414
+ # 30 # 15s seemed like not enough. we may make this computed later.
415
+ # )
416
+ DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
417
+ None
418
+ )
392
419
 
393
420
  ###################
394
421
  # Computed Fields #
395
422
  ###################
396
423
 
397
- def _calc_auto_outer_timeout(self) -> int:
424
+ def _calc_auto_outer_timeout(self) -> float:
398
425
  """Compute outer budget from per-attempt timeout + retries/backoff.
399
426
  Never reference the computed property itself here.
400
427
  """
401
428
  attempts = self.DEEPEVAL_RETRY_MAX_ATTEMPTS or 1
402
- timeout_seconds = float(self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
429
+ timeout_seconds = float(
430
+ self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE or 0
431
+ )
403
432
  if timeout_seconds <= 0:
404
433
  # No per-attempt timeout set -> default outer budget
405
434
  return 180
406
435
 
407
- sleeps = max(0, attempts - 1)
408
- cur = float(self.DEEPEVAL_RETRY_INITIAL_SECONDS)
409
- cap = float(self.DEEPEVAL_RETRY_CAP_SECONDS)
410
- base = float(self.DEEPEVAL_RETRY_EXP_BASE)
411
- jitter = float(self.DEEPEVAL_RETRY_JITTER)
412
-
413
- backoff = 0.0
414
- for _ in range(sleeps):
415
- backoff += min(cap, cur)
416
- cur *= base
417
- backoff += sleeps * (jitter / 2.0) # expected jitter
418
-
436
+ backoff = self._expected_backoff(attempts)
419
437
  safety_overhead = 1.0
420
- return int(
438
+ return float(
421
439
  math.ceil(attempts * timeout_seconds + backoff + safety_overhead)
422
440
  )
423
441
 
424
442
  @computed_field
425
443
  @property
426
- def DEEPEVAL_PER_TASK_TIMEOUT_SECONDS(self) -> int:
444
+ def DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS(self) -> float:
445
+ over = self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
446
+ if over is not None and float(over) > 0:
447
+ return float(over)
448
+
449
+ attempts = int(self.DEEPEVAL_RETRY_MAX_ATTEMPTS or 1)
450
+ outer_over = self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
451
+
452
+ # If the user set an outer override, slice it up
453
+ if outer_over and float(outer_over) > 0 and attempts > 0:
454
+ backoff = self._expected_backoff(attempts)
455
+ safety = 1.0
456
+ usable = max(0.0, float(outer_over) - backoff - safety)
457
+ return 0.0 if usable <= 0 else (usable / attempts)
458
+
459
+ # NEW: when neither override is set, derive from the default outer (180s)
460
+ default_outer = 180.0
461
+ backoff = self._expected_backoff(attempts)
462
+ safety = 1.0
463
+ usable = max(0.0, default_outer - backoff - safety)
464
+ # Keep per-attempt sensible (cap to at least 1s)
465
+ return 0.0 if usable <= 0 else max(1.0, usable / attempts)
466
+
467
+ @computed_field
468
+ @property
469
+ def DEEPEVAL_PER_TASK_TIMEOUT_SECONDS(self) -> float:
427
470
  """If OVERRIDE is set (nonzero), return it; else return the derived budget."""
428
471
  outer = self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
429
472
  if outer not in (None, 0):
430
473
  # Warn if user-provided outer is likely to truncate retries
431
474
  if (self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0) > 0:
432
475
  min_needed = self._calc_auto_outer_timeout()
433
- if int(outer) < min_needed:
476
+ if float(outer) < min_needed:
434
477
  if self.DEEPEVAL_VERBOSE_MODE:
435
478
  logger.warning(
436
479
  "Metric timeout (outer=%ss) is less than attempts × per-attempt "
437
480
  "timeout + backoff (≈%ss). Retries may be cut short.",
438
- int(outer),
481
+ float(outer),
439
482
  min_needed,
440
483
  )
441
- return int(outer)
484
+ return float(outer)
442
485
 
443
486
  # Auto mode
444
487
  return self._calc_auto_outer_timeout()
445
488
 
489
+ @computed_field
490
+ @property
491
+ def DEEPEVAL_TASK_GATHER_BUFFER_SECONDS(self) -> float:
492
+ """
493
+ Buffer time we add to the longest task’s duration to allow gather/drain
494
+ to complete. If an override is provided, use it; otherwise derive a
495
+ sensible default from the task-level budget:
496
+ buffer = constrain_between(0.15 * DEEPEVAL_PER_TASK_TIMEOUT_SECONDS, 10, 60)
497
+ """
498
+ over = self.DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE
499
+ if over is not None and float(over) >= 0:
500
+ return float(over)
501
+
502
+ outer = float(self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS or 0.0)
503
+ base = 0.15 * outer
504
+ return constrain_between(base, 10.0, 60.0)
505
+
446
506
  ##############
447
507
  # Validators #
448
508
  ##############
@@ -641,12 +701,119 @@ class Settings(BaseSettings):
641
701
  "CRITICAL, NOTSET, or a numeric logging level."
642
702
  )
643
703
 
704
+ @field_validator("DEEPEVAL_TELEMETRY_OPT_OUT", mode="before")
705
+ @classmethod
706
+ def _apply_telemetry_enabled_alias(cls, v):
707
+ """
708
+ Precedence (most secure):
709
+ - Any OFF signal wins if both are set:
710
+ - DEEPEVAL_TELEMETRY_OPT_OUT = truthy -> OFF
711
+ - DEEPEVAL_TELEMETRY_ENABLED = falsy -> OFF
712
+ - Else, ON signal:
713
+ - DEEPEVAL_TELEMETRY_OPT_OUT = falsy -> ON
714
+ - DEEPEVAL_TELEMETRY_ENABLED = truthy -> ON
715
+ - Else None (unset) -> ON
716
+ """
717
+
718
+ def normalize(x):
719
+ if x is None:
720
+ return None
721
+ s = str(x).strip()
722
+ return None if s == "" else parse_bool(s, default=False)
723
+
724
+ new_opt_out = normalize(v) # True means OFF, False means ON
725
+ legacy_enabled = normalize(
726
+ os.getenv("DEEPEVAL_TELEMETRY_ENABLED")
727
+ ) # True means ON, False means OFF
728
+
729
+ off_signal = (new_opt_out is True) or (legacy_enabled is False)
730
+ on_signal = (new_opt_out is False) or (legacy_enabled is True)
731
+
732
+ # Conflict: simultaneous OFF and ON signals
733
+ if off_signal and on_signal:
734
+ # Only warn if verbose or debug
735
+ if parse_bool(
736
+ os.getenv("DEEPEVAL_VERBOSE_MODE"), default=False
737
+ ) or logger.isEnabledFor(logging.DEBUG):
738
+ logger.warning(
739
+ "Conflicting telemetry flags detected: DEEPEVAL_TELEMETRY_OPT_OUT=%r, "
740
+ "DEEPEVAL_TELEMETRY_ENABLED=%r. Defaulting to OFF.",
741
+ new_opt_out,
742
+ legacy_enabled,
743
+ )
744
+ return True # OFF wins
745
+
746
+ # Clear winner
747
+ if off_signal:
748
+ return True # OFF
749
+ if on_signal:
750
+ return False # ON
751
+
752
+ # Unset means ON
753
+ return False
754
+
755
+ @model_validator(mode="after")
756
+ def _apply_deprecated_computed_env_aliases(self):
757
+ """
758
+ Backwards compatibility courtesy:
759
+ - If users still set a deprecated computed field in the environment,
760
+ emit a deprecation warning and mirror its value into the matching
761
+ *_OVERRIDE field (unless the override is already set).
762
+ - Override always wins if both are present.
763
+ """
764
+ for old_key, override_key in _DEPRECATED_TO_OVERRIDE.items():
765
+ raw = os.getenv(old_key)
766
+ if raw is None or str(raw).strip() == "":
767
+ continue
768
+
769
+ # if override already set, ignore the deprecated one but log a warning
770
+ if getattr(self, override_key) is not None:
771
+ logger.warning(
772
+ "Config deprecation: %s is deprecated and was ignored because %s "
773
+ "is already set. Please remove %s and use %s going forward.",
774
+ old_key,
775
+ override_key,
776
+ old_key,
777
+ override_key,
778
+ )
779
+ continue
780
+
781
+ # apply the deprecated value into the override field.
782
+ try:
783
+ # let pydantic coerce the string to the target type on assignment
784
+ setattr(self, override_key, raw)
785
+ logger.warning(
786
+ "Config deprecation: %s is deprecated. Its value (%r) was applied to %s. "
787
+ "Please migrate to %s and remove %s from your environment.",
788
+ old_key,
789
+ raw,
790
+ override_key,
791
+ override_key,
792
+ old_key,
793
+ )
794
+ except Exception as e:
795
+ # do not let exception bubble up, just warn
796
+ logger.warning(
797
+ "Config deprecation: %s is deprecated and could not be applied to %s "
798
+ "(value=%r): %s",
799
+ old_key,
800
+ override_key,
801
+ raw,
802
+ e,
803
+ )
804
+ return self
805
+
644
806
  #######################
645
807
  # Persistence support #
646
808
  #######################
647
809
  class _SettingsEditCtx:
810
+ # TODO: will generate this list in future PR
648
811
  COMPUTED_FIELDS: frozenset[str] = frozenset(
649
- {"DEEPEVAL_PER_TASK_TIMEOUT_SECONDS"}
812
+ {
813
+ "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS",
814
+ "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS",
815
+ "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS",
816
+ }
650
817
  )
651
818
 
652
819
  def __init__(
@@ -810,18 +977,60 @@ class Settings(BaseSettings):
810
977
  ctx.switch_model_provider(target)
811
978
  return ctx.result
812
979
 
980
+ def _expected_backoff(self, attempts: int) -> float:
981
+ """Sum of expected sleeps for (attempts-1) retries, including jitter expectation."""
982
+ sleeps = max(0, attempts - 1)
983
+ cur = float(self.DEEPEVAL_RETRY_INITIAL_SECONDS)
984
+ cap = float(self.DEEPEVAL_RETRY_CAP_SECONDS)
985
+ base = float(self.DEEPEVAL_RETRY_EXP_BASE)
986
+ jitter = float(self.DEEPEVAL_RETRY_JITTER)
987
+
988
+ backoff = 0.0
989
+ for _ in range(sleeps):
990
+ backoff += min(cap, cur)
991
+ cur *= base
992
+ backoff += sleeps * (jitter / 2.0) # expected jitter
993
+ return backoff
994
+
995
+ def _constrain_between(self, value: float, lo: float, hi: float) -> float:
996
+ """Return value constrained to the inclusive range [lo, hi]."""
997
+ return min(max(value, lo), hi)
998
+
813
999
 
814
1000
  _settings_singleton: Optional[Settings] = None
1001
+ _settings_env_fingerprint: "str | None" = None
1002
+ _settings_lock = threading.RLock()
1003
+
1004
+
1005
+ def _calc_env_fingerprint() -> str:
1006
+ env = os.environ.copy()
1007
+ # must hash in a stable order.
1008
+ keys = sorted(
1009
+ key
1010
+ for key in Settings.model_fields.keys()
1011
+ if key != "_DEPRECATED_TELEMETRY_ENABLED" # exclude deprecated
1012
+ )
1013
+ # encode as triples: (key, present?, value)
1014
+ items = [(k, k in env, env.get(k)) for k in keys]
1015
+ payload = json.dumps(items, ensure_ascii=False, separators=(",", ":"))
1016
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
815
1017
 
816
1018
 
817
1019
  def get_settings() -> Settings:
818
- global _settings_singleton
819
- if _settings_singleton is None:
820
- _settings_singleton = Settings()
821
- from deepeval.config.logging import apply_deepeval_log_level
1020
+ global _settings_singleton, _settings_env_fingerprint
1021
+ fingerprint = _calc_env_fingerprint()
1022
+
1023
+ with _settings_lock:
1024
+ if (
1025
+ _settings_singleton is None
1026
+ or _settings_env_fingerprint != fingerprint
1027
+ ):
1028
+ _settings_singleton = Settings()
1029
+ _settings_env_fingerprint = fingerprint
1030
+ from deepeval.config.logging import apply_deepeval_log_level
822
1031
 
823
- apply_deepeval_log_level()
824
- return _settings_singleton
1032
+ apply_deepeval_log_level()
1033
+ return _settings_singleton
825
1034
 
826
1035
 
827
1036
  def reset_settings(*, reload_dotenv: bool = False) -> Settings:
@@ -837,8 +1046,10 @@ def reset_settings(*, reload_dotenv: bool = False) -> Settings:
837
1046
  Returns:
838
1047
  The fresh Settings instance.
839
1048
  """
840
- global _settings_singleton
841
- if reload_dotenv:
842
- autoload_dotenv()
843
- _settings_singleton = None
1049
+ global _settings_singleton, _settings_env_fingerprint
1050
+ with _settings_lock:
1051
+ if reload_dotenv:
1052
+ autoload_dotenv()
1053
+ _settings_singleton = None
1054
+ _settings_env_fingerprint = None
844
1055
  return get_settings()
@@ -4,6 +4,7 @@ dotenv file. Also syncs os.environ, handles unsets, and warns on unknown fields.
4
4
  Primary entrypoint: update_settings_and_persist.
5
5
  """
6
6
 
7
+ import json
7
8
  import logging
8
9
  import os
9
10
 
@@ -33,6 +34,9 @@ def _normalize_for_env(val: Any) -> Optional[str]:
33
34
  return val.get_secret_value()
34
35
  if isinstance(val, bool):
35
36
  return bool_to_env_str(val)
37
+ # encode sequences as JSON so Settings can parse them back reliably.
38
+ if isinstance(val, (list, tuple, set)):
39
+ return json.dumps(list(val))
36
40
  return str(val)
37
41
 
38
42
 
deepeval/config/utils.py CHANGED
@@ -137,3 +137,8 @@ def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
137
137
  seen.add(x)
138
138
  out.append(x)
139
139
  return out
140
+
141
+
142
+ def constrain_between(value: float, lo: float, hi: float) -> float:
143
+ """Return value constrained to the inclusive range [lo, hi]."""
144
+ return min(max(value, lo), hi)