deepeval 3.6.8__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/anthropic/__init__.py +19 -0
- deepeval/anthropic/extractors.py +94 -0
- deepeval/anthropic/patch.py +169 -0
- deepeval/anthropic/utils.py +225 -0
- deepeval/benchmarks/drop/drop.py +40 -14
- deepeval/benchmarks/ifeval/ifeval.py +2 -2
- deepeval/confident/types.py +4 -2
- deepeval/config/settings.py +258 -47
- deepeval/config/settings_manager.py +4 -0
- deepeval/config/utils.py +5 -0
- deepeval/dataset/dataset.py +162 -30
- deepeval/dataset/utils.py +41 -13
- deepeval/evaluate/execute.py +1099 -633
- deepeval/integrations/crewai/handler.py +36 -0
- deepeval/integrations/langchain/callback.py +27 -2
- deepeval/integrations/llama_index/handler.py +58 -4
- deepeval/integrations/llama_index/utils.py +24 -0
- deepeval/metrics/__init__.py +5 -0
- deepeval/metrics/exact_match/__init__.py +0 -0
- deepeval/metrics/exact_match/exact_match.py +94 -0
- deepeval/metrics/indicator.py +21 -1
- deepeval/metrics/pattern_match/__init__.py +0 -0
- deepeval/metrics/pattern_match/pattern_match.py +103 -0
- deepeval/metrics/task_completion/task_completion.py +9 -2
- deepeval/model_integrations/__init__.py +0 -0
- deepeval/model_integrations/utils.py +116 -0
- deepeval/models/base_model.py +3 -1
- deepeval/models/llms/amazon_bedrock_model.py +20 -17
- deepeval/models/llms/openai_model.py +10 -1
- deepeval/models/retry_policy.py +103 -20
- deepeval/openai/__init__.py +3 -1
- deepeval/openai/extractors.py +2 -2
- deepeval/openai/utils.py +7 -31
- deepeval/prompt/api.py +11 -10
- deepeval/prompt/prompt.py +5 -4
- deepeval/simulator/conversation_simulator.py +25 -18
- deepeval/synthesizer/chunking/context_generator.py +9 -1
- deepeval/telemetry.py +3 -3
- deepeval/test_case/llm_test_case.py +3 -2
- deepeval/test_run/api.py +3 -2
- deepeval/test_run/cache.py +4 -3
- deepeval/test_run/test_run.py +24 -5
- deepeval/tracing/api.py +11 -10
- deepeval/tracing/otel/exporter.py +11 -0
- deepeval/tracing/patchers.py +102 -1
- deepeval/tracing/trace_context.py +13 -4
- deepeval/tracing/tracing.py +10 -1
- deepeval/tracing/types.py +8 -8
- deepeval/tracing/utils.py +9 -0
- deepeval/utils.py +44 -2
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/METADATA +2 -2
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/RECORD +57 -47
- /deepeval/{openai → model_integrations}/types.py +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/WHEEL +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/entry_points.txt +0 -0
deepeval/config/settings.py
CHANGED
|
@@ -9,10 +9,13 @@ Central config for DeepEval.
|
|
|
9
9
|
type coercion.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
+
import hashlib
|
|
13
|
+
import json
|
|
12
14
|
import logging
|
|
13
15
|
import math
|
|
14
16
|
import os
|
|
15
17
|
import re
|
|
18
|
+
import threading
|
|
16
19
|
|
|
17
20
|
from dotenv import dotenv_values
|
|
18
21
|
from pathlib import Path
|
|
@@ -22,6 +25,7 @@ from pydantic import (
|
|
|
22
25
|
confloat,
|
|
23
26
|
conint,
|
|
24
27
|
field_validator,
|
|
28
|
+
model_validator,
|
|
25
29
|
SecretStr,
|
|
26
30
|
)
|
|
27
31
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
@@ -30,6 +34,7 @@ from typing import Any, Dict, List, Optional, NamedTuple
|
|
|
30
34
|
from deepeval.config.utils import (
|
|
31
35
|
parse_bool,
|
|
32
36
|
coerce_to_list,
|
|
37
|
+
constrain_between,
|
|
33
38
|
dedupe_preserve_order,
|
|
34
39
|
)
|
|
35
40
|
from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
|
|
@@ -38,6 +43,13 @@ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
|
|
|
38
43
|
logger = logging.getLogger(__name__)
|
|
39
44
|
_SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
|
|
40
45
|
|
|
46
|
+
# settings that were converted to computed fields with override counterparts
|
|
47
|
+
_DEPRECATED_TO_OVERRIDE = {
|
|
48
|
+
"DEEPEVAL_PER_TASK_TIMEOUT_SECONDS": "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE",
|
|
49
|
+
"DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS": "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE",
|
|
50
|
+
"DEEPEVAL_TASK_GATHER_BUFFER_SECONDS": "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE",
|
|
51
|
+
}
|
|
52
|
+
|
|
41
53
|
|
|
42
54
|
def _find_legacy_enum(env_key: str):
|
|
43
55
|
from deepeval.key_handler import (
|
|
@@ -336,6 +348,7 @@ class Settings(BaseSettings):
|
|
|
336
348
|
IGNORE_DEEPEVAL_ERRORS: Optional[bool] = None
|
|
337
349
|
SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = None
|
|
338
350
|
DEEPEVAL_VERBOSE_MODE: Optional[bool] = None
|
|
351
|
+
DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = None
|
|
339
352
|
ENABLE_DEEPEVAL_CACHE: Optional[bool] = None
|
|
340
353
|
|
|
341
354
|
CONFIDENT_TRACE_FLUSH: Optional[bool] = None
|
|
@@ -355,11 +368,19 @@ class Settings(BaseSettings):
|
|
|
355
368
|
#
|
|
356
369
|
MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
|
|
357
370
|
MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
|
|
358
|
-
#
|
|
359
|
-
#
|
|
360
|
-
#
|
|
361
|
-
|
|
362
|
-
|
|
371
|
+
# DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
|
|
372
|
+
# Per-attempt timeout (seconds) for provider calls used by the retry policy.
|
|
373
|
+
# This is an OVERRIDE setting. The effective value you should rely on at runtime is
|
|
374
|
+
# the computed property: DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS.
|
|
375
|
+
#
|
|
376
|
+
# If this is None or 0 the DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS is computed from either:
|
|
377
|
+
# - DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: slice the outer budget
|
|
378
|
+
# across attempts after subtracting expected backoff and a small safety buffer
|
|
379
|
+
# - the default outer budget (180s) if no outer override is set.
|
|
380
|
+
#
|
|
381
|
+
# Tip: Set this OR the outer override, but generally not both
|
|
382
|
+
DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(gt=0)] = (
|
|
383
|
+
None
|
|
363
384
|
)
|
|
364
385
|
|
|
365
386
|
#
|
|
@@ -373,76 +394,115 @@ class Settings(BaseSettings):
|
|
|
373
394
|
#
|
|
374
395
|
DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = 128
|
|
375
396
|
DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = 5.0
|
|
376
|
-
#
|
|
377
|
-
#
|
|
378
|
-
#
|
|
379
|
-
# attempts * per_attempt_timeout +
|
|
380
|
-
#
|
|
381
|
-
# - OVERRIDE > 0 -> use that exact value. A warning is logged if it is likely too small
|
|
382
|
-
# to permit the configured attempts/backoff.
|
|
397
|
+
# DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
|
|
398
|
+
# Outer time budget (seconds) for a single metric/test-case, including retries and backoff.
|
|
399
|
+
# This is an OVERRIDE setting. If None or 0 the DEEPEVAL_PER_TASK_TIMEOUT_SECONDS field is computed:
|
|
400
|
+
# attempts * per_attempt_timeout + expected_backoff + 1s safety
|
|
401
|
+
# (When neither override is set 180s is used.)
|
|
383
402
|
#
|
|
384
|
-
#
|
|
385
|
-
#
|
|
386
|
-
#
|
|
387
|
-
|
|
403
|
+
# If > 0, we use the value exactly and log a warning if it is likely too small
|
|
404
|
+
# to accommodate the configured attempts/backoff.
|
|
405
|
+
#
|
|
406
|
+
# usage:
|
|
407
|
+
# - set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE along with DEEPEVAL_RETRY_MAX_ATTEMPTS, or
|
|
408
|
+
# - set DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE alone.
|
|
409
|
+
DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = None
|
|
388
410
|
|
|
389
411
|
# Buffer time for gathering results from all tasks, added to the longest task duration
|
|
390
412
|
# Increase if many tasks are running concurrently
|
|
391
|
-
DEEPEVAL_TASK_GATHER_BUFFER_SECONDS: confloat(ge=0) =
|
|
413
|
+
# DEEPEVAL_TASK_GATHER_BUFFER_SECONDS: confloat(ge=0) = (
|
|
414
|
+
# 30 # 15s seemed like not enough. we may make this computed later.
|
|
415
|
+
# )
|
|
416
|
+
DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
|
|
417
|
+
None
|
|
418
|
+
)
|
|
392
419
|
|
|
393
420
|
###################
|
|
394
421
|
# Computed Fields #
|
|
395
422
|
###################
|
|
396
423
|
|
|
397
|
-
def _calc_auto_outer_timeout(self) ->
|
|
424
|
+
def _calc_auto_outer_timeout(self) -> float:
|
|
398
425
|
"""Compute outer budget from per-attempt timeout + retries/backoff.
|
|
399
426
|
Never reference the computed property itself here.
|
|
400
427
|
"""
|
|
401
428
|
attempts = self.DEEPEVAL_RETRY_MAX_ATTEMPTS or 1
|
|
402
|
-
timeout_seconds = float(
|
|
429
|
+
timeout_seconds = float(
|
|
430
|
+
self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE or 0
|
|
431
|
+
)
|
|
403
432
|
if timeout_seconds <= 0:
|
|
404
433
|
# No per-attempt timeout set -> default outer budget
|
|
405
434
|
return 180
|
|
406
435
|
|
|
407
|
-
|
|
408
|
-
cur = float(self.DEEPEVAL_RETRY_INITIAL_SECONDS)
|
|
409
|
-
cap = float(self.DEEPEVAL_RETRY_CAP_SECONDS)
|
|
410
|
-
base = float(self.DEEPEVAL_RETRY_EXP_BASE)
|
|
411
|
-
jitter = float(self.DEEPEVAL_RETRY_JITTER)
|
|
412
|
-
|
|
413
|
-
backoff = 0.0
|
|
414
|
-
for _ in range(sleeps):
|
|
415
|
-
backoff += min(cap, cur)
|
|
416
|
-
cur *= base
|
|
417
|
-
backoff += sleeps * (jitter / 2.0) # expected jitter
|
|
418
|
-
|
|
436
|
+
backoff = self._expected_backoff(attempts)
|
|
419
437
|
safety_overhead = 1.0
|
|
420
|
-
return
|
|
438
|
+
return float(
|
|
421
439
|
math.ceil(attempts * timeout_seconds + backoff + safety_overhead)
|
|
422
440
|
)
|
|
423
441
|
|
|
424
442
|
@computed_field
|
|
425
443
|
@property
|
|
426
|
-
def
|
|
444
|
+
def DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS(self) -> float:
|
|
445
|
+
over = self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
|
|
446
|
+
if over is not None and float(over) > 0:
|
|
447
|
+
return float(over)
|
|
448
|
+
|
|
449
|
+
attempts = int(self.DEEPEVAL_RETRY_MAX_ATTEMPTS or 1)
|
|
450
|
+
outer_over = self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
|
|
451
|
+
|
|
452
|
+
# If the user set an outer override, slice it up
|
|
453
|
+
if outer_over and float(outer_over) > 0 and attempts > 0:
|
|
454
|
+
backoff = self._expected_backoff(attempts)
|
|
455
|
+
safety = 1.0
|
|
456
|
+
usable = max(0.0, float(outer_over) - backoff - safety)
|
|
457
|
+
return 0.0 if usable <= 0 else (usable / attempts)
|
|
458
|
+
|
|
459
|
+
# NEW: when neither override is set, derive from the default outer (180s)
|
|
460
|
+
default_outer = 180.0
|
|
461
|
+
backoff = self._expected_backoff(attempts)
|
|
462
|
+
safety = 1.0
|
|
463
|
+
usable = max(0.0, default_outer - backoff - safety)
|
|
464
|
+
# Keep per-attempt sensible (cap to at least 1s)
|
|
465
|
+
return 0.0 if usable <= 0 else max(1.0, usable / attempts)
|
|
466
|
+
|
|
467
|
+
@computed_field
|
|
468
|
+
@property
|
|
469
|
+
def DEEPEVAL_PER_TASK_TIMEOUT_SECONDS(self) -> float:
|
|
427
470
|
"""If OVERRIDE is set (nonzero), return it; else return the derived budget."""
|
|
428
471
|
outer = self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
|
|
429
472
|
if outer not in (None, 0):
|
|
430
473
|
# Warn if user-provided outer is likely to truncate retries
|
|
431
474
|
if (self.DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0) > 0:
|
|
432
475
|
min_needed = self._calc_auto_outer_timeout()
|
|
433
|
-
if
|
|
476
|
+
if float(outer) < min_needed:
|
|
434
477
|
if self.DEEPEVAL_VERBOSE_MODE:
|
|
435
478
|
logger.warning(
|
|
436
479
|
"Metric timeout (outer=%ss) is less than attempts × per-attempt "
|
|
437
480
|
"timeout + backoff (≈%ss). Retries may be cut short.",
|
|
438
|
-
|
|
481
|
+
float(outer),
|
|
439
482
|
min_needed,
|
|
440
483
|
)
|
|
441
|
-
return
|
|
484
|
+
return float(outer)
|
|
442
485
|
|
|
443
486
|
# Auto mode
|
|
444
487
|
return self._calc_auto_outer_timeout()
|
|
445
488
|
|
|
489
|
+
@computed_field
|
|
490
|
+
@property
|
|
491
|
+
def DEEPEVAL_TASK_GATHER_BUFFER_SECONDS(self) -> float:
|
|
492
|
+
"""
|
|
493
|
+
Buffer time we add to the longest task’s duration to allow gather/drain
|
|
494
|
+
to complete. If an override is provided, use it; otherwise derive a
|
|
495
|
+
sensible default from the task-level budget:
|
|
496
|
+
buffer = constrain_between(0.15 * DEEPEVAL_PER_TASK_TIMEOUT_SECONDS, 10, 60)
|
|
497
|
+
"""
|
|
498
|
+
over = self.DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE
|
|
499
|
+
if over is not None and float(over) >= 0:
|
|
500
|
+
return float(over)
|
|
501
|
+
|
|
502
|
+
outer = float(self.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS or 0.0)
|
|
503
|
+
base = 0.15 * outer
|
|
504
|
+
return constrain_between(base, 10.0, 60.0)
|
|
505
|
+
|
|
446
506
|
##############
|
|
447
507
|
# Validators #
|
|
448
508
|
##############
|
|
@@ -641,12 +701,119 @@ class Settings(BaseSettings):
|
|
|
641
701
|
"CRITICAL, NOTSET, or a numeric logging level."
|
|
642
702
|
)
|
|
643
703
|
|
|
704
|
+
@field_validator("DEEPEVAL_TELEMETRY_OPT_OUT", mode="before")
|
|
705
|
+
@classmethod
|
|
706
|
+
def _apply_telemetry_enabled_alias(cls, v):
|
|
707
|
+
"""
|
|
708
|
+
Precedence (most secure):
|
|
709
|
+
- Any OFF signal wins if both are set:
|
|
710
|
+
- DEEPEVAL_TELEMETRY_OPT_OUT = truthy -> OFF
|
|
711
|
+
- DEEPEVAL_TELEMETRY_ENABLED = falsy -> OFF
|
|
712
|
+
- Else, ON signal:
|
|
713
|
+
- DEEPEVAL_TELEMETRY_OPT_OUT = falsy -> ON
|
|
714
|
+
- DEEPEVAL_TELEMETRY_ENABLED = truthy -> ON
|
|
715
|
+
- Else None (unset) -> ON
|
|
716
|
+
"""
|
|
717
|
+
|
|
718
|
+
def normalize(x):
|
|
719
|
+
if x is None:
|
|
720
|
+
return None
|
|
721
|
+
s = str(x).strip()
|
|
722
|
+
return None if s == "" else parse_bool(s, default=False)
|
|
723
|
+
|
|
724
|
+
new_opt_out = normalize(v) # True means OFF, False means ON
|
|
725
|
+
legacy_enabled = normalize(
|
|
726
|
+
os.getenv("DEEPEVAL_TELEMETRY_ENABLED")
|
|
727
|
+
) # True means ON, False means OFF
|
|
728
|
+
|
|
729
|
+
off_signal = (new_opt_out is True) or (legacy_enabled is False)
|
|
730
|
+
on_signal = (new_opt_out is False) or (legacy_enabled is True)
|
|
731
|
+
|
|
732
|
+
# Conflict: simultaneous OFF and ON signals
|
|
733
|
+
if off_signal and on_signal:
|
|
734
|
+
# Only warn if verbose or debug
|
|
735
|
+
if parse_bool(
|
|
736
|
+
os.getenv("DEEPEVAL_VERBOSE_MODE"), default=False
|
|
737
|
+
) or logger.isEnabledFor(logging.DEBUG):
|
|
738
|
+
logger.warning(
|
|
739
|
+
"Conflicting telemetry flags detected: DEEPEVAL_TELEMETRY_OPT_OUT=%r, "
|
|
740
|
+
"DEEPEVAL_TELEMETRY_ENABLED=%r. Defaulting to OFF.",
|
|
741
|
+
new_opt_out,
|
|
742
|
+
legacy_enabled,
|
|
743
|
+
)
|
|
744
|
+
return True # OFF wins
|
|
745
|
+
|
|
746
|
+
# Clear winner
|
|
747
|
+
if off_signal:
|
|
748
|
+
return True # OFF
|
|
749
|
+
if on_signal:
|
|
750
|
+
return False # ON
|
|
751
|
+
|
|
752
|
+
# Unset means ON
|
|
753
|
+
return False
|
|
754
|
+
|
|
755
|
+
@model_validator(mode="after")
|
|
756
|
+
def _apply_deprecated_computed_env_aliases(self):
|
|
757
|
+
"""
|
|
758
|
+
Backwards compatibility courtesy:
|
|
759
|
+
- If users still set a deprecated computed field in the environment,
|
|
760
|
+
emit a deprecation warning and mirror its value into the matching
|
|
761
|
+
*_OVERRIDE field (unless the override is already set).
|
|
762
|
+
- Override always wins if both are present.
|
|
763
|
+
"""
|
|
764
|
+
for old_key, override_key in _DEPRECATED_TO_OVERRIDE.items():
|
|
765
|
+
raw = os.getenv(old_key)
|
|
766
|
+
if raw is None or str(raw).strip() == "":
|
|
767
|
+
continue
|
|
768
|
+
|
|
769
|
+
# if override already set, ignore the deprecated one but log a warning
|
|
770
|
+
if getattr(self, override_key) is not None:
|
|
771
|
+
logger.warning(
|
|
772
|
+
"Config deprecation: %s is deprecated and was ignored because %s "
|
|
773
|
+
"is already set. Please remove %s and use %s going forward.",
|
|
774
|
+
old_key,
|
|
775
|
+
override_key,
|
|
776
|
+
old_key,
|
|
777
|
+
override_key,
|
|
778
|
+
)
|
|
779
|
+
continue
|
|
780
|
+
|
|
781
|
+
# apply the deprecated value into the override field.
|
|
782
|
+
try:
|
|
783
|
+
# let pydantic coerce the string to the target type on assignment
|
|
784
|
+
setattr(self, override_key, raw)
|
|
785
|
+
logger.warning(
|
|
786
|
+
"Config deprecation: %s is deprecated. Its value (%r) was applied to %s. "
|
|
787
|
+
"Please migrate to %s and remove %s from your environment.",
|
|
788
|
+
old_key,
|
|
789
|
+
raw,
|
|
790
|
+
override_key,
|
|
791
|
+
override_key,
|
|
792
|
+
old_key,
|
|
793
|
+
)
|
|
794
|
+
except Exception as e:
|
|
795
|
+
# do not let exception bubble up, just warn
|
|
796
|
+
logger.warning(
|
|
797
|
+
"Config deprecation: %s is deprecated and could not be applied to %s "
|
|
798
|
+
"(value=%r): %s",
|
|
799
|
+
old_key,
|
|
800
|
+
override_key,
|
|
801
|
+
raw,
|
|
802
|
+
e,
|
|
803
|
+
)
|
|
804
|
+
return self
|
|
805
|
+
|
|
644
806
|
#######################
|
|
645
807
|
# Persistence support #
|
|
646
808
|
#######################
|
|
647
809
|
class _SettingsEditCtx:
|
|
810
|
+
# TODO: will generate this list in future PR
|
|
648
811
|
COMPUTED_FIELDS: frozenset[str] = frozenset(
|
|
649
|
-
{
|
|
812
|
+
{
|
|
813
|
+
"DEEPEVAL_PER_TASK_TIMEOUT_SECONDS",
|
|
814
|
+
"DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS",
|
|
815
|
+
"DEEPEVAL_TASK_GATHER_BUFFER_SECONDS",
|
|
816
|
+
}
|
|
650
817
|
)
|
|
651
818
|
|
|
652
819
|
def __init__(
|
|
@@ -810,18 +977,60 @@ class Settings(BaseSettings):
|
|
|
810
977
|
ctx.switch_model_provider(target)
|
|
811
978
|
return ctx.result
|
|
812
979
|
|
|
980
|
+
def _expected_backoff(self, attempts: int) -> float:
|
|
981
|
+
"""Sum of expected sleeps for (attempts-1) retries, including jitter expectation."""
|
|
982
|
+
sleeps = max(0, attempts - 1)
|
|
983
|
+
cur = float(self.DEEPEVAL_RETRY_INITIAL_SECONDS)
|
|
984
|
+
cap = float(self.DEEPEVAL_RETRY_CAP_SECONDS)
|
|
985
|
+
base = float(self.DEEPEVAL_RETRY_EXP_BASE)
|
|
986
|
+
jitter = float(self.DEEPEVAL_RETRY_JITTER)
|
|
987
|
+
|
|
988
|
+
backoff = 0.0
|
|
989
|
+
for _ in range(sleeps):
|
|
990
|
+
backoff += min(cap, cur)
|
|
991
|
+
cur *= base
|
|
992
|
+
backoff += sleeps * (jitter / 2.0) # expected jitter
|
|
993
|
+
return backoff
|
|
994
|
+
|
|
995
|
+
def _constrain_between(self, value: float, lo: float, hi: float) -> float:
|
|
996
|
+
"""Return value constrained to the inclusive range [lo, hi]."""
|
|
997
|
+
return min(max(value, lo), hi)
|
|
998
|
+
|
|
813
999
|
|
|
814
1000
|
_settings_singleton: Optional[Settings] = None
|
|
1001
|
+
_settings_env_fingerprint: "str | None" = None
|
|
1002
|
+
_settings_lock = threading.RLock()
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
def _calc_env_fingerprint() -> str:
|
|
1006
|
+
env = os.environ.copy()
|
|
1007
|
+
# must hash in a stable order.
|
|
1008
|
+
keys = sorted(
|
|
1009
|
+
key
|
|
1010
|
+
for key in Settings.model_fields.keys()
|
|
1011
|
+
if key != "_DEPRECATED_TELEMETRY_ENABLED" # exclude deprecated
|
|
1012
|
+
)
|
|
1013
|
+
# encode as triples: (key, present?, value)
|
|
1014
|
+
items = [(k, k in env, env.get(k)) for k in keys]
|
|
1015
|
+
payload = json.dumps(items, ensure_ascii=False, separators=(",", ":"))
|
|
1016
|
+
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
|
815
1017
|
|
|
816
1018
|
|
|
817
1019
|
def get_settings() -> Settings:
|
|
818
|
-
global _settings_singleton
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
1020
|
+
global _settings_singleton, _settings_env_fingerprint
|
|
1021
|
+
fingerprint = _calc_env_fingerprint()
|
|
1022
|
+
|
|
1023
|
+
with _settings_lock:
|
|
1024
|
+
if (
|
|
1025
|
+
_settings_singleton is None
|
|
1026
|
+
or _settings_env_fingerprint != fingerprint
|
|
1027
|
+
):
|
|
1028
|
+
_settings_singleton = Settings()
|
|
1029
|
+
_settings_env_fingerprint = fingerprint
|
|
1030
|
+
from deepeval.config.logging import apply_deepeval_log_level
|
|
822
1031
|
|
|
823
|
-
|
|
824
|
-
|
|
1032
|
+
apply_deepeval_log_level()
|
|
1033
|
+
return _settings_singleton
|
|
825
1034
|
|
|
826
1035
|
|
|
827
1036
|
def reset_settings(*, reload_dotenv: bool = False) -> Settings:
|
|
@@ -837,8 +1046,10 @@ def reset_settings(*, reload_dotenv: bool = False) -> Settings:
|
|
|
837
1046
|
Returns:
|
|
838
1047
|
The fresh Settings instance.
|
|
839
1048
|
"""
|
|
840
|
-
global _settings_singleton
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
1049
|
+
global _settings_singleton, _settings_env_fingerprint
|
|
1050
|
+
with _settings_lock:
|
|
1051
|
+
if reload_dotenv:
|
|
1052
|
+
autoload_dotenv()
|
|
1053
|
+
_settings_singleton = None
|
|
1054
|
+
_settings_env_fingerprint = None
|
|
844
1055
|
return get_settings()
|
|
@@ -4,6 +4,7 @@ dotenv file. Also syncs os.environ, handles unsets, and warns on unknown fields.
|
|
|
4
4
|
Primary entrypoint: update_settings_and_persist.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import json
|
|
7
8
|
import logging
|
|
8
9
|
import os
|
|
9
10
|
|
|
@@ -33,6 +34,9 @@ def _normalize_for_env(val: Any) -> Optional[str]:
|
|
|
33
34
|
return val.get_secret_value()
|
|
34
35
|
if isinstance(val, bool):
|
|
35
36
|
return bool_to_env_str(val)
|
|
37
|
+
# encode sequences as JSON so Settings can parse them back reliably.
|
|
38
|
+
if isinstance(val, (list, tuple, set)):
|
|
39
|
+
return json.dumps(list(val))
|
|
36
40
|
return str(val)
|
|
37
41
|
|
|
38
42
|
|
deepeval/config/utils.py
CHANGED
|
@@ -137,3 +137,8 @@ def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
|
|
|
137
137
|
seen.add(x)
|
|
138
138
|
out.append(x)
|
|
139
139
|
return out
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def constrain_between(value: float, lo: float, hi: float) -> float:
|
|
143
|
+
"""Return value constrained to the inclusive range [lo, hi]."""
|
|
144
|
+
return min(max(value, lo), hi)
|