deepeval 3.7.5__py3-none-any.whl → 3.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/main.py +2022 -759
  3. deepeval/cli/utils.py +208 -36
  4. deepeval/config/dotenv_handler.py +19 -0
  5. deepeval/config/settings.py +675 -245
  6. deepeval/config/utils.py +9 -1
  7. deepeval/dataset/api.py +23 -1
  8. deepeval/dataset/golden.py +106 -21
  9. deepeval/evaluate/evaluate.py +0 -3
  10. deepeval/evaluate/execute.py +162 -315
  11. deepeval/evaluate/utils.py +6 -30
  12. deepeval/key_handler.py +124 -51
  13. deepeval/metrics/__init__.py +0 -4
  14. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  15. deepeval/metrics/answer_relevancy/template.py +102 -179
  16. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  17. deepeval/metrics/arena_g_eval/template.py +17 -1
  18. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  19. deepeval/metrics/argument_correctness/template.py +19 -2
  20. deepeval/metrics/base_metric.py +19 -41
  21. deepeval/metrics/bias/bias.py +102 -108
  22. deepeval/metrics/bias/template.py +14 -2
  23. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  24. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  26. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  27. deepeval/metrics/conversation_completeness/template.py +23 -3
  28. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  29. deepeval/metrics/conversational_dag/nodes.py +66 -123
  30. deepeval/metrics/conversational_dag/templates.py +16 -0
  31. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  32. deepeval/metrics/dag/dag.py +10 -0
  33. deepeval/metrics/dag/nodes.py +63 -126
  34. deepeval/metrics/dag/templates.py +14 -0
  35. deepeval/metrics/exact_match/exact_match.py +9 -1
  36. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  37. deepeval/metrics/g_eval/g_eval.py +93 -79
  38. deepeval/metrics/g_eval/template.py +18 -1
  39. deepeval/metrics/g_eval/utils.py +7 -6
  40. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  41. deepeval/metrics/goal_accuracy/template.py +21 -3
  42. deepeval/metrics/hallucination/hallucination.py +60 -75
  43. deepeval/metrics/hallucination/template.py +13 -0
  44. deepeval/metrics/indicator.py +11 -10
  45. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  46. deepeval/metrics/json_correctness/template.py +10 -0
  47. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  48. deepeval/metrics/knowledge_retention/schema.py +9 -3
  49. deepeval/metrics/knowledge_retention/template.py +12 -0
  50. deepeval/metrics/mcp/mcp_task_completion.py +72 -43
  51. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +93 -75
  52. deepeval/metrics/mcp/schema.py +4 -0
  53. deepeval/metrics/mcp/template.py +59 -0
  54. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  55. deepeval/metrics/mcp_use_metric/template.py +12 -0
  56. deepeval/metrics/misuse/misuse.py +77 -97
  57. deepeval/metrics/misuse/template.py +15 -0
  58. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  59. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  60. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  61. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  62. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  63. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  64. deepeval/metrics/non_advice/non_advice.py +79 -105
  65. deepeval/metrics/non_advice/template.py +12 -0
  66. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  67. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  68. deepeval/metrics/pii_leakage/template.py +14 -0
  69. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  70. deepeval/metrics/plan_adherence/template.py +11 -0
  71. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  72. deepeval/metrics/plan_quality/template.py +9 -0
  73. deepeval/metrics/prompt_alignment/prompt_alignment.py +78 -86
  74. deepeval/metrics/prompt_alignment/template.py +12 -0
  75. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  76. deepeval/metrics/role_adherence/template.py +14 -0
  77. deepeval/metrics/role_violation/role_violation.py +75 -108
  78. deepeval/metrics/role_violation/template.py +12 -0
  79. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  80. deepeval/metrics/step_efficiency/template.py +11 -0
  81. deepeval/metrics/summarization/summarization.py +115 -183
  82. deepeval/metrics/summarization/template.py +19 -0
  83. deepeval/metrics/task_completion/task_completion.py +67 -73
  84. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  85. deepeval/metrics/tool_use/schema.py +4 -0
  86. deepeval/metrics/tool_use/template.py +16 -2
  87. deepeval/metrics/tool_use/tool_use.py +72 -94
  88. deepeval/metrics/topic_adherence/schema.py +4 -0
  89. deepeval/metrics/topic_adherence/template.py +21 -1
  90. deepeval/metrics/topic_adherence/topic_adherence.py +68 -81
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  94. deepeval/metrics/turn_contextual_precision/template.py +9 -2
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +154 -154
  96. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  97. deepeval/metrics/turn_contextual_recall/template.py +8 -1
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +148 -143
  99. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  100. deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +154 -157
  102. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  103. deepeval/metrics/turn_faithfulness/template.py +8 -1
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +180 -203
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +161 -91
  108. deepeval/models/__init__.py +2 -0
  109. deepeval/models/base_model.py +44 -6
  110. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  111. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  112. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  113. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  114. deepeval/models/llms/__init__.py +2 -0
  115. deepeval/models/llms/amazon_bedrock_model.py +229 -73
  116. deepeval/models/llms/anthropic_model.py +143 -48
  117. deepeval/models/llms/azure_model.py +169 -95
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +82 -35
  120. deepeval/models/llms/gemini_model.py +126 -67
  121. deepeval/models/llms/grok_model.py +128 -65
  122. deepeval/models/llms/kimi_model.py +129 -87
  123. deepeval/models/llms/litellm_model.py +94 -18
  124. deepeval/models/llms/local_model.py +115 -16
  125. deepeval/models/llms/ollama_model.py +97 -76
  126. deepeval/models/llms/openai_model.py +169 -311
  127. deepeval/models/llms/portkey_model.py +58 -16
  128. deepeval/models/llms/utils.py +5 -2
  129. deepeval/models/retry_policy.py +10 -5
  130. deepeval/models/utils.py +56 -4
  131. deepeval/simulator/conversation_simulator.py +49 -2
  132. deepeval/simulator/template.py +16 -1
  133. deepeval/synthesizer/synthesizer.py +19 -17
  134. deepeval/test_case/api.py +24 -45
  135. deepeval/test_case/arena_test_case.py +7 -2
  136. deepeval/test_case/conversational_test_case.py +55 -6
  137. deepeval/test_case/llm_test_case.py +60 -6
  138. deepeval/test_run/api.py +3 -0
  139. deepeval/test_run/test_run.py +6 -1
  140. deepeval/utils.py +26 -0
  141. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/METADATA +3 -3
  142. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/RECORD +145 -148
  143. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  144. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  145. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  146. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  147. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  148. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/LICENSE.md +0 -0
  149. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/WHEEL +0 -0
  150. {deepeval-3.7.5.dist-info → deepeval-3.7.7.dist-info}/entry_points.txt +0 -0
@@ -17,25 +17,37 @@ import os
17
17
  import re
18
18
  import threading
19
19
 
20
- from dotenv import dotenv_values
20
+ from contextvars import ContextVar
21
21
  from pathlib import Path
22
22
  from pydantic import (
23
23
  AnyUrl,
24
24
  computed_field,
25
25
  confloat,
26
26
  conint,
27
+ Field,
27
28
  field_validator,
28
29
  model_validator,
29
30
  SecretStr,
31
+ PositiveFloat,
30
32
  )
31
33
  from pydantic_settings import BaseSettings, SettingsConfigDict
32
- from typing import Any, Dict, List, Optional, NamedTuple
34
+ from typing import (
35
+ Any,
36
+ Dict,
37
+ List,
38
+ Optional,
39
+ Union,
40
+ NamedTuple,
41
+ get_args,
42
+ get_origin,
43
+ )
33
44
 
34
45
  from deepeval.config.utils import (
35
- parse_bool,
36
46
  coerce_to_list,
37
47
  constrain_between,
38
48
  dedupe_preserve_order,
49
+ parse_bool,
50
+ read_dotenv_file,
39
51
  )
40
52
  from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
41
53
 
@@ -43,6 +55,10 @@ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
43
55
  logger = logging.getLogger(__name__)
44
56
  _SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
45
57
 
58
+ _ACTIVE_SETTINGS_EDIT_CTX: ContextVar[Optional["Settings._SettingsEditCtx"]] = (
59
+ ContextVar("_ACTIVE_SETTINGS_EDIT_CTX", default=None)
60
+ )
61
+
46
62
  # settings that were converted to computed fields with override counterparts
47
63
  _DEPRECATED_TO_OVERRIDE = {
48
64
  "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS": "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE",
@@ -75,14 +91,12 @@ def _find_legacy_enum(env_key: str):
75
91
  return None
76
92
 
77
93
 
78
- def _is_secret_key(settings: "Settings", env_key: str) -> bool:
79
- field = type(settings).model_fields.get(env_key)
94
+ def _is_secret_key(env_key: str) -> bool:
95
+ field = Settings.model_fields.get(env_key)
80
96
  if not field:
81
97
  return False
82
98
  if field.annotation is SecretStr:
83
99
  return True
84
- # Optional[SecretStr] etc.
85
- from typing import get_origin, get_args, Union
86
100
 
87
101
  origin = get_origin(field.annotation)
88
102
  if origin is Union:
@@ -110,7 +124,6 @@ def _merge_legacy_keyfile_into_env() -> None:
110
124
  KeyValues,
111
125
  ModelKeyValues,
112
126
  EmbeddingKeyValues,
113
- SECRET_KEYS,
114
127
  )
115
128
 
116
129
  key_path = Path(HIDDEN_DIR) / KEY_FILE
@@ -147,41 +160,28 @@ def _merge_legacy_keyfile_into_env() -> None:
147
160
  continue
148
161
 
149
162
  # Mirror the legacy warning semantics for secrets, but only once per key
150
- if (
151
- json_key in SECRET_KEYS
152
- and json_key not in _LEGACY_KEYFILE_SECRET_WARNED
163
+ if env_key not in _LEGACY_KEYFILE_SECRET_WARNED and _is_secret_key(
164
+ env_key
153
165
  ):
154
166
  logger.warning(
155
- "Reading secret '%s' from legacy %s/%s. "
167
+ "Reading secret '%s' (legacy key '%s') from legacy %s/%s. "
156
168
  "Persisting API keys in plaintext is deprecated. "
157
169
  "Move this to your environment (.env / .env.local). "
158
170
  "This fallback will be removed in a future release.",
171
+ env_key,
159
172
  json_key,
160
173
  HIDDEN_DIR,
161
174
  KEY_FILE,
162
175
  )
163
- _LEGACY_KEYFILE_SECRET_WARNED.add(json_key)
164
-
176
+ _LEGACY_KEYFILE_SECRET_WARNED.add(env_key)
165
177
  # Let Settings validators coerce types; we just inject the raw string
166
178
  os.environ[env_key] = str(raw)
167
179
 
168
180
 
169
- def _read_env_file(path: Path) -> Dict[str, str]:
170
- if not path.exists():
171
- return {}
172
- try:
173
- # filter out None to avoid writing "None" later
174
- return {
175
- k: v for k, v in dotenv_values(str(path)).items() if v is not None
176
- }
177
- except Exception:
178
- return {}
179
-
180
-
181
181
  def _discover_app_env_from_files(env_dir: Path) -> Optional[str]:
182
182
  # prefer base .env.local, then .env for APP_ENV discovery
183
183
  for name in (".env.local", ".env"):
184
- v = _read_env_file(env_dir / name).get("APP_ENV")
184
+ v = read_dotenv_file(env_dir / name).get("APP_ENV")
185
185
  if v:
186
186
  v = str(v).strip()
187
187
  if v:
@@ -210,8 +210,8 @@ def autoload_dotenv() -> None:
210
210
  env_dir = Path(os.getcwd())
211
211
 
212
212
  # merge files in precedence order
213
- base = _read_env_file(env_dir / ".env")
214
- local = _read_env_file(env_dir / ".env.local")
213
+ base = read_dotenv_file(env_dir / ".env")
214
+ local = read_dotenv_file(env_dir / ".env.local")
215
215
 
216
216
  # Pick APP_ENV (process -> .env.local -> .env -> default)
217
217
  app_env = (
@@ -222,7 +222,7 @@ def autoload_dotenv() -> None:
222
222
  if app_env is not None:
223
223
  app_env = app_env.strip()
224
224
  if app_env:
225
- env_specific = _read_env_file(env_dir / f".env.{app_env}")
225
+ env_specific = read_dotenv_file(env_dir / f".env.{app_env}")
226
226
  merged.setdefault("APP_ENV", app_env)
227
227
 
228
228
  merged.update(base)
@@ -242,6 +242,14 @@ class PersistResult(NamedTuple):
242
242
 
243
243
 
244
244
  class Settings(BaseSettings):
245
+ # def __init__(self):
246
+ # super().__init__()
247
+ def __setattr__(self, name: str, value):
248
+ ctx = _ACTIVE_SETTINGS_EDIT_CTX.get()
249
+ if ctx is not None and name in type(self).model_fields:
250
+ ctx._touched.add(name)
251
+ return super().__setattr__(name, value)
252
+
245
253
  model_config = SettingsConfigDict(
246
254
  extra="ignore",
247
255
  case_sensitive=True,
@@ -252,23 +260,50 @@ class Settings(BaseSettings):
252
260
  # General
253
261
  #
254
262
 
255
- APP_ENV: str = "dev"
256
- LOG_LEVEL: Optional[int] = None
257
- PYTHONPATH: str = "."
258
- CONFIDENT_REGION: Optional[str] = None
259
- CONFIDENT_OPEN_BROWSER: Optional[bool] = True
263
+ APP_ENV: str = Field(
264
+ "dev",
265
+ description="Application environment name used for dotenv selection (loads .env.<APP_ENV> if present).",
266
+ )
267
+ LOG_LEVEL: Optional[int] = Field(
268
+ None,
269
+ description="Global logging level (e.g. DEBUG/INFO/WARNING/ERROR/CRITICAL or numeric).",
270
+ )
271
+ PYTHONPATH: str = Field(
272
+ ".",
273
+ description="Extra PYTHONPATH used by the CLI runner (default: current project '.').",
274
+ )
275
+ CONFIDENT_REGION: Optional[str] = Field(
276
+ None,
277
+ description="Optional Confident AI region hint (uppercased).",
278
+ )
279
+ CONFIDENT_OPEN_BROWSER: Optional[bool] = Field(
280
+ True,
281
+ description="Open a browser automatically for Confident AI links/flows when available.",
282
+ )
260
283
 
261
284
  #
262
285
  # CLI
263
286
  #
264
-
265
- DEEPEVAL_DEFAULT_SAVE: Optional[str] = None
266
- DEEPEVAL_DISABLE_DOTENV: Optional[bool] = None
267
- ENV_DIR_PATH: Optional[Path] = (
268
- None # where .env files live (CWD if not set)
287
+ DEEPEVAL_DEFAULT_SAVE: Optional[str] = Field(
288
+ None,
289
+ description="Default persistence target for settings changes (e.g. 'dotenv' or 'dotenv:/path/to/.env.local').",
290
+ )
291
+ DEEPEVAL_DISABLE_DOTENV: Optional[bool] = Field(
292
+ None,
293
+ description="Disable dotenv autoloading (.env → .env.<APP_ENV> → .env.local). Tip: set to 1 in pytest/CI to prevent loading env files on import.",
294
+ )
295
+ ENV_DIR_PATH: Optional[Path] = Field(
296
+ None,
297
+ description="Directory containing .env files (default: current working directory).",
298
+ )
299
+ DEEPEVAL_FILE_SYSTEM: Optional[str] = Field(
300
+ None,
301
+ description="Filesystem mode for runtime/CLI (currently supports READ_ONLY).",
302
+ )
303
+ DEEPEVAL_IDENTIFIER: Optional[str] = Field(
304
+ None,
305
+ description="Identifier/tag to help identify your test run on Confident AI.",
269
306
  )
270
- DEEPEVAL_FILE_SYSTEM: Optional[str] = None
271
- DEEPEVAL_IDENTIFIER: Optional[str] = None
272
307
 
273
308
  #
274
309
  # Storage & Output
@@ -276,123 +311,360 @@ class Settings(BaseSettings):
276
311
 
277
312
  # When set, DeepEval will export a timestamped JSON of the latest test run
278
313
  # into this directory. The directory will be created on demand.
279
- DEEPEVAL_RESULTS_FOLDER: Optional[Path] = None
314
+ DEEPEVAL_RESULTS_FOLDER: Optional[Path] = Field(
315
+ None,
316
+ description="If set, export a timestamped JSON of the latest test run into this folder (created if missing).",
317
+ )
280
318
 
281
319
  # Display / Truncation
282
- DEEPEVAL_MAXLEN_TINY: Optional[int] = 40
283
- DEEPEVAL_MAXLEN_SHORT: Optional[int] = 60
284
- DEEPEVAL_MAXLEN_MEDIUM: Optional[int] = 120
285
- DEEPEVAL_MAXLEN_LONG: Optional[int] = 240
320
+ DEEPEVAL_MAXLEN_TINY: Optional[int] = Field(
321
+ 40,
322
+ description="Default truncation length for 'tiny' displays in logs/UI.",
323
+ )
324
+ DEEPEVAL_MAXLEN_SHORT: Optional[int] = Field(
325
+ 60,
326
+ description="Default truncation length for 'short' displays in logs/UI.",
327
+ )
328
+ DEEPEVAL_MAXLEN_MEDIUM: Optional[int] = Field(
329
+ 120,
330
+ description="Default truncation length for 'medium' displays in logs/UI.",
331
+ )
332
+ DEEPEVAL_MAXLEN_LONG: Optional[int] = Field(
333
+ 240,
334
+ description="Default truncation length for 'long' displays in logs/UI.",
335
+ )
286
336
 
287
337
  # If set, this overrides the default max_len used by deepeval/utils shorten
288
338
  # falls back to DEEPEVAL_MAXLEN_LONG when None.
289
- DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] = None
339
+ DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] = Field(
340
+ None,
341
+ description="Override default max_len for deepeval.utils.shorten (falls back to DEEPEVAL_MAXLEN_LONG when unset).",
342
+ )
290
343
 
291
344
  # Optional global suffix (keeps your "..." default).
292
- DEEPEVAL_SHORTEN_SUFFIX: Optional[str] = "..."
345
+ DEEPEVAL_SHORTEN_SUFFIX: Optional[str] = Field(
346
+ "...",
347
+ description="Suffix appended by deepeval.utils.shorten when truncating (default: '...').",
348
+ )
293
349
 
294
350
  #
295
351
  # GPU and perf toggles
296
352
  #
297
353
 
298
- CUDA_LAUNCH_BLOCKING: Optional[bool] = None
299
- CUDA_VISIBLE_DEVICES: Optional[str] = None
300
- TOKENIZERS_PARALLELISM: Optional[bool] = None
301
- TRANSFORMERS_NO_ADVISORY_WARNINGS: Optional[bool] = None
354
+ CUDA_LAUNCH_BLOCKING: Optional[bool] = Field(
355
+ None,
356
+ description="CUDA debug toggle (forces synchronous CUDA ops). Useful for debugging GPU errors.",
357
+ )
358
+ CUDA_VISIBLE_DEVICES: Optional[str] = Field(
359
+ None,
360
+ description="CUDA device visibility mask (e.g. '0' or '0,1').",
361
+ )
362
+ TOKENIZERS_PARALLELISM: Optional[bool] = Field(
363
+ None,
364
+ description="HuggingFace tokenizers parallelism toggle (set to false to reduce warnings/noise).",
365
+ )
366
+ TRANSFORMERS_NO_ADVISORY_WARNINGS: Optional[bool] = Field(
367
+ None,
368
+ description="Disable advisory warnings from transformers (reduces console noise).",
369
+ )
302
370
 
303
371
  #
304
372
  # Model Keys
305
373
  #
306
374
 
307
- API_KEY: Optional[SecretStr] = None
308
- CONFIDENT_API_KEY: Optional[SecretStr] = None
375
+ API_KEY: Optional[SecretStr] = Field(
376
+ None,
377
+ description="Alias for CONFIDENT_API_KEY (Confident AI API key).",
378
+ )
379
+ CONFIDENT_API_KEY: Optional[SecretStr] = Field(
380
+ None,
381
+ description="Confident AI API key (used for uploading results/telemetry to Confident).",
382
+ )
309
383
 
310
384
  # ======
311
385
  # Base URL for Confident AI API server
312
386
  # ======
313
- CONFIDENT_BASE_URL: Optional[str] = None
387
+ CONFIDENT_BASE_URL: Optional[str] = Field(
388
+ None,
389
+ description="Base URL for Confident AI API server (set only if using a custom/hosted endpoint).",
390
+ )
314
391
 
315
392
  # General
316
- TEMPERATURE: Optional[confloat(ge=0, le=2)] = None
393
+ TEMPERATURE: Optional[confloat(ge=0, le=2)] = Field(
394
+ None,
395
+ description="Global default model temperature (0–2). Model-specific constructors may override.",
396
+ )
317
397
 
318
398
  # Anthropic
319
- ANTHROPIC_API_KEY: Optional[SecretStr] = None
399
+ USE_ANTHROPIC_MODEL: Optional[bool] = Field(
400
+ None,
401
+ description="Select Anthropic as the active LLM provider (USE_* flags are mutually exclusive in CLI helpers).",
402
+ )
403
+ ANTHROPIC_API_KEY: Optional[SecretStr] = Field(
404
+ None, description="Anthropic API key."
405
+ )
406
+ ANTHROPIC_MODEL_NAME: Optional[str] = Field(
407
+ None, description="Anthropic model name (e.g. 'claude-3-...')."
408
+ )
409
+ ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
410
+ None,
411
+ description="Anthropic input token cost (used for cost reporting).",
412
+ )
413
+ ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
414
+ None,
415
+ description="Anthropic output token cost (used for cost reporting).",
416
+ )
417
+
418
+ # AWS
419
+ AWS_ACCESS_KEY_ID: Optional[SecretStr] = Field(
420
+ None,
421
+ description="AWS access key ID (for Bedrock or other AWS-backed integrations).",
422
+ )
423
+ AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = Field(
424
+ None,
425
+ description="AWS secret access key (for Bedrock or other AWS-backed integrations).",
426
+ )
427
+ # AWS Bedrock
428
+ USE_AWS_BEDROCK_MODEL: Optional[bool] = Field(
429
+ None, description="Select AWS Bedrock as the active LLM provider."
430
+ )
431
+ AWS_BEDROCK_MODEL_NAME: Optional[str] = Field(
432
+ None, description="AWS Bedrock model identifier."
433
+ )
434
+ AWS_BEDROCK_REGION: Optional[str] = Field(
435
+ None, description="AWS region for Bedrock (normalized to lowercase)."
436
+ )
437
+ AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
438
+ None, description="Bedrock input token cost (used for cost reporting)."
439
+ )
440
+ AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
441
+ None, description="Bedrock output token cost (used for cost reporting)."
442
+ )
320
443
  # Azure Open AI
321
- AZURE_OPENAI_API_KEY: Optional[SecretStr] = None
322
- AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = None
323
- OPENAI_API_VERSION: Optional[str] = None
324
- AZURE_DEPLOYMENT_NAME: Optional[str] = None
325
- AZURE_MODEL_NAME: Optional[str] = None
326
- AZURE_MODEL_VERSION: Optional[str] = None
327
- USE_AZURE_OPENAI: Optional[bool] = None
444
+ USE_AZURE_OPENAI: Optional[bool] = Field(
445
+ None, description="Select Azure OpenAI as the active LLM provider."
446
+ )
447
+ AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
448
+ None, description="Azure OpenAI API key."
449
+ )
450
+ AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
451
+ None, description="Azure OpenAI endpoint URL."
452
+ )
453
+ OPENAI_API_VERSION: Optional[str] = Field(
454
+ None,
455
+ description="Azure OpenAI API version (if required by your deployment).",
456
+ )
457
+ AZURE_DEPLOYMENT_NAME: Optional[str] = Field(
458
+ None,
459
+ description="Azure OpenAI deployment name (required for most Azure configs).",
460
+ )
461
+ AZURE_MODEL_NAME: Optional[str] = Field(
462
+ None,
463
+ description="Azure model name label (informational; may be used in reporting).",
464
+ )
465
+ AZURE_MODEL_VERSION: Optional[str] = Field(
466
+ None,
467
+ description="Azure model version label (informational; may be used in reporting).",
468
+ )
328
469
  # DeepSeek
329
- USE_DEEPSEEK_MODEL: Optional[bool] = None
330
- DEEPSEEK_API_KEY: Optional[SecretStr] = None
331
- DEEPSEEK_MODEL_NAME: Optional[str] = None
470
+ USE_DEEPSEEK_MODEL: Optional[bool] = Field(
471
+ None, description="Select DeepSeek as the active LLM provider."
472
+ )
473
+ DEEPSEEK_API_KEY: Optional[SecretStr] = Field(
474
+ None, description="DeepSeek API key."
475
+ )
476
+ DEEPSEEK_MODEL_NAME: Optional[str] = Field(
477
+ None, description="DeepSeek model name."
478
+ )
479
+ DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
480
+ None, description="DeepSeek input token cost (used for cost reporting)."
481
+ )
482
+ DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
483
+ None,
484
+ description="DeepSeek output token cost (used for cost reporting).",
485
+ )
332
486
  # Gemini
333
- USE_GEMINI_MODEL: Optional[bool] = None
334
- GOOGLE_API_KEY: Optional[SecretStr] = None
335
- GEMINI_MODEL_NAME: Optional[str] = None
336
- GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
337
- GOOGLE_CLOUD_PROJECT: Optional[str] = None
338
- GOOGLE_CLOUD_LOCATION: Optional[str] = None
339
- GOOGLE_SERVICE_ACCOUNT_KEY: Optional[str] = None
487
+ USE_GEMINI_MODEL: Optional[bool] = Field(
488
+ None, description="Select Google Gemini as the active LLM provider."
489
+ )
490
+ GOOGLE_API_KEY: Optional[SecretStr] = Field(
491
+ None, description="Google API key for Gemini (non-Vertex usage)."
492
+ )
493
+ GEMINI_MODEL_NAME: Optional[str] = Field(
494
+ None, description="Gemini model name (e.g. 'gemini-...')."
495
+ )
496
+ GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = Field(
497
+ None,
498
+ description="Use Vertex AI for Gemini requests instead of direct API key mode.",
499
+ )
500
+ GOOGLE_CLOUD_PROJECT: Optional[str] = Field(
501
+ None,
502
+ description="GCP project ID for Vertex AI (required if GOOGLE_GENAI_USE_VERTEXAI=true).",
503
+ )
504
+ GOOGLE_CLOUD_LOCATION: Optional[str] = Field(
505
+ None,
506
+ description="GCP region/location for Vertex AI (e.g. 'us-central1').",
507
+ )
508
+ GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = Field(
509
+ None,
510
+ description="Service account JSON key for Vertex AI auth (if not using ADC).",
511
+ )
340
512
  # Grok
341
- USE_GROK_MODEL: Optional[bool] = None
342
- GROK_API_KEY: Optional[SecretStr] = None
343
- GROK_MODEL_NAME: Optional[str] = None
513
+ USE_GROK_MODEL: Optional[bool] = Field(
514
+ None, description="Select Grok as the active LLM provider."
515
+ )
516
+ GROK_API_KEY: Optional[SecretStr] = Field(None, description="Grok API key.")
517
+ GROK_MODEL_NAME: Optional[str] = Field(None, description="Grok model name.")
518
+ GROK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
519
+ None, description="Grok input token cost (used for cost reporting)."
520
+ )
521
+ GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
522
+ None, description="Grok output token cost (used for cost reporting)."
523
+ )
344
524
  # LiteLLM
345
- USE_LITELLM: Optional[bool] = None
346
- LITELLM_API_KEY: Optional[SecretStr] = None
347
- LITELLM_MODEL_NAME: Optional[str] = None
348
- LITELLM_API_BASE: Optional[AnyUrl] = None
349
- LITELLM_PROXY_API_BASE: Optional[AnyUrl] = None
350
- LITELLM_PROXY_API_KEY: Optional[SecretStr] = None
525
+ USE_LITELLM: Optional[bool] = Field(
526
+ None, description="Select LiteLLM as the active LLM provider."
527
+ )
528
+ LITELLM_API_KEY: Optional[SecretStr] = Field(
529
+ None,
530
+ description="LiteLLM API key (if required by your LiteLLM deployment).",
531
+ )
532
+ LITELLM_MODEL_NAME: Optional[str] = Field(
533
+ None,
534
+ description="LiteLLM model name (as exposed by your LiteLLM endpoint).",
535
+ )
536
+ LITELLM_API_BASE: Optional[AnyUrl] = Field(
537
+ None, description="LiteLLM API base URL (direct)."
538
+ )
539
+ LITELLM_PROXY_API_BASE: Optional[AnyUrl] = Field(
540
+ None, description="LiteLLM proxy base URL (if using proxy mode)."
541
+ )
542
+ LITELLM_PROXY_API_KEY: Optional[SecretStr] = Field(
543
+ None, description="LiteLLM proxy API key (if required)."
544
+ )
351
545
  # LM Studio
352
- LM_STUDIO_API_KEY: Optional[SecretStr] = None
353
- LM_STUDIO_MODEL_NAME: Optional[str] = None
546
+ LM_STUDIO_API_KEY: Optional[SecretStr] = Field(
547
+ None, description="LM Studio API key (if configured)."
548
+ )
549
+ LM_STUDIO_MODEL_NAME: Optional[str] = Field(
550
+ None, description="LM Studio model name."
551
+ )
354
552
  # Local Model
355
- USE_LOCAL_MODEL: Optional[bool] = None
356
- LOCAL_MODEL_API_KEY: Optional[SecretStr] = None
357
- LOCAL_EMBEDDING_API_KEY: Optional[SecretStr] = None
358
- LOCAL_MODEL_NAME: Optional[str] = None
359
- LOCAL_MODEL_BASE_URL: Optional[AnyUrl] = None
360
- LOCAL_MODEL_FORMAT: Optional[str] = None
553
+ USE_LOCAL_MODEL: Optional[bool] = Field(
554
+ None,
555
+ description="Select a local/self-hosted model as the active LLM provider.",
556
+ )
557
+ LOCAL_MODEL_API_KEY: Optional[SecretStr] = Field(
558
+ None,
559
+ description="API key for a local/self-hosted LLM endpoint (if required).",
560
+ )
561
+ LOCAL_EMBEDDING_API_KEY: Optional[SecretStr] = Field(
562
+ None,
563
+ description="API key for a local/self-hosted embedding endpoint (if required).",
564
+ )
565
+ LOCAL_MODEL_NAME: Optional[str] = Field(
566
+ None,
567
+ description="Local/self-hosted model name (informational / routing).",
568
+ )
569
+ LOCAL_MODEL_BASE_URL: Optional[AnyUrl] = Field(
570
+ None, description="Base URL for a local/self-hosted LLM endpoint."
571
+ )
572
+ LOCAL_MODEL_FORMAT: Optional[str] = Field(
573
+ None,
574
+ description="Local model API format identifier (implementation-specific).",
575
+ )
361
576
  # Moonshot
362
- USE_MOONSHOT_MODEL: Optional[bool] = None
363
- MOONSHOT_API_KEY: Optional[SecretStr] = None
364
- MOONSHOT_MODEL_NAME: Optional[str] = None
577
+ USE_MOONSHOT_MODEL: Optional[bool] = Field(
578
+ None, description="Select Moonshot as the active LLM provider."
579
+ )
580
+ MOONSHOT_API_KEY: Optional[SecretStr] = Field(
581
+ None, description="Moonshot API key."
582
+ )
583
+ MOONSHOT_MODEL_NAME: Optional[str] = Field(
584
+ None, description="Moonshot model name."
585
+ )
586
+ MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = Field(
587
+ None, description="Moonshot input token cost (used for cost reporting)."
588
+ )
589
+ MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
590
+ None,
591
+ description="Moonshot output token cost (used for cost reporting).",
592
+ )
365
593
  # Ollama
366
- OLLAMA_MODEL_NAME: Optional[str] = None
594
+ OLLAMA_MODEL_NAME: Optional[str] = Field(
595
+ None,
596
+ description="Ollama model name (used when running via Ollama integration).",
597
+ )
367
598
  # OpenAI
368
- USE_OPENAI_MODEL: Optional[bool] = None
369
- OPENAI_API_KEY: Optional[SecretStr] = None
370
- OPENAI_MODEL_NAME: Optional[str] = None
371
- OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = None
372
- OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = None
599
+ USE_OPENAI_MODEL: Optional[bool] = Field(
600
+ None, description="Select OpenAI as the active LLM provider."
601
+ )
602
+ OPENAI_API_KEY: Optional[SecretStr] = Field(
603
+ None, description="OpenAI API key."
604
+ )
605
+ OPENAI_MODEL_NAME: Optional[str] = Field(
606
+ None, description="OpenAI model name (e.g. 'gpt-4.1')."
607
+ )
608
+ OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = Field(
609
+ None, description="OpenAI input token cost (used for cost reporting)."
610
+ )
611
+ OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
612
+ None, description="OpenAI output token cost (used for cost reporting)."
613
+ )
373
614
  # PortKey
374
- USE_PORTKEY_MODEL: Optional[bool] = None
375
- PORTKEY_API_KEY: Optional[SecretStr] = None
376
- PORTKEY_MODEL_NAME: Optional[str] = None
377
- PORTKEY_BASE_URL: Optional[AnyUrl] = None
378
- PORTKEY_PROVIDER_NAME: Optional[str] = None
615
+ USE_PORTKEY_MODEL: Optional[bool] = Field(
616
+ None, description="Select Portkey as the active LLM provider."
617
+ )
618
+ PORTKEY_API_KEY: Optional[SecretStr] = Field(
619
+ None, description="Portkey API key."
620
+ )
621
+ PORTKEY_MODEL_NAME: Optional[str] = Field(
622
+ None, description="Portkey model name (as configured in Portkey)."
623
+ )
624
+ PORTKEY_BASE_URL: Optional[AnyUrl] = Field(
625
+ None, description="Portkey base URL (if using a custom endpoint)."
626
+ )
627
+ PORTKEY_PROVIDER_NAME: Optional[str] = Field(
628
+ None, description="Provider name/routing hint for Portkey."
629
+ )
379
630
  # Vertex AI
380
- VERTEX_AI_MODEL_NAME: Optional[str] = None
631
+ VERTEX_AI_MODEL_NAME: Optional[str] = Field(
632
+ None,
633
+ description="Vertex AI model name (used by some Google integrations).",
634
+ )
381
635
  # VLLM
382
- VLLM_API_KEY: Optional[SecretStr] = None
383
- VLLM_MODEL_NAME: Optional[str] = None
636
+ VLLM_API_KEY: Optional[SecretStr] = Field(
637
+ None, description="vLLM API key (if required by your vLLM gateway)."
638
+ )
639
+ VLLM_MODEL_NAME: Optional[str] = Field(None, description="vLLM model name.")
384
640
 
385
641
  #
386
642
  # Embedding Keys
387
643
  #
388
644
 
389
645
  # Azure OpenAI
390
- USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = None
391
- AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = None
646
+ USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = Field(
647
+ None, description="Use Azure OpenAI for embeddings."
648
+ )
649
+ AZURE_EMBEDDING_MODEL_NAME: Optional[str] = Field(
650
+ None, description="Azure embedding model name label."
651
+ )
652
+ AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = Field(
653
+ None, description="Azure embedding deployment name."
654
+ )
655
+
392
656
  # Local
393
- USE_LOCAL_EMBEDDINGS: Optional[bool] = None
394
- LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = None
395
- LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = None
657
+ USE_LOCAL_EMBEDDINGS: Optional[bool] = Field(
658
+ None, description="Use a local/self-hosted embeddings endpoint."
659
+ )
660
+ LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = Field(
661
+ None,
662
+ description="Local embedding model name (informational / routing).",
663
+ )
664
+ LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = Field(
665
+ None,
666
+ description="Base URL for a local/self-hosted embeddings endpoint.",
667
+ )
396
668
 
397
669
  #
398
670
  # Retry Policy
@@ -404,60 +676,133 @@ class Settings(BaseSettings):
404
676
  # contribution is ~ JITTER/2 per sleep.
405
677
  # - logging levels are looked up dynamically each attempt, so if you change LOG_LEVEL at runtime,
406
678
  # the retry loggers will honor it without restart.
407
- DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = (
408
- None # ["*"] to delegate all retries to SDKs
679
+ DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = Field(
680
+ None,
681
+ description="Providers for which retries should be delegated to the provider SDK (use ['*'] for all).",
682
+ )
683
+ DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = Field(
684
+ None,
685
+ description="Log level for 'before retry' logs (defaults to LOG_LEVEL if set, else INFO).",
686
+ )
687
+ DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = Field(
688
+ None,
689
+ description="Log level for 'after retry' logs (defaults to ERROR).",
409
690
  )
410
- DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = (
411
- None # default is LOG_LEVEL if set, else INFO
691
+ DEEPEVAL_RETRY_MAX_ATTEMPTS: conint(ge=1) = Field(
692
+ 2,
693
+ description="Max attempts per provider call (includes the first call; 1 = no retries).",
412
694
  )
413
- DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = None # default -> ERROR
414
- DEEPEVAL_RETRY_MAX_ATTEMPTS: conint(ge=1) = (
415
- 2 # attempts = first try + retries
695
+ DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = Field(
696
+ 1.0,
697
+ description="Initial backoff sleep (seconds) before the first retry.",
416
698
  )
417
- DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = (
418
- 1.0 # first sleep before retry, if any
699
+ DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = Field(
700
+ 2.0, description="Exponential backoff growth factor."
419
701
  )
420
- DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = (
421
- 2.0 # exponential growth factor for sleeps
702
+ DEEPEVAL_RETRY_JITTER: confloat(ge=0) = Field(
703
+ 2.0, description="Uniform jitter added to each retry sleep (seconds)."
422
704
  )
423
- DEEPEVAL_RETRY_JITTER: confloat(ge=0) = 2.0 # uniform jitter
424
- DEEPEVAL_RETRY_CAP_SECONDS: confloat(ge=0) = (
425
- 5.0 # cap for each backoff sleep
705
+ DEEPEVAL_RETRY_CAP_SECONDS: confloat(ge=0) = Field(
706
+ 5.0, description="Maximum backoff sleep per retry (seconds)."
426
707
  )
427
708
 
428
709
  #
429
710
  # Telemetry and Debug
430
711
  #
431
- DEEPEVAL_DEBUG_ASYNC: Optional[bool] = None
432
- DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = None
433
- DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = None
434
- DEEPEVAL_GRPC_LOGGING: Optional[bool] = None
435
- GRPC_VERBOSITY: Optional[str] = None
436
- GRPC_TRACE: Optional[str] = None
437
- ERROR_REPORTING: Optional[bool] = None
438
- IGNORE_DEEPEVAL_ERRORS: Optional[bool] = None
439
- SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = None
440
- DEEPEVAL_VERBOSE_MODE: Optional[bool] = None
441
- DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = None
442
- ENABLE_DEEPEVAL_CACHE: Optional[bool] = None
443
-
444
- CONFIDENT_TRACE_FLUSH: Optional[bool] = None
445
- CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = "development"
446
- CONFIDENT_TRACE_VERBOSE: Optional[bool] = True
447
- CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = 1.0
448
-
449
- CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = None
450
- CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = True
451
- CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = 1.0
452
- CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = True
453
-
454
- OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = None
712
+ DEEPEVAL_DEBUG_ASYNC: Optional[bool] = Field(
713
+ None, description="Enable extra async debugging logs/behavior."
714
+ )
715
+ DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = Field(
716
+ None,
717
+ description="Opt out of DeepEval telemetry (OFF wins if conflicting legacy flags are set).",
718
+ )
719
+ DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = Field(
720
+ None,
721
+ description="Opt in to update warnings in the CLI/runtime when new versions are available.",
722
+ )
723
+ DEEPEVAL_GRPC_LOGGING: Optional[bool] = Field(
724
+ None,
725
+ description="Enable extra gRPC logging for Confident transport/debugging.",
726
+ )
727
+ GRPC_VERBOSITY: Optional[str] = Field(
728
+ None, description="gRPC verbosity (grpc env var passthrough)."
729
+ )
730
+ GRPC_TRACE: Optional[str] = Field(
731
+ None, description="gRPC trace categories (grpc env var passthrough)."
732
+ )
733
+ ERROR_REPORTING: Optional[bool] = Field(
734
+ None,
735
+ description="Enable/disable error reporting (implementation/integration dependent).",
736
+ )
737
+ IGNORE_DEEPEVAL_ERRORS: Optional[bool] = Field(
738
+ None,
739
+ description="Continue execution when DeepEval encounters certain recoverable errors.",
740
+ )
741
+ SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = Field(
742
+ None,
743
+ description="Skip metrics/test cases with missing required params instead of raising.",
744
+ )
745
+ DEEPEVAL_VERBOSE_MODE: Optional[bool] = Field(
746
+ None, description="Enable verbose logging and additional warnings."
747
+ )
748
+ DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = Field(
749
+ None, description="Include stack traces in certain DeepEval error logs."
750
+ )
751
+ ENABLE_DEEPEVAL_CACHE: Optional[bool] = Field(
752
+ None,
753
+ description="Enable DeepEval caching where supported (may improve performance).",
754
+ )
755
+
756
+ CONFIDENT_TRACE_FLUSH: Optional[bool] = Field(
757
+ None,
758
+ description="Flush traces eagerly (useful for debugging; may add overhead).",
759
+ )
760
+ CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = Field(
761
+ "development",
762
+ description="Trace environment label (e.g. development/staging/production).",
763
+ )
764
+ CONFIDENT_TRACE_VERBOSE: Optional[bool] = Field(
765
+ True, description="Enable verbose trace logging for Confident tracing."
766
+ )
767
+ CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = Field(
768
+ 1.0, description="Trace sampling rate (0–1). Lower to reduce overhead."
769
+ )
770
+
771
+ CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = Field(
772
+ None,
773
+ description="Flush metric logs eagerly (useful for debugging; may add overhead).",
774
+ )
775
+ CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = Field(
776
+ True, description="Enable verbose metric logging."
777
+ )
778
+ CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = Field(
779
+ 1.0,
780
+ description="Metric logging sampling rate (0–1). Lower to reduce overhead.",
781
+ )
782
+ CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = Field(
783
+ True, description="Enable metric logging to Confident where supported."
784
+ )
785
+
786
+ OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = Field(
787
+ None,
788
+ description="OpenTelemetry OTLP exporter endpoint (if using OTEL export).",
789
+ )
455
790
 
456
791
  #
457
792
  # Network
458
793
  #
459
- MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
460
- MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
794
+ MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = Field(
795
+ 3.05,
796
+ description="Connect timeout (seconds) when fetching remote images for multimodal inputs.",
797
+ )
798
+ MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = Field(
799
+ 10.0,
800
+ description="Read timeout (seconds) when fetching remote images for multimodal inputs.",
801
+ )
802
+ DEEPEVAL_DISABLE_TIMEOUTS: Optional[bool] = Field(
803
+ None,
804
+ description="Disable DeepEval-enforced timeouts (per-attempt, per-task, gather). Provider SDK timeouts may still apply.",
805
+ )
461
806
  # DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
462
807
  # Per-attempt timeout (seconds) for provider calls used by the retry policy.
463
808
  # This is an OVERRIDE setting. The effective value you should rely on at runtime is
@@ -470,20 +815,31 @@ class Settings(BaseSettings):
470
815
  #
471
816
  # Tip: Set this OR the outer override, but generally not both
472
817
  DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(gt=0)] = (
473
- None
818
+ Field(
819
+ None,
820
+ description="Override per-attempt provider call timeout (seconds). Leave unset to derive from task timeout.",
821
+ )
474
822
  )
475
823
 
476
824
  #
477
825
  # Async Document Pipelines
478
826
  #
479
827
 
480
- DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = 2
828
+ DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = Field(
829
+ 2, description="Max concurrent async document processing tasks."
830
+ )
481
831
 
482
832
  #
483
833
  # Async Task Configuration
484
834
  #
485
- DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = 128
486
- DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = 5.0
835
+ DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = Field(
836
+ 128,
837
+ description="Max worker threads used for timeout enforcement in async execution.",
838
+ )
839
+ DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = Field(
840
+ 5.0,
841
+ description="Warn if waiting on the timeout semaphore longer than this many seconds.",
842
+ )
487
843
  # DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
488
844
  # Outer time budget (seconds) for a single metric/test-case, including retries and backoff.
489
845
  # This is an OVERRIDE setting. If None or 0, the DEEPEVAL_PER_TASK_TIMEOUT_SECONDS field is computed:
@@ -496,7 +852,12 @@ class Settings(BaseSettings):
496
852
  # usage:
497
853
  # - set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE along with DEEPEVAL_RETRY_MAX_ATTEMPTS, or
498
854
  # - set DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE alone.
499
- DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = None
855
+ DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
856
+ Field(
857
+ None,
858
+ description="Override outer per-test-case timeout budget (seconds), including retries/backoff. Leave unset to auto-derive.",
859
+ )
860
+ )
500
861
 
501
862
  # Buffer time for gathering results from all tasks, added to the longest task duration
502
863
  # Increase if many tasks are running concurrently
@@ -504,7 +865,10 @@ class Settings(BaseSettings):
504
865
  # 30 # 15s seemed like not enough. we may make this computed later.
505
866
  # )
506
867
  DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
507
- None
868
+ Field(
869
+ None,
870
+ description="Override buffer added to the longest task duration when gathering async results (seconds).",
871
+ )
508
872
  )
509
873
 
510
874
  ###################
@@ -598,10 +962,16 @@ class Settings(BaseSettings):
598
962
  ##############
599
963
 
600
964
  @field_validator(
965
+ "CONFIDENT_METRIC_LOGGING_ENABLED",
966
+ "CONFIDENT_METRIC_LOGGING_VERBOSE",
967
+ "CONFIDENT_METRIC_LOGGING_FLUSH",
601
968
  "CONFIDENT_OPEN_BROWSER",
602
969
  "CONFIDENT_TRACE_FLUSH",
603
970
  "CONFIDENT_TRACE_VERBOSE",
604
971
  "CUDA_LAUNCH_BLOCKING",
972
+ "DEEPEVAL_DEBUG_ASYNC",
973
+ "DEEPEVAL_LOG_STACK_TRACES",
974
+ "DEEPEVAL_DISABLE_TIMEOUTS",
605
975
  "DEEPEVAL_VERBOSE_MODE",
606
976
  "DEEPEVAL_GRPC_LOGGING",
607
977
  "DEEPEVAL_DISABLE_DOTENV",
@@ -614,6 +984,7 @@ class Settings(BaseSettings):
614
984
  "SKIP_DEEPEVAL_MISSING_PARAMS",
615
985
  "TOKENIZERS_PARALLELISM",
616
986
  "TRANSFORMERS_NO_ADVISORY_WARNINGS",
987
+ "USE_AWS_BEDROCK_MODEL",
617
988
  "USE_OPENAI_MODEL",
618
989
  "USE_AZURE_OPENAI",
619
990
  "USE_LOCAL_MODEL",
@@ -647,6 +1018,8 @@ class Settings(BaseSettings):
647
1018
  @field_validator(
648
1019
  "OPENAI_COST_PER_INPUT_TOKEN",
649
1020
  "OPENAI_COST_PER_OUTPUT_TOKEN",
1021
+ "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
1022
+ "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
650
1023
  "TEMPERATURE",
651
1024
  "CONFIDENT_TRACE_SAMPLE_RATE",
652
1025
  "CONFIDENT_METRIC_LOGGING_SAMPLE_RATE",
@@ -717,6 +1090,16 @@ class Settings(BaseSettings):
717
1090
  return None
718
1091
  return s.upper()
719
1092
 
1093
+ @field_validator("AWS_BEDROCK_REGION", mode="before")
1094
+ @classmethod
1095
+ def _normalize_lower(cls, v):
1096
+ if v is None:
1097
+ return None
1098
+ s = str(v).strip()
1099
+ if not s:
1100
+ return None
1101
+ return s.lower()
1102
+
720
1103
  @field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="before")
721
1104
  @classmethod
722
1105
  def _coerce_to_list(cls, v):
@@ -917,6 +1300,7 @@ class Settings(BaseSettings):
917
1300
  self._save = save
918
1301
  self._persist = persist
919
1302
  self._before: Dict[str, Any] = {}
1303
+ self._touched: set[str] = set()
920
1304
  self.result: Optional[PersistResult] = None
921
1305
 
922
1306
  @property
@@ -925,122 +1309,168 @@ class Settings(BaseSettings):
925
1309
 
926
1310
  def __enter__(self) -> "Settings._SettingsEditCtx":
927
1311
  # snapshot current state
1312
+ self._token = _ACTIVE_SETTINGS_EDIT_CTX.set(self)
928
1313
  self._before = {
929
1314
  k: getattr(self._s, k) for k in type(self._s).model_fields
930
1315
  }
931
1316
  return self
932
1317
 
933
1318
  def __exit__(self, exc_type, exc, tb):
934
- if exc_type is not None:
935
- return False # don’t persist on error
936
-
937
- from deepeval.config.settings_manager import (
938
- update_settings_and_persist,
939
- _normalize_for_env,
940
- )
941
-
942
- # lazy import legacy JSON store deps
943
- from deepeval.key_handler import KEY_FILE_HANDLER
944
-
945
- model_fields = type(self._s).model_fields
946
- # Exclude computed fields from persistence
947
-
948
- # compute diff of changed fields
949
- after = {k: getattr(self._s, k) for k in model_fields}
950
-
951
- before_norm = {
952
- k: _normalize_for_env(v) for k, v in self._before.items()
953
- }
954
- after_norm = {k: _normalize_for_env(v) for k, v in after.items()}
1319
+ try:
1320
+ if exc_type is not None:
1321
+ return False # don’t persist on error
955
1322
 
956
- changed_keys = {
957
- k for k in after_norm if after_norm[k] != before_norm.get(k)
958
- }
959
- changed_keys -= self.COMPUTED_FIELDS
1323
+ from deepeval.config.settings_manager import (
1324
+ update_settings_and_persist,
1325
+ _normalize_for_env,
1326
+ _resolve_save_path,
1327
+ )
960
1328
 
961
- if not changed_keys:
962
- self.result = PersistResult(False, None, {})
963
- return False
1329
+ # lazy import legacy JSON store deps
1330
+ from deepeval.key_handler import KEY_FILE_HANDLER
1331
+
1332
+ model_fields = type(self._s).model_fields
1333
+ # Exclude computed fields from persistence
1334
+
1335
+ # compute diff of changed fields
1336
+ after = {k: getattr(self._s, k) for k in model_fields}
1337
+
1338
+ before_norm = {
1339
+ k: _normalize_for_env(v) for k, v in self._before.items()
1340
+ }
1341
+ after_norm = {
1342
+ k: _normalize_for_env(v) for k, v in after.items()
1343
+ }
1344
+
1345
+ changed_keys = {
1346
+ k for k in after_norm if after_norm[k] != before_norm.get(k)
1347
+ }
1348
+ changed_keys -= self.COMPUTED_FIELDS
1349
+ touched_keys = set(self._touched) - self.COMPUTED_FIELDS
1350
+
1351
+ # dotenv should persist union(changed, touched)
1352
+ persist_dotenv = self._persist is not False
1353
+ ok, resolved_path = _resolve_save_path(self._save)
1354
+
1355
+ existing_dotenv = {}
1356
+ if persist_dotenv and ok and resolved_path is not None:
1357
+ existing_dotenv = read_dotenv_file(resolved_path)
1358
+
1359
+ candidate_keys_for_dotenv = (
1360
+ changed_keys | touched_keys
1361
+ ) - self.COMPUTED_FIELDS
1362
+
1363
+ keys_for_dotenv: set[str] = set()
1364
+ for key in candidate_keys_for_dotenv:
1365
+ desired = after_norm.get(key) # normalized string or None
1366
+ if desired is None:
1367
+ # only need to unset if it's actually present in dotenv
1368
+ # if key in existing_dotenv:
1369
+ # keys_for_dotenv.add(key)
1370
+ keys_for_dotenv.add(key)
1371
+ else:
1372
+ if existing_dotenv.get(key) != desired:
1373
+ keys_for_dotenv.add(key)
964
1374
 
965
- updates = {k: after[k] for k in changed_keys}
1375
+ updates_for_dotenv = {
1376
+ key: after[key] for key in keys_for_dotenv
1377
+ }
966
1378
 
967
- if "LOG_LEVEL" in updates:
968
- from deepeval.config.logging import (
969
- apply_deepeval_log_level,
970
- )
1379
+ if not changed_keys and not updates_for_dotenv:
1380
+ if self._persist is False:
1381
+ # report the result as handled so that the CLI does not mistakenly report an invalid save option
1382
+ self.result = PersistResult(True, None, {})
1383
+ return False
971
1384
 
972
- apply_deepeval_log_level()
1385
+ ok, resolved_path = _resolve_save_path(self._save)
1386
+ self.result = PersistResult(ok, resolved_path, {})
1387
+ return False
973
1388
 
974
- #
975
- # .deepeval JSON support
976
- #
1389
+ updates = {k: after[k] for k in changed_keys}
977
1390
 
978
- if self._persist is not False:
979
- for k in changed_keys:
980
- legacy_member = _find_legacy_enum(k)
981
- if legacy_member is None:
982
- continue # skip if not defined as a legacy field
1391
+ if "LOG_LEVEL" in updates:
1392
+ from deepeval.config.logging import (
1393
+ apply_deepeval_log_level,
1394
+ )
983
1395
 
984
- val = updates[k]
985
- # Remove from JSON if unset
986
- if val is None:
987
- KEY_FILE_HANDLER.remove_key(legacy_member)
988
- continue
1396
+ apply_deepeval_log_level()
989
1397
 
990
- # Never store secrets in the JSON keystore
991
- if _is_secret_key(self._s, k):
992
- continue
1398
+ #
1399
+ # .deepeval JSON support
1400
+ #
993
1401
 
994
- # For booleans, the legacy store expects "YES"/"NO"
995
- if isinstance(val, bool):
996
- KEY_FILE_HANDLER.write_key(
997
- legacy_member, "YES" if val else "NO"
998
- )
999
- else:
1000
- # store as string
1001
- KEY_FILE_HANDLER.write_key(legacy_member, str(val))
1002
-
1003
- #
1004
- # dotenv store
1005
- #
1006
-
1007
- # defer import to avoid cyclic imports
1008
- handled, path = update_settings_and_persist(
1009
- updates,
1010
- save=self._save,
1011
- persist_dotenv=(False if self._persist is False else True),
1012
- )
1013
- self.result = PersistResult(handled, path, updates)
1014
- return False
1402
+ if self._persist is not False:
1403
+ for k in changed_keys:
1404
+ legacy_member = _find_legacy_enum(k)
1405
+ if legacy_member is None:
1406
+ continue # skip if not defined as a legacy field
1407
+
1408
+ val = updates[k]
1409
+ # Remove from JSON if unset
1410
+ if val is None:
1411
+ KEY_FILE_HANDLER.remove_key(legacy_member)
1412
+ continue
1413
+
1414
+ # Never store secrets in the JSON keystore
1415
+ if _is_secret_key(k):
1416
+ continue
1417
+
1418
+ # For booleans, the legacy store expects "YES"/"NO"
1419
+ if isinstance(val, bool):
1420
+ KEY_FILE_HANDLER.write_key(
1421
+ legacy_member, "YES" if val else "NO"
1422
+ )
1423
+ else:
1424
+ # store as string
1425
+ KEY_FILE_HANDLER.write_key(legacy_member, str(val))
1426
+
1427
+ #
1428
+ # dotenv store
1429
+ #
1430
+
1431
+ # defer import to avoid cyclic imports
1432
+ handled, path = update_settings_and_persist(
1433
+ updates_for_dotenv,
1434
+ save=self._save,
1435
+ persist_dotenv=persist_dotenv,
1436
+ )
1437
+ self.result = PersistResult(handled, path, updates_for_dotenv)
1438
+ return False
1439
+ finally:
1440
+ if self._token is not None:
1441
+ _ACTIVE_SETTINGS_EDIT_CTX.reset(self._token)
1015
1442
 
1016
1443
  def switch_model_provider(self, target) -> None:
1017
1444
  """
1018
- Flip all USE_* toggles so that the one matching the target is True and the rest are False.
1019
- Also, mirror this change into the legacy JSON keystore as "YES"/"NO".
1020
-
1021
- `target` may be an Enum with `.value`, such as ModelKeyValues.USE_OPENAI_MODEL
1022
- or a plain string like "USE_OPENAI_MODEL".
1445
+ Flip USE_* settings within the target's provider family (LLM vs embeddings).
1023
1446
  """
1024
1447
  from deepeval.key_handler import KEY_FILE_HANDLER
1025
1448
 
1026
- # Target key is the env style string, such as "USE_OPENAI_MODEL"
1027
1449
  target_key = getattr(target, "value", str(target))
1028
1450
 
1451
+ def _is_embedding_flag(k: str) -> bool:
1452
+ return "EMBEDDING" in k
1453
+
1454
+ target_is_embedding = _is_embedding_flag(target_key)
1455
+
1029
1456
  use_fields = [
1030
- k for k in type(self._s).model_fields if k.startswith("USE_")
1457
+ field
1458
+ for field in type(self._s).model_fields
1459
+ if field.startswith("USE_")
1460
+ and _is_embedding_flag(field) == target_is_embedding
1031
1461
  ]
1462
+
1032
1463
  if target_key not in use_fields:
1033
1464
  raise ValueError(
1034
1465
  f"{target_key} is not a recognized USE_* field"
1035
1466
  )
1036
1467
 
1037
- for k in use_fields:
1038
- on = k == target_key
1039
- # dotenv persistence will serialize to "1"/"0"
1040
- setattr(self._s, k, on)
1468
+ for field in use_fields:
1469
+ on = field == target_key
1470
+ setattr(self._s, field, on)
1471
+
1041
1472
  if self._persist is not False:
1042
- # legacy json persistence will serialize to "YES"/"NO"
1043
- legacy_member = _find_legacy_enum(k)
1473
+ legacy_member = _find_legacy_enum(field)
1044
1474
  if legacy_member is not None:
1045
1475
  KEY_FILE_HANDLER.write_key(
1046
1476
  legacy_member, "YES" if on else "NO"
@@ -1089,7 +1519,7 @@ class Settings(BaseSettings):
1089
1519
 
1090
1520
 
1091
1521
  _settings_singleton: Optional[Settings] = None
1092
- _settings_env_fingerprint: "str | None" = None
1522
+ _settings_env_fingerprint: Optional[str] = None
1093
1523
  _settings_lock = threading.RLock()
1094
1524
 
1095
1525