deepeval 3.7.6__py3-none-any.whl → 3.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/main.py +2022 -759
  3. deepeval/cli/utils.py +208 -36
  4. deepeval/config/dotenv_handler.py +19 -0
  5. deepeval/config/settings.py +658 -262
  6. deepeval/config/utils.py +9 -1
  7. deepeval/evaluate/execute.py +153 -94
  8. deepeval/key_handler.py +121 -51
  9. deepeval/metrics/base_metric.py +9 -3
  10. deepeval/metrics/g_eval/g_eval.py +6 -1
  11. deepeval/metrics/indicator.py +8 -4
  12. deepeval/metrics/mcp/mcp_task_completion.py +15 -16
  13. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +15 -15
  14. deepeval/metrics/mcp/schema.py +4 -0
  15. deepeval/metrics/mcp/template.py +8 -1
  16. deepeval/metrics/prompt_alignment/prompt_alignment.py +6 -3
  17. deepeval/metrics/tool_use/schema.py +4 -0
  18. deepeval/metrics/tool_use/template.py +16 -2
  19. deepeval/metrics/tool_use/tool_use.py +30 -28
  20. deepeval/metrics/topic_adherence/schema.py +4 -0
  21. deepeval/metrics/topic_adherence/template.py +8 -1
  22. deepeval/metrics/topic_adherence/topic_adherence.py +15 -14
  23. deepeval/metrics/turn_contextual_precision/template.py +8 -1
  24. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +44 -86
  25. deepeval/metrics/turn_contextual_recall/template.py +8 -1
  26. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +44 -82
  27. deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
  28. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +48 -92
  29. deepeval/metrics/turn_faithfulness/template.py +8 -1
  30. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +76 -130
  31. deepeval/metrics/utils.py +16 -1
  32. deepeval/models/__init__.py +2 -0
  33. deepeval/models/llms/__init__.py +2 -0
  34. deepeval/models/llms/amazon_bedrock_model.py +5 -4
  35. deepeval/models/llms/anthropic_model.py +4 -3
  36. deepeval/models/llms/azure_model.py +4 -3
  37. deepeval/models/llms/deepseek_model.py +5 -8
  38. deepeval/models/llms/grok_model.py +5 -8
  39. deepeval/models/llms/kimi_model.py +5 -8
  40. deepeval/models/llms/litellm_model.py +2 -0
  41. deepeval/models/llms/local_model.py +1 -1
  42. deepeval/models/llms/openai_model.py +4 -3
  43. deepeval/models/retry_policy.py +10 -5
  44. deepeval/models/utils.py +1 -5
  45. deepeval/simulator/conversation_simulator.py +6 -2
  46. deepeval/simulator/template.py +3 -1
  47. deepeval/synthesizer/synthesizer.py +19 -17
  48. deepeval/test_run/test_run.py +6 -1
  49. deepeval/utils.py +26 -0
  50. {deepeval-3.7.6.dist-info → deepeval-3.7.7.dist-info}/METADATA +3 -3
  51. {deepeval-3.7.6.dist-info → deepeval-3.7.7.dist-info}/RECORD +54 -53
  52. {deepeval-3.7.6.dist-info → deepeval-3.7.7.dist-info}/LICENSE.md +0 -0
  53. {deepeval-3.7.6.dist-info → deepeval-3.7.7.dist-info}/WHEEL +0 -0
  54. {deepeval-3.7.6.dist-info → deepeval-3.7.7.dist-info}/entry_points.txt +0 -0
@@ -17,26 +17,37 @@ import os
17
17
  import re
18
18
  import threading
19
19
 
20
- from dotenv import dotenv_values
20
+ from contextvars import ContextVar
21
21
  from pathlib import Path
22
22
  from pydantic import (
23
23
  AnyUrl,
24
24
  computed_field,
25
25
  confloat,
26
26
  conint,
27
+ Field,
27
28
  field_validator,
28
29
  model_validator,
29
30
  SecretStr,
30
31
  PositiveFloat,
31
32
  )
32
33
  from pydantic_settings import BaseSettings, SettingsConfigDict
33
- from typing import Any, Dict, List, Optional, NamedTuple
34
+ from typing import (
35
+ Any,
36
+ Dict,
37
+ List,
38
+ Optional,
39
+ Union,
40
+ NamedTuple,
41
+ get_args,
42
+ get_origin,
43
+ )
34
44
 
35
45
  from deepeval.config.utils import (
36
- parse_bool,
37
46
  coerce_to_list,
38
47
  constrain_between,
39
48
  dedupe_preserve_order,
49
+ parse_bool,
50
+ read_dotenv_file,
40
51
  )
41
52
  from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
42
53
 
@@ -44,6 +55,10 @@ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
44
55
  logger = logging.getLogger(__name__)
45
56
  _SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
46
57
 
58
+ _ACTIVE_SETTINGS_EDIT_CTX: ContextVar[Optional["Settings._SettingsEditCtx"]] = (
59
+ ContextVar("_ACTIVE_SETTINGS_EDIT_CTX", default=None)
60
+ )
61
+
47
62
  # settings that were converted to computed fields with override counterparts
48
63
  _DEPRECATED_TO_OVERRIDE = {
49
64
  "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS": "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE",
@@ -76,14 +91,12 @@ def _find_legacy_enum(env_key: str):
76
91
  return None
77
92
 
78
93
 
79
- def _is_secret_key(settings: "Settings", env_key: str) -> bool:
80
- field = type(settings).model_fields.get(env_key)
94
+ def _is_secret_key(env_key: str) -> bool:
95
+ field = Settings.model_fields.get(env_key)
81
96
  if not field:
82
97
  return False
83
98
  if field.annotation is SecretStr:
84
99
  return True
85
- # Optional[SecretStr] etc.
86
- from typing import get_origin, get_args, Union
87
100
 
88
101
  origin = get_origin(field.annotation)
89
102
  if origin is Union:
@@ -111,7 +124,6 @@ def _merge_legacy_keyfile_into_env() -> None:
111
124
  KeyValues,
112
125
  ModelKeyValues,
113
126
  EmbeddingKeyValues,
114
- SECRET_KEYS,
115
127
  )
116
128
 
117
129
  key_path = Path(HIDDEN_DIR) / KEY_FILE
@@ -148,41 +160,28 @@ def _merge_legacy_keyfile_into_env() -> None:
148
160
  continue
149
161
 
150
162
  # Mirror the legacy warning semantics for secrets, but only once per key
151
- if (
152
- json_key in SECRET_KEYS
153
- and json_key not in _LEGACY_KEYFILE_SECRET_WARNED
163
+ if env_key not in _LEGACY_KEYFILE_SECRET_WARNED and _is_secret_key(
164
+ env_key
154
165
  ):
155
166
  logger.warning(
156
- "Reading secret '%s' from legacy %s/%s. "
167
+ "Reading secret '%s' (legacy key '%s') from legacy %s/%s. "
157
168
  "Persisting API keys in plaintext is deprecated. "
158
169
  "Move this to your environment (.env / .env.local). "
159
170
  "This fallback will be removed in a future release.",
171
+ env_key,
160
172
  json_key,
161
173
  HIDDEN_DIR,
162
174
  KEY_FILE,
163
175
  )
164
- _LEGACY_KEYFILE_SECRET_WARNED.add(json_key)
165
-
176
+ _LEGACY_KEYFILE_SECRET_WARNED.add(env_key)
166
177
  # Let Settings validators coerce types; we just inject the raw string
167
178
  os.environ[env_key] = str(raw)
168
179
 
169
180
 
170
- def _read_env_file(path: Path) -> Dict[str, str]:
171
- if not path.exists():
172
- return {}
173
- try:
174
- # filter out None to avoid writing "None" later
175
- return {
176
- k: v for k, v in dotenv_values(str(path)).items() if v is not None
177
- }
178
- except Exception:
179
- return {}
180
-
181
-
182
181
  def _discover_app_env_from_files(env_dir: Path) -> Optional[str]:
183
182
  # prefer base .env.local, then .env for APP_ENV discovery
184
183
  for name in (".env.local", ".env"):
185
- v = _read_env_file(env_dir / name).get("APP_ENV")
184
+ v = read_dotenv_file(env_dir / name).get("APP_ENV")
186
185
  if v:
187
186
  v = str(v).strip()
188
187
  if v:
@@ -211,8 +210,8 @@ def autoload_dotenv() -> None:
211
210
  env_dir = Path(os.getcwd())
212
211
 
213
212
  # merge files in precedence order
214
- base = _read_env_file(env_dir / ".env")
215
- local = _read_env_file(env_dir / ".env.local")
213
+ base = read_dotenv_file(env_dir / ".env")
214
+ local = read_dotenv_file(env_dir / ".env.local")
216
215
 
217
216
  # Pick APP_ENV (process -> .env.local -> .env -> default)
218
217
  app_env = (
@@ -223,7 +222,7 @@ def autoload_dotenv() -> None:
223
222
  if app_env is not None:
224
223
  app_env = app_env.strip()
225
224
  if app_env:
226
- env_specific = _read_env_file(env_dir / f".env.{app_env}")
225
+ env_specific = read_dotenv_file(env_dir / f".env.{app_env}")
227
226
  merged.setdefault("APP_ENV", app_env)
228
227
 
229
228
  merged.update(base)
@@ -243,6 +242,14 @@ class PersistResult(NamedTuple):
243
242
 
244
243
 
245
244
  class Settings(BaseSettings):
245
+ # def __init__(self):
246
+ # super().__init__()
247
+ def __setattr__(self, name: str, value):
248
+ ctx = _ACTIVE_SETTINGS_EDIT_CTX.get()
249
+ if ctx is not None and name in type(self).model_fields:
250
+ ctx._touched.add(name)
251
+ return super().__setattr__(name, value)
252
+
246
253
  model_config = SettingsConfigDict(
247
254
  extra="ignore",
248
255
  case_sensitive=True,
@@ -253,23 +260,50 @@ class Settings(BaseSettings):
253
260
  # General
254
261
  #
255
262
 
256
- APP_ENV: str = "dev"
257
- LOG_LEVEL: Optional[int] = None
258
- PYTHONPATH: str = "."
259
- CONFIDENT_REGION: Optional[str] = None
260
- CONFIDENT_OPEN_BROWSER: Optional[bool] = True
263
+ APP_ENV: str = Field(
264
+ "dev",
265
+ description="Application environment name used for dotenv selection (loads .env.<APP_ENV> if present).",
266
+ )
267
+ LOG_LEVEL: Optional[int] = Field(
268
+ None,
269
+ description="Global logging level (e.g. DEBUG/INFO/WARNING/ERROR/CRITICAL or numeric).",
270
+ )
271
+ PYTHONPATH: str = Field(
272
+ ".",
273
+ description="Extra PYTHONPATH used by the CLI runner (default: current project '.').",
274
+ )
275
+ CONFIDENT_REGION: Optional[str] = Field(
276
+ None,
277
+ description="Optional Confident AI region hint (uppercased).",
278
+ )
279
+ CONFIDENT_OPEN_BROWSER: Optional[bool] = Field(
280
+ True,
281
+ description="Open a browser automatically for Confident AI links/flows when available.",
282
+ )
261
283
 
262
284
  #
263
285
  # CLI
264
286
  #
265
-
266
- DEEPEVAL_DEFAULT_SAVE: Optional[str] = None
267
- DEEPEVAL_DISABLE_DOTENV: Optional[bool] = None
268
- ENV_DIR_PATH: Optional[Path] = (
269
- None # where .env files live (CWD if not set)
287
+ DEEPEVAL_DEFAULT_SAVE: Optional[str] = Field(
288
+ None,
289
+ description="Default persistence target for settings changes (e.g. 'dotenv' or 'dotenv:/path/to/.env.local').",
290
+ )
291
+ DEEPEVAL_DISABLE_DOTENV: Optional[bool] = Field(
292
+ None,
293
+ description="Disable dotenv autoloading (.env → .env.<APP_ENV> → .env.local). Tip: set to 1 in pytest/CI to prevent loading env files on import.",
294
+ )
295
+ ENV_DIR_PATH: Optional[Path] = Field(
296
+ None,
297
+ description="Directory containing .env files (default: current working directory).",
298
+ )
299
+ DEEPEVAL_FILE_SYSTEM: Optional[str] = Field(
300
+ None,
301
+ description="Filesystem mode for runtime/CLI (currently supports READ_ONLY).",
302
+ )
303
+ DEEPEVAL_IDENTIFIER: Optional[str] = Field(
304
+ None,
305
+ description="Identifier/tag to help identify your test run on Confident AI.",
270
306
  )
271
- DEEPEVAL_FILE_SYSTEM: Optional[str] = None
272
- DEEPEVAL_IDENTIFIER: Optional[str] = None
273
307
 
274
308
  #
275
309
  # Storage & Output
@@ -277,143 +311,360 @@ class Settings(BaseSettings):
277
311
 
278
312
  # When set, DeepEval will export a timestamped JSON of the latest test run
279
313
  # into this directory. The directory will be created on demand.
280
- DEEPEVAL_RESULTS_FOLDER: Optional[Path] = None
314
+ DEEPEVAL_RESULTS_FOLDER: Optional[Path] = Field(
315
+ None,
316
+ description="If set, export a timestamped JSON of the latest test run into this folder (created if missing).",
317
+ )
281
318
 
282
319
  # Display / Truncation
283
- DEEPEVAL_MAXLEN_TINY: Optional[int] = 40
284
- DEEPEVAL_MAXLEN_SHORT: Optional[int] = 60
285
- DEEPEVAL_MAXLEN_MEDIUM: Optional[int] = 120
286
- DEEPEVAL_MAXLEN_LONG: Optional[int] = 240
320
+ DEEPEVAL_MAXLEN_TINY: Optional[int] = Field(
321
+ 40,
322
+ description="Default truncation length for 'tiny' displays in logs/UI.",
323
+ )
324
+ DEEPEVAL_MAXLEN_SHORT: Optional[int] = Field(
325
+ 60,
326
+ description="Default truncation length for 'short' displays in logs/UI.",
327
+ )
328
+ DEEPEVAL_MAXLEN_MEDIUM: Optional[int] = Field(
329
+ 120,
330
+ description="Default truncation length for 'medium' displays in logs/UI.",
331
+ )
332
+ DEEPEVAL_MAXLEN_LONG: Optional[int] = Field(
333
+ 240,
334
+ description="Default truncation length for 'long' displays in logs/UI.",
335
+ )
287
336
 
288
337
  # If set, this overrides the default max_len used by deepeval/utils shorten
289
338
  # falls back to DEEPEVAL_MAXLEN_LONG when None.
290
- DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] = None
339
+ DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] = Field(
340
+ None,
341
+ description="Override default max_len for deepeval.utils.shorten (falls back to DEEPEVAL_MAXLEN_LONG when unset).",
342
+ )
291
343
 
292
344
  # Optional global suffix (keeps your "..." default).
293
- DEEPEVAL_SHORTEN_SUFFIX: Optional[str] = "..."
345
+ DEEPEVAL_SHORTEN_SUFFIX: Optional[str] = Field(
346
+ "...",
347
+ description="Suffix appended by deepeval.utils.shorten when truncating (default: '...').",
348
+ )
294
349
 
295
350
  #
296
351
  # GPU and perf toggles
297
352
  #
298
353
 
299
- CUDA_LAUNCH_BLOCKING: Optional[bool] = None
300
- CUDA_VISIBLE_DEVICES: Optional[str] = None
301
- TOKENIZERS_PARALLELISM: Optional[bool] = None
302
- TRANSFORMERS_NO_ADVISORY_WARNINGS: Optional[bool] = None
354
+ CUDA_LAUNCH_BLOCKING: Optional[bool] = Field(
355
+ None,
356
+ description="CUDA debug toggle (forces synchronous CUDA ops). Useful for debugging GPU errors.",
357
+ )
358
+ CUDA_VISIBLE_DEVICES: Optional[str] = Field(
359
+ None,
360
+ description="CUDA device visibility mask (e.g. '0' or '0,1').",
361
+ )
362
+ TOKENIZERS_PARALLELISM: Optional[bool] = Field(
363
+ None,
364
+ description="HuggingFace tokenizers parallelism toggle (set to false to reduce warnings/noise).",
365
+ )
366
+ TRANSFORMERS_NO_ADVISORY_WARNINGS: Optional[bool] = Field(
367
+ None,
368
+ description="Disable advisory warnings from transformers (reduces console noise).",
369
+ )
303
370
 
304
371
  #
305
372
  # Model Keys
306
373
  #
307
374
 
308
- API_KEY: Optional[SecretStr] = None
309
- CONFIDENT_API_KEY: Optional[SecretStr] = None
375
+ API_KEY: Optional[SecretStr] = Field(
376
+ None,
377
+ description="Alias for CONFIDENT_API_KEY (Confident AI API key).",
378
+ )
379
+ CONFIDENT_API_KEY: Optional[SecretStr] = Field(
380
+ None,
381
+ description="Confident AI API key (used for uploading results/telemetry to Confident).",
382
+ )
310
383
 
311
384
  # ======
312
385
  # Base URL for Confident AI API server
313
386
  # ======
314
- CONFIDENT_BASE_URL: Optional[str] = None
387
+ CONFIDENT_BASE_URL: Optional[str] = Field(
388
+ None,
389
+ description="Base URL for Confident AI API server (set only if using a custom/hosted endpoint).",
390
+ )
315
391
 
316
392
  # General
317
- TEMPERATURE: Optional[confloat(ge=0, le=2)] = None
393
+ TEMPERATURE: Optional[confloat(ge=0, le=2)] = Field(
394
+ None,
395
+ description="Global default model temperature (0–2). Model-specific constructors may override.",
396
+ )
318
397
 
319
398
  # Anthropic
320
- ANTHROPIC_API_KEY: Optional[SecretStr] = None
321
- ANTHROPIC_MODEL_NAME: Optional[str] = None
322
- ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
323
- ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
399
+ USE_ANTHROPIC_MODEL: Optional[bool] = Field(
400
+ None,
401
+ description="Select Anthropic as the active LLM provider (USE_* flags are mutually exclusive in CLI helpers).",
402
+ )
403
+ ANTHROPIC_API_KEY: Optional[SecretStr] = Field(
404
+ None, description="Anthropic API key."
405
+ )
406
+ ANTHROPIC_MODEL_NAME: Optional[str] = Field(
407
+ None, description="Anthropic model name (e.g. 'claude-3-...')."
408
+ )
409
+ ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
410
+ None,
411
+ description="Anthropic input token cost (used for cost reporting).",
412
+ )
413
+ ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
414
+ None,
415
+ description="Anthropic output token cost (used for cost reporting).",
416
+ )
324
417
 
325
418
  # AWS
326
- AWS_ACCESS_KEY_ID: Optional[SecretStr] = None
327
- AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = None
419
+ AWS_ACCESS_KEY_ID: Optional[SecretStr] = Field(
420
+ None,
421
+ description="AWS access key ID (for Bedrock or other AWS-backed integrations).",
422
+ )
423
+ AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = Field(
424
+ None,
425
+ description="AWS secret access key (for Bedrock or other AWS-backed integrations).",
426
+ )
328
427
  # AWS Bedrock
329
- USE_AWS_BEDROCK_MODEL: Optional[bool] = None
330
- AWS_BEDROCK_MODEL_NAME: Optional[str] = None
331
- AWS_BEDROCK_REGION: Optional[str] = None
332
- AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
333
- AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
428
+ USE_AWS_BEDROCK_MODEL: Optional[bool] = Field(
429
+ None, description="Select AWS Bedrock as the active LLM provider."
430
+ )
431
+ AWS_BEDROCK_MODEL_NAME: Optional[str] = Field(
432
+ None, description="AWS Bedrock model identifier."
433
+ )
434
+ AWS_BEDROCK_REGION: Optional[str] = Field(
435
+ None, description="AWS region for Bedrock (normalized to lowercase)."
436
+ )
437
+ AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
438
+ None, description="Bedrock input token cost (used for cost reporting)."
439
+ )
440
+ AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
441
+ None, description="Bedrock output token cost (used for cost reporting)."
442
+ )
334
443
  # Azure Open AI
335
- AZURE_OPENAI_API_KEY: Optional[SecretStr] = None
336
- AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = None
337
- OPENAI_API_VERSION: Optional[str] = None
338
- AZURE_DEPLOYMENT_NAME: Optional[str] = None
339
- AZURE_MODEL_NAME: Optional[str] = None
340
- AZURE_MODEL_VERSION: Optional[str] = None
341
- USE_AZURE_OPENAI: Optional[bool] = None
444
+ USE_AZURE_OPENAI: Optional[bool] = Field(
445
+ None, description="Select Azure OpenAI as the active LLM provider."
446
+ )
447
+ AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
448
+ None, description="Azure OpenAI API key."
449
+ )
450
+ AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
451
+ None, description="Azure OpenAI endpoint URL."
452
+ )
453
+ OPENAI_API_VERSION: Optional[str] = Field(
454
+ None,
455
+ description="Azure OpenAI API version (if required by your deployment).",
456
+ )
457
+ AZURE_DEPLOYMENT_NAME: Optional[str] = Field(
458
+ None,
459
+ description="Azure OpenAI deployment name (required for most Azure configs).",
460
+ )
461
+ AZURE_MODEL_NAME: Optional[str] = Field(
462
+ None,
463
+ description="Azure model name label (informational; may be used in reporting).",
464
+ )
465
+ AZURE_MODEL_VERSION: Optional[str] = Field(
466
+ None,
467
+ description="Azure model version label (informational; may be used in reporting).",
468
+ )
342
469
  # DeepSeek
343
- USE_DEEPSEEK_MODEL: Optional[bool] = None
344
- DEEPSEEK_API_KEY: Optional[SecretStr] = None
345
- DEEPSEEK_MODEL_NAME: Optional[str] = None
346
- DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = None
347
- DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
470
+ USE_DEEPSEEK_MODEL: Optional[bool] = Field(
471
+ None, description="Select DeepSeek as the active LLM provider."
472
+ )
473
+ DEEPSEEK_API_KEY: Optional[SecretStr] = Field(
474
+ None, description="DeepSeek API key."
475
+ )
476
+ DEEPSEEK_MODEL_NAME: Optional[str] = Field(
477
+ None, description="DeepSeek model name."
478
+ )
479
+ DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
480
+ None, description="DeepSeek input token cost (used for cost reporting)."
481
+ )
482
+ DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
483
+ None,
484
+ description="DeepSeek output token cost (used for cost reporting).",
485
+ )
348
486
  # Gemini
349
- USE_GEMINI_MODEL: Optional[bool] = None
350
- GOOGLE_API_KEY: Optional[SecretStr] = None
351
- GEMINI_MODEL_NAME: Optional[str] = None
352
- GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
353
- GOOGLE_CLOUD_PROJECT: Optional[str] = None
354
- GOOGLE_CLOUD_LOCATION: Optional[str] = None
355
- GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = None
487
+ USE_GEMINI_MODEL: Optional[bool] = Field(
488
+ None, description="Select Google Gemini as the active LLM provider."
489
+ )
490
+ GOOGLE_API_KEY: Optional[SecretStr] = Field(
491
+ None, description="Google API key for Gemini (non-Vertex usage)."
492
+ )
493
+ GEMINI_MODEL_NAME: Optional[str] = Field(
494
+ None, description="Gemini model name (e.g. 'gemini-...')."
495
+ )
496
+ GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = Field(
497
+ None,
498
+ description="Use Vertex AI for Gemini requests instead of direct API key mode.",
499
+ )
500
+ GOOGLE_CLOUD_PROJECT: Optional[str] = Field(
501
+ None,
502
+ description="GCP project ID for Vertex AI (required if GOOGLE_GENAI_USE_VERTEXAI=true).",
503
+ )
504
+ GOOGLE_CLOUD_LOCATION: Optional[str] = Field(
505
+ None,
506
+ description="GCP region/location for Vertex AI (e.g. 'us-central1').",
507
+ )
508
+ GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = Field(
509
+ None,
510
+ description="Service account JSON key for Vertex AI auth (if not using ADC).",
511
+ )
356
512
  # Grok
357
- USE_GROK_MODEL: Optional[bool] = None
358
- GROK_API_KEY: Optional[SecretStr] = None
359
- GROK_MODEL_NAME: Optional[str] = None
360
- GROK_COST_PER_INPUT_TOKEN: Optional[float] = None
361
- GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
513
+ USE_GROK_MODEL: Optional[bool] = Field(
514
+ None, description="Select Grok as the active LLM provider."
515
+ )
516
+ GROK_API_KEY: Optional[SecretStr] = Field(None, description="Grok API key.")
517
+ GROK_MODEL_NAME: Optional[str] = Field(None, description="Grok model name.")
518
+ GROK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
519
+ None, description="Grok input token cost (used for cost reporting)."
520
+ )
521
+ GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
522
+ None, description="Grok output token cost (used for cost reporting)."
523
+ )
362
524
  # LiteLLM
363
- USE_LITELLM: Optional[bool] = None
364
- LITELLM_API_KEY: Optional[SecretStr] = None
365
- LITELLM_MODEL_NAME: Optional[str] = None
366
- LITELLM_API_BASE: Optional[AnyUrl] = None
367
- LITELLM_PROXY_API_BASE: Optional[AnyUrl] = None
368
- LITELLM_PROXY_API_KEY: Optional[SecretStr] = None
525
+ USE_LITELLM: Optional[bool] = Field(
526
+ None, description="Select LiteLLM as the active LLM provider."
527
+ )
528
+ LITELLM_API_KEY: Optional[SecretStr] = Field(
529
+ None,
530
+ description="LiteLLM API key (if required by your LiteLLM deployment).",
531
+ )
532
+ LITELLM_MODEL_NAME: Optional[str] = Field(
533
+ None,
534
+ description="LiteLLM model name (as exposed by your LiteLLM endpoint).",
535
+ )
536
+ LITELLM_API_BASE: Optional[AnyUrl] = Field(
537
+ None, description="LiteLLM API base URL (direct)."
538
+ )
539
+ LITELLM_PROXY_API_BASE: Optional[AnyUrl] = Field(
540
+ None, description="LiteLLM proxy base URL (if using proxy mode)."
541
+ )
542
+ LITELLM_PROXY_API_KEY: Optional[SecretStr] = Field(
543
+ None, description="LiteLLM proxy API key (if required)."
544
+ )
369
545
  # LM Studio
370
- LM_STUDIO_API_KEY: Optional[SecretStr] = None
371
- LM_STUDIO_MODEL_NAME: Optional[str] = None
546
+ LM_STUDIO_API_KEY: Optional[SecretStr] = Field(
547
+ None, description="LM Studio API key (if configured)."
548
+ )
549
+ LM_STUDIO_MODEL_NAME: Optional[str] = Field(
550
+ None, description="LM Studio model name."
551
+ )
372
552
  # Local Model
373
- USE_LOCAL_MODEL: Optional[bool] = None
374
- LOCAL_MODEL_API_KEY: Optional[SecretStr] = None
375
- LOCAL_EMBEDDING_API_KEY: Optional[SecretStr] = None
376
- LOCAL_MODEL_NAME: Optional[str] = None
377
- LOCAL_MODEL_BASE_URL: Optional[AnyUrl] = None
378
- LOCAL_MODEL_FORMAT: Optional[str] = None
553
+ USE_LOCAL_MODEL: Optional[bool] = Field(
554
+ None,
555
+ description="Select a local/self-hosted model as the active LLM provider.",
556
+ )
557
+ LOCAL_MODEL_API_KEY: Optional[SecretStr] = Field(
558
+ None,
559
+ description="API key for a local/self-hosted LLM endpoint (if required).",
560
+ )
561
+ LOCAL_EMBEDDING_API_KEY: Optional[SecretStr] = Field(
562
+ None,
563
+ description="API key for a local/self-hosted embedding endpoint (if required).",
564
+ )
565
+ LOCAL_MODEL_NAME: Optional[str] = Field(
566
+ None,
567
+ description="Local/self-hosted model name (informational / routing).",
568
+ )
569
+ LOCAL_MODEL_BASE_URL: Optional[AnyUrl] = Field(
570
+ None, description="Base URL for a local/self-hosted LLM endpoint."
571
+ )
572
+ LOCAL_MODEL_FORMAT: Optional[str] = Field(
573
+ None,
574
+ description="Local model API format identifier (implementation-specific).",
575
+ )
379
576
  # Moonshot
380
- USE_MOONSHOT_MODEL: Optional[bool] = None
381
- MOONSHOT_API_KEY: Optional[SecretStr] = None
382
- MOONSHOT_MODEL_NAME: Optional[str] = None
383
- MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = None
384
- MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = None
577
+ USE_MOONSHOT_MODEL: Optional[bool] = Field(
578
+ None, description="Select Moonshot as the active LLM provider."
579
+ )
580
+ MOONSHOT_API_KEY: Optional[SecretStr] = Field(
581
+ None, description="Moonshot API key."
582
+ )
583
+ MOONSHOT_MODEL_NAME: Optional[str] = Field(
584
+ None, description="Moonshot model name."
585
+ )
586
+ MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = Field(
587
+ None, description="Moonshot input token cost (used for cost reporting)."
588
+ )
589
+ MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
590
+ None,
591
+ description="Moonshot output token cost (used for cost reporting).",
592
+ )
385
593
  # Ollama
386
- OLLAMA_MODEL_NAME: Optional[str] = None
594
+ OLLAMA_MODEL_NAME: Optional[str] = Field(
595
+ None,
596
+ description="Ollama model name (used when running via Ollama integration).",
597
+ )
387
598
  # OpenAI
388
- USE_OPENAI_MODEL: Optional[bool] = None
389
- OPENAI_API_KEY: Optional[SecretStr] = None
390
- OPENAI_MODEL_NAME: Optional[str] = None
391
- OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = None
392
- OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = None
599
+ USE_OPENAI_MODEL: Optional[bool] = Field(
600
+ None, description="Select OpenAI as the active LLM provider."
601
+ )
602
+ OPENAI_API_KEY: Optional[SecretStr] = Field(
603
+ None, description="OpenAI API key."
604
+ )
605
+ OPENAI_MODEL_NAME: Optional[str] = Field(
606
+ None, description="OpenAI model name (e.g. 'gpt-4.1')."
607
+ )
608
+ OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = Field(
609
+ None, description="OpenAI input token cost (used for cost reporting)."
610
+ )
611
+ OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
612
+ None, description="OpenAI output token cost (used for cost reporting)."
613
+ )
393
614
  # PortKey
394
- USE_PORTKEY_MODEL: Optional[bool] = None
395
- PORTKEY_API_KEY: Optional[SecretStr] = None
396
- PORTKEY_MODEL_NAME: Optional[str] = None
397
- PORTKEY_BASE_URL: Optional[AnyUrl] = None
398
- PORTKEY_PROVIDER_NAME: Optional[str] = None
615
+ USE_PORTKEY_MODEL: Optional[bool] = Field(
616
+ None, description="Select Portkey as the active LLM provider."
617
+ )
618
+ PORTKEY_API_KEY: Optional[SecretStr] = Field(
619
+ None, description="Portkey API key."
620
+ )
621
+ PORTKEY_MODEL_NAME: Optional[str] = Field(
622
+ None, description="Portkey model name (as configured in Portkey)."
623
+ )
624
+ PORTKEY_BASE_URL: Optional[AnyUrl] = Field(
625
+ None, description="Portkey base URL (if using a custom endpoint)."
626
+ )
627
+ PORTKEY_PROVIDER_NAME: Optional[str] = Field(
628
+ None, description="Provider name/routing hint for Portkey."
629
+ )
399
630
  # Vertex AI
400
- VERTEX_AI_MODEL_NAME: Optional[str] = None
631
+ VERTEX_AI_MODEL_NAME: Optional[str] = Field(
632
+ None,
633
+ description="Vertex AI model name (used by some Google integrations).",
634
+ )
401
635
  # VLLM
402
- VLLM_API_KEY: Optional[SecretStr] = None
403
- VLLM_MODEL_NAME: Optional[str] = None
636
+ VLLM_API_KEY: Optional[SecretStr] = Field(
637
+ None, description="vLLM API key (if required by your vLLM gateway)."
638
+ )
639
+ VLLM_MODEL_NAME: Optional[str] = Field(None, description="vLLM model name.")
404
640
 
405
641
  #
406
642
  # Embedding Keys
407
643
  #
408
644
 
409
645
  # Azure OpenAI
410
- USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = None
411
- AZURE_EMBEDDING_MODEL_NAME: Optional[str] = None
412
- AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = None
646
+ USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = Field(
647
+ None, description="Use Azure OpenAI for embeddings."
648
+ )
649
+ AZURE_EMBEDDING_MODEL_NAME: Optional[str] = Field(
650
+ None, description="Azure embedding model name label."
651
+ )
652
+ AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = Field(
653
+ None, description="Azure embedding deployment name."
654
+ )
655
+
413
656
  # Local
414
- USE_LOCAL_EMBEDDINGS: Optional[bool] = None
415
- LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = None
416
- LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = None
657
+ USE_LOCAL_EMBEDDINGS: Optional[bool] = Field(
658
+ None, description="Use a local/self-hosted embeddings endpoint."
659
+ )
660
+ LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = Field(
661
+ None,
662
+ description="Local embedding model name (informational / routing).",
663
+ )
664
+ LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = Field(
665
+ None,
666
+ description="Base URL for a local/self-hosted embeddings endpoint.",
667
+ )
417
668
 
418
669
  #
419
670
  # Retry Policy
@@ -425,60 +676,133 @@ class Settings(BaseSettings):
425
676
  # contribution is ~ JITTER/2 per sleep.
426
677
  # - logging levels are looked up dynamically each attempt, so if you change LOG_LEVEL at runtime,
427
678
  # the retry loggers will honor it without restart.
428
- DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = (
429
- None # ["*"] to delegate all retries to SDKs
679
+ DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = Field(
680
+ None,
681
+ description="Providers for which retries should be delegated to the provider SDK (use ['*'] for all).",
682
+ )
683
+ DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = Field(
684
+ None,
685
+ description="Log level for 'before retry' logs (defaults to LOG_LEVEL if set, else INFO).",
430
686
  )
431
- DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = (
432
- None # default is LOG_LEVEL if set, else INFO
687
+ DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = Field(
688
+ None,
689
+ description="Log level for 'after retry' logs (defaults to ERROR).",
433
690
  )
434
- DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = None # default -> ERROR
435
- DEEPEVAL_RETRY_MAX_ATTEMPTS: conint(ge=1) = (
436
- 2 # attempts = first try + retries
691
+ DEEPEVAL_RETRY_MAX_ATTEMPTS: conint(ge=1) = Field(
692
+ 2,
693
+ description="Max attempts per provider call (includes the first call; 1 = no retries).",
437
694
  )
438
- DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = (
439
- 1.0 # first sleep before retry, if any
695
+ DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = Field(
696
+ 1.0,
697
+ description="Initial backoff sleep (seconds) before the first retry.",
440
698
  )
441
- DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = (
442
- 2.0 # exponential growth factor for sleeps
699
+ DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = Field(
700
+ 2.0, description="Exponential backoff growth factor."
443
701
  )
444
- DEEPEVAL_RETRY_JITTER: confloat(ge=0) = 2.0 # uniform jitter
445
- DEEPEVAL_RETRY_CAP_SECONDS: confloat(ge=0) = (
446
- 5.0 # cap for each backoff sleep
702
+ DEEPEVAL_RETRY_JITTER: confloat(ge=0) = Field(
703
+ 2.0, description="Uniform jitter added to each retry sleep (seconds)."
704
+ )
705
+ DEEPEVAL_RETRY_CAP_SECONDS: confloat(ge=0) = Field(
706
+ 5.0, description="Maximum backoff sleep per retry (seconds)."
447
707
  )
448
708
 
449
709
  #
450
710
  # Telemetry and Debug
451
711
  #
452
- DEEPEVAL_DEBUG_ASYNC: Optional[bool] = None
453
- DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = None
454
- DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = None
455
- DEEPEVAL_GRPC_LOGGING: Optional[bool] = None
456
- GRPC_VERBOSITY: Optional[str] = None
457
- GRPC_TRACE: Optional[str] = None
458
- ERROR_REPORTING: Optional[bool] = None
459
- IGNORE_DEEPEVAL_ERRORS: Optional[bool] = None
460
- SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = None
461
- DEEPEVAL_VERBOSE_MODE: Optional[bool] = None
462
- DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = None
463
- ENABLE_DEEPEVAL_CACHE: Optional[bool] = None
464
-
465
- CONFIDENT_TRACE_FLUSH: Optional[bool] = None
466
- CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = "development"
467
- CONFIDENT_TRACE_VERBOSE: Optional[bool] = True
468
- CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = 1.0
469
-
470
- CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = None
471
- CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = True
472
- CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = 1.0
473
- CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = True
474
-
475
- OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = None
712
+ DEEPEVAL_DEBUG_ASYNC: Optional[bool] = Field(
713
+ None, description="Enable extra async debugging logs/behavior."
714
+ )
715
+ DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = Field(
716
+ None,
717
+ description="Opt out of DeepEval telemetry (OFF wins if conflicting legacy flags are set).",
718
+ )
719
+ DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = Field(
720
+ None,
721
+ description="Opt in to update warnings in the CLI/runtime when new versions are available.",
722
+ )
723
+ DEEPEVAL_GRPC_LOGGING: Optional[bool] = Field(
724
+ None,
725
+ description="Enable extra gRPC logging for Confident transport/debugging.",
726
+ )
727
+ GRPC_VERBOSITY: Optional[str] = Field(
728
+ None, description="gRPC verbosity (grpc env var passthrough)."
729
+ )
730
+ GRPC_TRACE: Optional[str] = Field(
731
+ None, description="gRPC trace categories (grpc env var passthrough)."
732
+ )
733
+ ERROR_REPORTING: Optional[bool] = Field(
734
+ None,
735
+ description="Enable/disable error reporting (implementation/integration dependent).",
736
+ )
737
+ IGNORE_DEEPEVAL_ERRORS: Optional[bool] = Field(
738
+ None,
739
+ description="Continue execution when DeepEval encounters certain recoverable errors.",
740
+ )
741
+ SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = Field(
742
+ None,
743
+ description="Skip metrics/test cases with missing required params instead of raising.",
744
+ )
745
+ DEEPEVAL_VERBOSE_MODE: Optional[bool] = Field(
746
+ None, description="Enable verbose logging and additional warnings."
747
+ )
748
+ DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = Field(
749
+ None, description="Include stack traces in certain DeepEval error logs."
750
+ )
751
+ ENABLE_DEEPEVAL_CACHE: Optional[bool] = Field(
752
+ None,
753
+ description="Enable DeepEval caching where supported (may improve performance).",
754
+ )
755
+
756
+ CONFIDENT_TRACE_FLUSH: Optional[bool] = Field(
757
+ None,
758
+ description="Flush traces eagerly (useful for debugging; may add overhead).",
759
+ )
760
+ CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = Field(
761
+ "development",
762
+ description="Trace environment label (e.g. development/staging/production).",
763
+ )
764
+ CONFIDENT_TRACE_VERBOSE: Optional[bool] = Field(
765
+ True, description="Enable verbose trace logging for Confident tracing."
766
+ )
767
+ CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = Field(
768
+ 1.0, description="Trace sampling rate (0–1). Lower to reduce overhead."
769
+ )
770
+
771
+ CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = Field(
772
+ None,
773
+ description="Flush metric logs eagerly (useful for debugging; may add overhead).",
774
+ )
775
+ CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = Field(
776
+ True, description="Enable verbose metric logging."
777
+ )
778
+ CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = Field(
779
+ 1.0,
780
+ description="Metric logging sampling rate (0–1). Lower to reduce overhead.",
781
+ )
782
+ CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = Field(
783
+ True, description="Enable metric logging to Confident where supported."
784
+ )
785
+
786
+ OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = Field(
787
+ None,
788
+ description="OpenTelemetry OTLP exporter endpoint (if using OTEL export).",
789
+ )
476
790
 
477
791
  #
478
792
  # Network
479
793
  #
480
- MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
481
- MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
794
+ MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = Field(
795
+ 3.05,
796
+ description="Connect timeout (seconds) when fetching remote images for multimodal inputs.",
797
+ )
798
+ MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = Field(
799
+ 10.0,
800
+ description="Read timeout (seconds) when fetching remote images for multimodal inputs.",
801
+ )
802
+ DEEPEVAL_DISABLE_TIMEOUTS: Optional[bool] = Field(
803
+ None,
804
+ description="Disable DeepEval-enforced timeouts (per-attempt, per-task, gather). Provider SDK timeouts may still apply.",
805
+ )
482
806
  # DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
483
807
  # Per-attempt timeout (seconds) for provider calls used by the retry policy.
484
808
  # This is an OVERRIDE setting. The effective value you should rely on at runtime is
@@ -491,20 +815,31 @@ class Settings(BaseSettings):
491
815
  #
492
816
  # Tip: Set this OR the outer override, but generally not both
493
817
  DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(gt=0)] = (
494
- None
818
+ Field(
819
+ None,
820
+ description="Override per-attempt provider call timeout (seconds). Leave unset to derive from task timeout.",
821
+ )
495
822
  )
496
823
 
497
824
  #
498
825
  # Async Document Pipelines
499
826
  #
500
827
 
501
- DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = 2
828
+ DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = Field(
829
+ 2, description="Max concurrent async document processing tasks."
830
+ )
502
831
 
503
832
  #
504
833
  # Async Task Configuration
505
834
  #
506
- DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = 128
507
- DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = 5.0
835
+ DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = Field(
836
+ 128,
837
+ description="Max worker threads used for timeout enforcement in async execution.",
838
+ )
839
+ DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = Field(
840
+ 5.0,
841
+ description="Warn if waiting on the timeout semaphore longer than this many seconds.",
842
+ )
508
843
  # DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
509
844
  # Outer time budget (seconds) for a single metric/test-case, including retries and backoff.
510
845
  # This is an OVERRIDE setting. If None or 0 the DEEPEVAL_PER_TASK_TIMEOUT_SECONDS field is computed:
@@ -517,7 +852,12 @@ class Settings(BaseSettings):
517
852
  # usage:
518
853
  # - set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE along with DEEPEVAL_RETRY_MAX_ATTEMPTS, or
519
854
  # - set DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE alone.
520
- DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = None
855
+ DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
856
+ Field(
857
+ None,
858
+ description="Override outer per-test-case timeout budget (seconds), including retries/backoff. Leave unset to auto-derive.",
859
+ )
860
+ )
521
861
 
522
862
  # Buffer time for gathering results from all tasks, added to the longest task duration
523
863
  # Increase if many tasks are running concurrently
@@ -525,7 +865,10 @@ class Settings(BaseSettings):
525
865
  # 30 # 15s seemed like not enough. we may make this computed later.
526
866
  # )
527
867
  DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
528
- None
868
+ Field(
869
+ None,
870
+ description="Override buffer added to the longest task duration when gathering async results (seconds).",
871
+ )
529
872
  )
530
873
 
531
874
  ###################
@@ -619,10 +962,16 @@ class Settings(BaseSettings):
619
962
  ##############
620
963
 
621
964
  @field_validator(
965
+ "CONFIDENT_METRIC_LOGGING_ENABLED",
966
+ "CONFIDENT_METRIC_LOGGING_VERBOSE",
967
+ "CONFIDENT_METRIC_LOGGING_FLUSH",
622
968
  "CONFIDENT_OPEN_BROWSER",
623
969
  "CONFIDENT_TRACE_FLUSH",
624
970
  "CONFIDENT_TRACE_VERBOSE",
625
971
  "CUDA_LAUNCH_BLOCKING",
972
+ "DEEPEVAL_DEBUG_ASYNC",
973
+ "DEEPEVAL_LOG_STACK_TRACES",
974
+ "DEEPEVAL_DISABLE_TIMEOUTS",
626
975
  "DEEPEVAL_VERBOSE_MODE",
627
976
  "DEEPEVAL_GRPC_LOGGING",
628
977
  "DEEPEVAL_DISABLE_DOTENV",
@@ -951,6 +1300,7 @@ class Settings(BaseSettings):
951
1300
  self._save = save
952
1301
  self._persist = persist
953
1302
  self._before: Dict[str, Any] = {}
1303
+ self._touched: set[str] = set()
954
1304
  self.result: Optional[PersistResult] = None
955
1305
 
956
1306
  @property
@@ -959,122 +1309,168 @@ class Settings(BaseSettings):
959
1309
 
960
1310
  def __enter__(self) -> "Settings._SettingsEditCtx":
961
1311
  # snapshot current state
1312
+ self._token = _ACTIVE_SETTINGS_EDIT_CTX.set(self)
962
1313
  self._before = {
963
1314
  k: getattr(self._s, k) for k in type(self._s).model_fields
964
1315
  }
965
1316
  return self
966
1317
 
967
1318
  def __exit__(self, exc_type, exc, tb):
968
- if exc_type is not None:
969
- return False # don’t persist on error
970
-
971
- from deepeval.config.settings_manager import (
972
- update_settings_and_persist,
973
- _normalize_for_env,
974
- )
975
-
976
- # lazy import legacy JSON store deps
977
- from deepeval.key_handler import KEY_FILE_HANDLER
978
-
979
- model_fields = type(self._s).model_fields
980
- # Exclude computed fields from persistence
981
-
982
- # compute diff of changed fields
983
- after = {k: getattr(self._s, k) for k in model_fields}
984
-
985
- before_norm = {
986
- k: _normalize_for_env(v) for k, v in self._before.items()
987
- }
988
- after_norm = {k: _normalize_for_env(v) for k, v in after.items()}
1319
+ try:
1320
+ if exc_type is not None:
1321
+ return False # don’t persist on error
989
1322
 
990
- changed_keys = {
991
- k for k in after_norm if after_norm[k] != before_norm.get(k)
992
- }
993
- changed_keys -= self.COMPUTED_FIELDS
1323
+ from deepeval.config.settings_manager import (
1324
+ update_settings_and_persist,
1325
+ _normalize_for_env,
1326
+ _resolve_save_path,
1327
+ )
994
1328
 
995
- if not changed_keys:
996
- self.result = PersistResult(False, None, {})
997
- return False
1329
+ # lazy import legacy JSON store deps
1330
+ from deepeval.key_handler import KEY_FILE_HANDLER
1331
+
1332
+ model_fields = type(self._s).model_fields
1333
+ # Exclude computed fields from persistence
1334
+
1335
+ # compute diff of changed fields
1336
+ after = {k: getattr(self._s, k) for k in model_fields}
1337
+
1338
+ before_norm = {
1339
+ k: _normalize_for_env(v) for k, v in self._before.items()
1340
+ }
1341
+ after_norm = {
1342
+ k: _normalize_for_env(v) for k, v in after.items()
1343
+ }
1344
+
1345
+ changed_keys = {
1346
+ k for k in after_norm if after_norm[k] != before_norm.get(k)
1347
+ }
1348
+ changed_keys -= self.COMPUTED_FIELDS
1349
+ touched_keys = set(self._touched) - self.COMPUTED_FIELDS
1350
+
1351
+ # dotenv should persist union(changed, touched)
1352
+ persist_dotenv = self._persist is not False
1353
+ ok, resolved_path = _resolve_save_path(self._save)
1354
+
1355
+ existing_dotenv = {}
1356
+ if persist_dotenv and ok and resolved_path is not None:
1357
+ existing_dotenv = read_dotenv_file(resolved_path)
1358
+
1359
+ candidate_keys_for_dotenv = (
1360
+ changed_keys | touched_keys
1361
+ ) - self.COMPUTED_FIELDS
1362
+
1363
+ keys_for_dotenv: set[str] = set()
1364
+ for key in candidate_keys_for_dotenv:
1365
+ desired = after_norm.get(key) # normalized string or None
1366
+ if desired is None:
1367
+ # only need to unset if it's actually present in dotenv
1368
+ # if key in existing_dotenv:
1369
+ # keys_for_dotenv.add(key)
1370
+ keys_for_dotenv.add(key)
1371
+ else:
1372
+ if existing_dotenv.get(key) != desired:
1373
+ keys_for_dotenv.add(key)
998
1374
 
999
- updates = {k: after[k] for k in changed_keys}
1375
+ updates_for_dotenv = {
1376
+ key: after[key] for key in keys_for_dotenv
1377
+ }
1000
1378
 
1001
- if "LOG_LEVEL" in updates:
1002
- from deepeval.config.logging import (
1003
- apply_deepeval_log_level,
1004
- )
1379
+ if not changed_keys and not updates_for_dotenv:
1380
+ if self._persist is False:
1381
+ # we report handled so that the CLI does not mistakenly report an invalid save option
1382
+ self.result = PersistResult(True, None, {})
1383
+ return False
1005
1384
 
1006
- apply_deepeval_log_level()
1385
+ ok, resolved_path = _resolve_save_path(self._save)
1386
+ self.result = PersistResult(ok, resolved_path, {})
1387
+ return False
1007
1388
 
1008
- #
1009
- # .deepeval JSON support
1010
- #
1389
+ updates = {k: after[k] for k in changed_keys}
1011
1390
 
1012
- if self._persist is not False:
1013
- for k in changed_keys:
1014
- legacy_member = _find_legacy_enum(k)
1015
- if legacy_member is None:
1016
- continue # skip if not a defined as legacy field
1391
+ if "LOG_LEVEL" in updates:
1392
+ from deepeval.config.logging import (
1393
+ apply_deepeval_log_level,
1394
+ )
1017
1395
 
1018
- val = updates[k]
1019
- # Remove from JSON if unset
1020
- if val is None:
1021
- KEY_FILE_HANDLER.remove_key(legacy_member)
1022
- continue
1396
+ apply_deepeval_log_level()
1023
1397
 
1024
- # Never store secrets in the JSON keystore
1025
- if _is_secret_key(self._s, k):
1026
- continue
1398
+ #
1399
+ # .deepeval JSON support
1400
+ #
1027
1401
 
1028
- # For booleans, the legacy store expects "YES"/"NO"
1029
- if isinstance(val, bool):
1030
- KEY_FILE_HANDLER.write_key(
1031
- legacy_member, "YES" if val else "NO"
1032
- )
1033
- else:
1034
- # store as string
1035
- KEY_FILE_HANDLER.write_key(legacy_member, str(val))
1036
-
1037
- #
1038
- # dotenv store
1039
- #
1040
-
1041
- # defer import to avoid cyclics
1042
- handled, path = update_settings_and_persist(
1043
- updates,
1044
- save=self._save,
1045
- persist_dotenv=(False if self._persist is False else True),
1046
- )
1047
- self.result = PersistResult(handled, path, updates)
1048
- return False
1402
+ if self._persist is not False:
1403
+ for k in changed_keys:
1404
+ legacy_member = _find_legacy_enum(k)
1405
+ if legacy_member is None:
1406
+ continue  # skip if not defined as a legacy field
1407
+
1408
+ val = updates[k]
1409
+ # Remove from JSON if unset
1410
+ if val is None:
1411
+ KEY_FILE_HANDLER.remove_key(legacy_member)
1412
+ continue
1413
+
1414
+ # Never store secrets in the JSON keystore
1415
+ if _is_secret_key(k):
1416
+ continue
1417
+
1418
+ # For booleans, the legacy store expects "YES"/"NO"
1419
+ if isinstance(val, bool):
1420
+ KEY_FILE_HANDLER.write_key(
1421
+ legacy_member, "YES" if val else "NO"
1422
+ )
1423
+ else:
1424
+ # store as string
1425
+ KEY_FILE_HANDLER.write_key(legacy_member, str(val))
1426
+
1427
+ #
1428
+ # dotenv store
1429
+ #
1430
+
1431
+ # defer import to avoid cyclics
1432
+ handled, path = update_settings_and_persist(
1433
+ updates_for_dotenv,
1434
+ save=self._save,
1435
+ persist_dotenv=persist_dotenv,
1436
+ )
1437
+ self.result = PersistResult(handled, path, updates_for_dotenv)
1438
+ return False
1439
+ finally:
1440
+ if self._token is not None:
1441
+ _ACTIVE_SETTINGS_EDIT_CTX.reset(self._token)
1049
1442
 
1050
1443
  def switch_model_provider(self, target) -> None:
1051
1444
  """
1052
- Flip all USE_* toggles so that the one matching the target is True and the rest are False.
1053
- Also, mirror this change into the legacy JSON keystore as "YES"/"NO".
1054
-
1055
- `target` may be an Enum with `.value`, such as ModelKeyValues.USE_OPENAI_MODEL
1056
- or a plain string like "USE_OPENAI_MODEL".
1445
+ Flip USE_* settings within the target's provider family (LLM vs embeddings).
1057
1446
  """
1058
1447
  from deepeval.key_handler import KEY_FILE_HANDLER
1059
1448
 
1060
- # Target key is the env style string, such as "USE_OPENAI_MODEL"
1061
1449
  target_key = getattr(target, "value", str(target))
1062
1450
 
1451
+ def _is_embedding_flag(k: str) -> bool:
1452
+ return "EMBEDDING" in k
1453
+
1454
+ target_is_embedding = _is_embedding_flag(target_key)
1455
+
1063
1456
  use_fields = [
1064
- k for k in type(self._s).model_fields if k.startswith("USE_")
1457
+ field
1458
+ for field in type(self._s).model_fields
1459
+ if field.startswith("USE_")
1460
+ and _is_embedding_flag(field) == target_is_embedding
1065
1461
  ]
1462
+
1066
1463
  if target_key not in use_fields:
1067
1464
  raise ValueError(
1068
1465
  f"{target_key} is not a recognized USE_* field"
1069
1466
  )
1070
1467
 
1071
- for k in use_fields:
1072
- on = k == target_key
1073
- # dotenv persistence will serialize to "1"/"0"
1074
- setattr(self._s, k, on)
1468
+ for field in use_fields:
1469
+ on = field == target_key
1470
+ setattr(self._s, field, on)
1471
+
1075
1472
  if self._persist is not False:
1076
- # legacy json persistence will serialize to "YES"/"NO"
1077
- legacy_member = _find_legacy_enum(k)
1473
+ legacy_member = _find_legacy_enum(field)
1078
1474
  if legacy_member is not None:
1079
1475
  KEY_FILE_HANDLER.write_key(
1080
1476
  legacy_member, "YES" if on else "NO"
@@ -1123,7 +1519,7 @@ class Settings(BaseSettings):
1123
1519
 
1124
1520
 
1125
1521
  _settings_singleton: Optional[Settings] = None
1126
- _settings_env_fingerprint: "str | None" = None
1522
+ _settings_env_fingerprint: Optional[str] = None
1127
1523
  _settings_lock = threading.RLock()
1128
1524
 
1129
1525