deepeval 3.7.6__py3-none-any.whl → 3.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +2022 -759
- deepeval/cli/utils.py +208 -36
- deepeval/config/dotenv_handler.py +19 -0
- deepeval/config/settings.py +658 -262
- deepeval/config/utils.py +9 -1
- deepeval/dataset/test_run_tracer.py +4 -6
- deepeval/evaluate/execute.py +153 -94
- deepeval/integrations/pydantic_ai/instrumentator.py +4 -2
- deepeval/integrations/pydantic_ai/otel.py +5 -1
- deepeval/key_handler.py +121 -51
- deepeval/metrics/base_metric.py +9 -3
- deepeval/metrics/g_eval/g_eval.py +6 -1
- deepeval/metrics/indicator.py +8 -4
- deepeval/metrics/mcp/mcp_task_completion.py +15 -16
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +15 -15
- deepeval/metrics/mcp/schema.py +4 -0
- deepeval/metrics/mcp/template.py +8 -1
- deepeval/metrics/prompt_alignment/prompt_alignment.py +6 -3
- deepeval/metrics/tool_use/schema.py +4 -0
- deepeval/metrics/tool_use/template.py +16 -2
- deepeval/metrics/tool_use/tool_use.py +30 -28
- deepeval/metrics/topic_adherence/schema.py +4 -0
- deepeval/metrics/topic_adherence/template.py +8 -1
- deepeval/metrics/topic_adherence/topic_adherence.py +15 -14
- deepeval/metrics/turn_contextual_precision/template.py +8 -1
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +44 -86
- deepeval/metrics/turn_contextual_recall/template.py +8 -1
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +44 -82
- deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +48 -92
- deepeval/metrics/turn_faithfulness/template.py +8 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +76 -130
- deepeval/metrics/utils.py +16 -1
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/amazon_bedrock_model.py +5 -4
- deepeval/models/llms/anthropic_model.py +4 -3
- deepeval/models/llms/azure_model.py +4 -3
- deepeval/models/llms/deepseek_model.py +5 -8
- deepeval/models/llms/grok_model.py +5 -8
- deepeval/models/llms/kimi_model.py +5 -8
- deepeval/models/llms/litellm_model.py +2 -0
- deepeval/models/llms/local_model.py +1 -1
- deepeval/models/llms/openai_model.py +4 -3
- deepeval/models/retry_policy.py +10 -5
- deepeval/models/utils.py +1 -5
- deepeval/simulator/conversation_simulator.py +6 -2
- deepeval/simulator/template.py +3 -1
- deepeval/synthesizer/synthesizer.py +19 -17
- deepeval/test_run/test_run.py +6 -1
- deepeval/utils.py +26 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/METADATA +3 -3
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/RECORD +57 -56
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/WHEEL +0 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/entry_points.txt +0 -0
deepeval/config/settings.py
CHANGED
|
@@ -17,26 +17,37 @@ import os
|
|
|
17
17
|
import re
|
|
18
18
|
import threading
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from contextvars import ContextVar
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
from pydantic import (
|
|
23
23
|
AnyUrl,
|
|
24
24
|
computed_field,
|
|
25
25
|
confloat,
|
|
26
26
|
conint,
|
|
27
|
+
Field,
|
|
27
28
|
field_validator,
|
|
28
29
|
model_validator,
|
|
29
30
|
SecretStr,
|
|
30
31
|
PositiveFloat,
|
|
31
32
|
)
|
|
32
33
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
33
|
-
from typing import
|
|
34
|
+
from typing import (
|
|
35
|
+
Any,
|
|
36
|
+
Dict,
|
|
37
|
+
List,
|
|
38
|
+
Optional,
|
|
39
|
+
Union,
|
|
40
|
+
NamedTuple,
|
|
41
|
+
get_args,
|
|
42
|
+
get_origin,
|
|
43
|
+
)
|
|
34
44
|
|
|
35
45
|
from deepeval.config.utils import (
|
|
36
|
-
parse_bool,
|
|
37
46
|
coerce_to_list,
|
|
38
47
|
constrain_between,
|
|
39
48
|
dedupe_preserve_order,
|
|
49
|
+
parse_bool,
|
|
50
|
+
read_dotenv_file,
|
|
40
51
|
)
|
|
41
52
|
from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
|
|
42
53
|
|
|
@@ -44,6 +55,10 @@ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
|
|
|
44
55
|
logger = logging.getLogger(__name__)
|
|
45
56
|
_SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
|
|
46
57
|
|
|
58
|
+
_ACTIVE_SETTINGS_EDIT_CTX: ContextVar[Optional["Settings._SettingsEditCtx"]] = (
|
|
59
|
+
ContextVar("_ACTIVE_SETTINGS_EDIT_CTX", default=None)
|
|
60
|
+
)
|
|
61
|
+
|
|
47
62
|
# settings that were converted to computed fields with override counterparts
|
|
48
63
|
_DEPRECATED_TO_OVERRIDE = {
|
|
49
64
|
"DEEPEVAL_PER_TASK_TIMEOUT_SECONDS": "DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE",
|
|
@@ -76,14 +91,12 @@ def _find_legacy_enum(env_key: str):
|
|
|
76
91
|
return None
|
|
77
92
|
|
|
78
93
|
|
|
79
|
-
def _is_secret_key(
|
|
80
|
-
field =
|
|
94
|
+
def _is_secret_key(env_key: str) -> bool:
|
|
95
|
+
field = Settings.model_fields.get(env_key)
|
|
81
96
|
if not field:
|
|
82
97
|
return False
|
|
83
98
|
if field.annotation is SecretStr:
|
|
84
99
|
return True
|
|
85
|
-
# Optional[SecretStr] etc.
|
|
86
|
-
from typing import get_origin, get_args, Union
|
|
87
100
|
|
|
88
101
|
origin = get_origin(field.annotation)
|
|
89
102
|
if origin is Union:
|
|
@@ -111,7 +124,6 @@ def _merge_legacy_keyfile_into_env() -> None:
|
|
|
111
124
|
KeyValues,
|
|
112
125
|
ModelKeyValues,
|
|
113
126
|
EmbeddingKeyValues,
|
|
114
|
-
SECRET_KEYS,
|
|
115
127
|
)
|
|
116
128
|
|
|
117
129
|
key_path = Path(HIDDEN_DIR) / KEY_FILE
|
|
@@ -148,41 +160,28 @@ def _merge_legacy_keyfile_into_env() -> None:
|
|
|
148
160
|
continue
|
|
149
161
|
|
|
150
162
|
# Mirror the legacy warning semantics for secrets, but only once per key
|
|
151
|
-
if (
|
|
152
|
-
|
|
153
|
-
and json_key not in _LEGACY_KEYFILE_SECRET_WARNED
|
|
163
|
+
if env_key not in _LEGACY_KEYFILE_SECRET_WARNED and _is_secret_key(
|
|
164
|
+
env_key
|
|
154
165
|
):
|
|
155
166
|
logger.warning(
|
|
156
|
-
"Reading secret '%s' from legacy %s/%s. "
|
|
167
|
+
"Reading secret '%s' (legacy key '%s') from legacy %s/%s. "
|
|
157
168
|
"Persisting API keys in plaintext is deprecated. "
|
|
158
169
|
"Move this to your environment (.env / .env.local). "
|
|
159
170
|
"This fallback will be removed in a future release.",
|
|
171
|
+
env_key,
|
|
160
172
|
json_key,
|
|
161
173
|
HIDDEN_DIR,
|
|
162
174
|
KEY_FILE,
|
|
163
175
|
)
|
|
164
|
-
_LEGACY_KEYFILE_SECRET_WARNED.add(
|
|
165
|
-
|
|
176
|
+
_LEGACY_KEYFILE_SECRET_WARNED.add(env_key)
|
|
166
177
|
# Let Settings validators coerce types; we just inject the raw string
|
|
167
178
|
os.environ[env_key] = str(raw)
|
|
168
179
|
|
|
169
180
|
|
|
170
|
-
def _read_env_file(path: Path) -> Dict[str, str]:
|
|
171
|
-
if not path.exists():
|
|
172
|
-
return {}
|
|
173
|
-
try:
|
|
174
|
-
# filter out None to avoid writing "None" later
|
|
175
|
-
return {
|
|
176
|
-
k: v for k, v in dotenv_values(str(path)).items() if v is not None
|
|
177
|
-
}
|
|
178
|
-
except Exception:
|
|
179
|
-
return {}
|
|
180
|
-
|
|
181
|
-
|
|
182
181
|
def _discover_app_env_from_files(env_dir: Path) -> Optional[str]:
|
|
183
182
|
# prefer base .env.local, then .env for APP_ENV discovery
|
|
184
183
|
for name in (".env.local", ".env"):
|
|
185
|
-
v =
|
|
184
|
+
v = read_dotenv_file(env_dir / name).get("APP_ENV")
|
|
186
185
|
if v:
|
|
187
186
|
v = str(v).strip()
|
|
188
187
|
if v:
|
|
@@ -211,8 +210,8 @@ def autoload_dotenv() -> None:
|
|
|
211
210
|
env_dir = Path(os.getcwd())
|
|
212
211
|
|
|
213
212
|
# merge files in precedence order
|
|
214
|
-
base =
|
|
215
|
-
local =
|
|
213
|
+
base = read_dotenv_file(env_dir / ".env")
|
|
214
|
+
local = read_dotenv_file(env_dir / ".env.local")
|
|
216
215
|
|
|
217
216
|
# Pick APP_ENV (process -> .env.local -> .env -> default)
|
|
218
217
|
app_env = (
|
|
@@ -223,7 +222,7 @@ def autoload_dotenv() -> None:
|
|
|
223
222
|
if app_env is not None:
|
|
224
223
|
app_env = app_env.strip()
|
|
225
224
|
if app_env:
|
|
226
|
-
env_specific =
|
|
225
|
+
env_specific = read_dotenv_file(env_dir / f".env.{app_env}")
|
|
227
226
|
merged.setdefault("APP_ENV", app_env)
|
|
228
227
|
|
|
229
228
|
merged.update(base)
|
|
@@ -243,6 +242,14 @@ class PersistResult(NamedTuple):
|
|
|
243
242
|
|
|
244
243
|
|
|
245
244
|
class Settings(BaseSettings):
|
|
245
|
+
# def __init__(self):
|
|
246
|
+
# super().__init__()
|
|
247
|
+
def __setattr__(self, name: str, value):
|
|
248
|
+
ctx = _ACTIVE_SETTINGS_EDIT_CTX.get()
|
|
249
|
+
if ctx is not None and name in type(self).model_fields:
|
|
250
|
+
ctx._touched.add(name)
|
|
251
|
+
return super().__setattr__(name, value)
|
|
252
|
+
|
|
246
253
|
model_config = SettingsConfigDict(
|
|
247
254
|
extra="ignore",
|
|
248
255
|
case_sensitive=True,
|
|
@@ -253,23 +260,50 @@ class Settings(BaseSettings):
|
|
|
253
260
|
# General
|
|
254
261
|
#
|
|
255
262
|
|
|
256
|
-
APP_ENV: str =
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
263
|
+
APP_ENV: str = Field(
|
|
264
|
+
"dev",
|
|
265
|
+
description="Application environment name used for dotenv selection (loads .env.<APP_ENV> if present).",
|
|
266
|
+
)
|
|
267
|
+
LOG_LEVEL: Optional[int] = Field(
|
|
268
|
+
None,
|
|
269
|
+
description="Global logging level (e.g. DEBUG/INFO/WARNING/ERROR/CRITICAL or numeric).",
|
|
270
|
+
)
|
|
271
|
+
PYTHONPATH: str = Field(
|
|
272
|
+
".",
|
|
273
|
+
description="Extra PYTHONPATH used by the CLI runner (default: current project '.').",
|
|
274
|
+
)
|
|
275
|
+
CONFIDENT_REGION: Optional[str] = Field(
|
|
276
|
+
None,
|
|
277
|
+
description="Optional Confident AI region hint (uppercased).",
|
|
278
|
+
)
|
|
279
|
+
CONFIDENT_OPEN_BROWSER: Optional[bool] = Field(
|
|
280
|
+
True,
|
|
281
|
+
description="Open a browser automatically for Confident AI links/flows when available.",
|
|
282
|
+
)
|
|
261
283
|
|
|
262
284
|
#
|
|
263
285
|
# CLI
|
|
264
286
|
#
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
287
|
+
DEEPEVAL_DEFAULT_SAVE: Optional[str] = Field(
|
|
288
|
+
None,
|
|
289
|
+
description="Default persistence target for settings changes (e.g. 'dotenv' or 'dotenv:/path/to/.env.local').",
|
|
290
|
+
)
|
|
291
|
+
DEEPEVAL_DISABLE_DOTENV: Optional[bool] = Field(
|
|
292
|
+
None,
|
|
293
|
+
description="Disable dotenv autoloading (.env → .env.<APP_ENV> → .env.local). Tip: set to 1 in pytest/CI to prevent loading env files on import.",
|
|
294
|
+
)
|
|
295
|
+
ENV_DIR_PATH: Optional[Path] = Field(
|
|
296
|
+
None,
|
|
297
|
+
description="Directory containing .env files (default: current working directory).",
|
|
298
|
+
)
|
|
299
|
+
DEEPEVAL_FILE_SYSTEM: Optional[str] = Field(
|
|
300
|
+
None,
|
|
301
|
+
description="Filesystem mode for runtime/CLI (currently supports READ_ONLY).",
|
|
302
|
+
)
|
|
303
|
+
DEEPEVAL_IDENTIFIER: Optional[str] = Field(
|
|
304
|
+
None,
|
|
305
|
+
description="Identifier/tag to help identify your test run on Confident AI.",
|
|
270
306
|
)
|
|
271
|
-
DEEPEVAL_FILE_SYSTEM: Optional[str] = None
|
|
272
|
-
DEEPEVAL_IDENTIFIER: Optional[str] = None
|
|
273
307
|
|
|
274
308
|
#
|
|
275
309
|
# Storage & Output
|
|
@@ -277,143 +311,360 @@ class Settings(BaseSettings):
|
|
|
277
311
|
|
|
278
312
|
# When set, DeepEval will export a timestamped JSON of the latest test run
|
|
279
313
|
# into this directory. The directory will be created on demand.
|
|
280
|
-
DEEPEVAL_RESULTS_FOLDER: Optional[Path] =
|
|
314
|
+
DEEPEVAL_RESULTS_FOLDER: Optional[Path] = Field(
|
|
315
|
+
None,
|
|
316
|
+
description="If set, export a timestamped JSON of the latest test run into this folder (created if missing).",
|
|
317
|
+
)
|
|
281
318
|
|
|
282
319
|
# Display / Truncation
|
|
283
|
-
DEEPEVAL_MAXLEN_TINY: Optional[int] =
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
320
|
+
DEEPEVAL_MAXLEN_TINY: Optional[int] = Field(
|
|
321
|
+
40,
|
|
322
|
+
description="Default truncation length for 'tiny' displays in logs/UI.",
|
|
323
|
+
)
|
|
324
|
+
DEEPEVAL_MAXLEN_SHORT: Optional[int] = Field(
|
|
325
|
+
60,
|
|
326
|
+
description="Default truncation length for 'short' displays in logs/UI.",
|
|
327
|
+
)
|
|
328
|
+
DEEPEVAL_MAXLEN_MEDIUM: Optional[int] = Field(
|
|
329
|
+
120,
|
|
330
|
+
description="Default truncation length for 'medium' displays in logs/UI.",
|
|
331
|
+
)
|
|
332
|
+
DEEPEVAL_MAXLEN_LONG: Optional[int] = Field(
|
|
333
|
+
240,
|
|
334
|
+
description="Default truncation length for 'long' displays in logs/UI.",
|
|
335
|
+
)
|
|
287
336
|
|
|
288
337
|
# If set, this overrides the default max_len used by deepeval/utils shorten
|
|
289
338
|
# falls back to DEEPEVAL_MAXLEN_LONG when None.
|
|
290
|
-
DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] =
|
|
339
|
+
DEEPEVAL_SHORTEN_DEFAULT_MAXLEN: Optional[int] = Field(
|
|
340
|
+
None,
|
|
341
|
+
description="Override default max_len for deepeval.utils.shorten (falls back to DEEPEVAL_MAXLEN_LONG when unset).",
|
|
342
|
+
)
|
|
291
343
|
|
|
292
344
|
# Optional global suffix (keeps your "..." default).
|
|
293
|
-
DEEPEVAL_SHORTEN_SUFFIX: Optional[str] =
|
|
345
|
+
DEEPEVAL_SHORTEN_SUFFIX: Optional[str] = Field(
|
|
346
|
+
"...",
|
|
347
|
+
description="Suffix appended by deepeval.utils.shorten when truncating (default: '...').",
|
|
348
|
+
)
|
|
294
349
|
|
|
295
350
|
#
|
|
296
351
|
# GPU and perf toggles
|
|
297
352
|
#
|
|
298
353
|
|
|
299
|
-
CUDA_LAUNCH_BLOCKING: Optional[bool] =
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
354
|
+
CUDA_LAUNCH_BLOCKING: Optional[bool] = Field(
|
|
355
|
+
None,
|
|
356
|
+
description="CUDA debug toggle (forces synchronous CUDA ops). Useful for debugging GPU errors.",
|
|
357
|
+
)
|
|
358
|
+
CUDA_VISIBLE_DEVICES: Optional[str] = Field(
|
|
359
|
+
None,
|
|
360
|
+
description="CUDA device visibility mask (e.g. '0' or '0,1').",
|
|
361
|
+
)
|
|
362
|
+
TOKENIZERS_PARALLELISM: Optional[bool] = Field(
|
|
363
|
+
None,
|
|
364
|
+
description="HuggingFace tokenizers parallelism toggle (set to false to reduce warnings/noise).",
|
|
365
|
+
)
|
|
366
|
+
TRANSFORMERS_NO_ADVISORY_WARNINGS: Optional[bool] = Field(
|
|
367
|
+
None,
|
|
368
|
+
description="Disable advisory warnings from transformers (reduces console noise).",
|
|
369
|
+
)
|
|
303
370
|
|
|
304
371
|
#
|
|
305
372
|
# Model Keys
|
|
306
373
|
#
|
|
307
374
|
|
|
308
|
-
API_KEY: Optional[SecretStr] =
|
|
309
|
-
|
|
375
|
+
API_KEY: Optional[SecretStr] = Field(
|
|
376
|
+
None,
|
|
377
|
+
description="Alias for CONFIDENT_API_KEY (Confident AI API key).",
|
|
378
|
+
)
|
|
379
|
+
CONFIDENT_API_KEY: Optional[SecretStr] = Field(
|
|
380
|
+
None,
|
|
381
|
+
description="Confident AI API key (used for uploading results/telemetry to Confident).",
|
|
382
|
+
)
|
|
310
383
|
|
|
311
384
|
# ======
|
|
312
385
|
# Base URL for Confident AI API server
|
|
313
386
|
# ======
|
|
314
|
-
CONFIDENT_BASE_URL: Optional[str] =
|
|
387
|
+
CONFIDENT_BASE_URL: Optional[str] = Field(
|
|
388
|
+
None,
|
|
389
|
+
description="Base URL for Confident AI API server (set only if using a custom/hosted endpoint).",
|
|
390
|
+
)
|
|
315
391
|
|
|
316
392
|
# General
|
|
317
|
-
TEMPERATURE: Optional[confloat(ge=0, le=2)] =
|
|
393
|
+
TEMPERATURE: Optional[confloat(ge=0, le=2)] = Field(
|
|
394
|
+
None,
|
|
395
|
+
description="Global default model temperature (0–2). Model-specific constructors may override.",
|
|
396
|
+
)
|
|
318
397
|
|
|
319
398
|
# Anthropic
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
399
|
+
USE_ANTHROPIC_MODEL: Optional[bool] = Field(
|
|
400
|
+
None,
|
|
401
|
+
description="Select Anthropic as the active LLM provider (USE_* flags are mutually exclusive in CLI helpers).",
|
|
402
|
+
)
|
|
403
|
+
ANTHROPIC_API_KEY: Optional[SecretStr] = Field(
|
|
404
|
+
None, description="Anthropic API key."
|
|
405
|
+
)
|
|
406
|
+
ANTHROPIC_MODEL_NAME: Optional[str] = Field(
|
|
407
|
+
None, description="Anthropic model name (e.g. 'claude-3-...')."
|
|
408
|
+
)
|
|
409
|
+
ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
|
|
410
|
+
None,
|
|
411
|
+
description="Anthropic input token cost (used for cost reporting).",
|
|
412
|
+
)
|
|
413
|
+
ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
|
|
414
|
+
None,
|
|
415
|
+
description="Anthropic output token cost (used for cost reporting).",
|
|
416
|
+
)
|
|
324
417
|
|
|
325
418
|
# AWS
|
|
326
|
-
AWS_ACCESS_KEY_ID: Optional[SecretStr] =
|
|
327
|
-
|
|
419
|
+
AWS_ACCESS_KEY_ID: Optional[SecretStr] = Field(
|
|
420
|
+
None,
|
|
421
|
+
description="AWS access key ID (for Bedrock or other AWS-backed integrations).",
|
|
422
|
+
)
|
|
423
|
+
AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = Field(
|
|
424
|
+
None,
|
|
425
|
+
description="AWS secret access key (for Bedrock or other AWS-backed integrations).",
|
|
426
|
+
)
|
|
328
427
|
# AWS Bedrock
|
|
329
|
-
USE_AWS_BEDROCK_MODEL: Optional[bool] =
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
428
|
+
USE_AWS_BEDROCK_MODEL: Optional[bool] = Field(
|
|
429
|
+
None, description="Select AWS Bedrock as the active LLM provider."
|
|
430
|
+
)
|
|
431
|
+
AWS_BEDROCK_MODEL_NAME: Optional[str] = Field(
|
|
432
|
+
None, description="AWS Bedrock model identifier."
|
|
433
|
+
)
|
|
434
|
+
AWS_BEDROCK_REGION: Optional[str] = Field(
|
|
435
|
+
None, description="AWS region for Bedrock (normalized to lowercase)."
|
|
436
|
+
)
|
|
437
|
+
AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = Field(
|
|
438
|
+
None, description="Bedrock input token cost (used for cost reporting)."
|
|
439
|
+
)
|
|
440
|
+
AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = Field(
|
|
441
|
+
None, description="Bedrock output token cost (used for cost reporting)."
|
|
442
|
+
)
|
|
334
443
|
# Azure Open AI
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
444
|
+
USE_AZURE_OPENAI: Optional[bool] = Field(
|
|
445
|
+
None, description="Select Azure OpenAI as the active LLM provider."
|
|
446
|
+
)
|
|
447
|
+
AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
|
|
448
|
+
None, description="Azure OpenAI API key."
|
|
449
|
+
)
|
|
450
|
+
AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
|
|
451
|
+
None, description="Azure OpenAI endpoint URL."
|
|
452
|
+
)
|
|
453
|
+
OPENAI_API_VERSION: Optional[str] = Field(
|
|
454
|
+
None,
|
|
455
|
+
description="Azure OpenAI API version (if required by your deployment).",
|
|
456
|
+
)
|
|
457
|
+
AZURE_DEPLOYMENT_NAME: Optional[str] = Field(
|
|
458
|
+
None,
|
|
459
|
+
description="Azure OpenAI deployment name (required for most Azure configs).",
|
|
460
|
+
)
|
|
461
|
+
AZURE_MODEL_NAME: Optional[str] = Field(
|
|
462
|
+
None,
|
|
463
|
+
description="Azure model name label (informational; may be used in reporting).",
|
|
464
|
+
)
|
|
465
|
+
AZURE_MODEL_VERSION: Optional[str] = Field(
|
|
466
|
+
None,
|
|
467
|
+
description="Azure model version label (informational; may be used in reporting).",
|
|
468
|
+
)
|
|
342
469
|
# DeepSeek
|
|
343
|
-
USE_DEEPSEEK_MODEL: Optional[bool] =
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
470
|
+
USE_DEEPSEEK_MODEL: Optional[bool] = Field(
|
|
471
|
+
None, description="Select DeepSeek as the active LLM provider."
|
|
472
|
+
)
|
|
473
|
+
DEEPSEEK_API_KEY: Optional[SecretStr] = Field(
|
|
474
|
+
None, description="DeepSeek API key."
|
|
475
|
+
)
|
|
476
|
+
DEEPSEEK_MODEL_NAME: Optional[str] = Field(
|
|
477
|
+
None, description="DeepSeek model name."
|
|
478
|
+
)
|
|
479
|
+
DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
|
|
480
|
+
None, description="DeepSeek input token cost (used for cost reporting)."
|
|
481
|
+
)
|
|
482
|
+
DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
|
|
483
|
+
None,
|
|
484
|
+
description="DeepSeek output token cost (used for cost reporting).",
|
|
485
|
+
)
|
|
348
486
|
# Gemini
|
|
349
|
-
USE_GEMINI_MODEL: Optional[bool] =
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
487
|
+
USE_GEMINI_MODEL: Optional[bool] = Field(
|
|
488
|
+
None, description="Select Google Gemini as the active LLM provider."
|
|
489
|
+
)
|
|
490
|
+
GOOGLE_API_KEY: Optional[SecretStr] = Field(
|
|
491
|
+
None, description="Google API key for Gemini (non-Vertex usage)."
|
|
492
|
+
)
|
|
493
|
+
GEMINI_MODEL_NAME: Optional[str] = Field(
|
|
494
|
+
None, description="Gemini model name (e.g. 'gemini-...')."
|
|
495
|
+
)
|
|
496
|
+
GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = Field(
|
|
497
|
+
None,
|
|
498
|
+
description="Use Vertex AI for Gemini requests instead of direct API key mode.",
|
|
499
|
+
)
|
|
500
|
+
GOOGLE_CLOUD_PROJECT: Optional[str] = Field(
|
|
501
|
+
None,
|
|
502
|
+
description="GCP project ID for Vertex AI (required if GOOGLE_GENAI_USE_VERTEXAI=true).",
|
|
503
|
+
)
|
|
504
|
+
GOOGLE_CLOUD_LOCATION: Optional[str] = Field(
|
|
505
|
+
None,
|
|
506
|
+
description="GCP region/location for Vertex AI (e.g. 'us-central1').",
|
|
507
|
+
)
|
|
508
|
+
GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = Field(
|
|
509
|
+
None,
|
|
510
|
+
description="Service account JSON key for Vertex AI auth (if not using ADC).",
|
|
511
|
+
)
|
|
356
512
|
# Grok
|
|
357
|
-
USE_GROK_MODEL: Optional[bool] =
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
513
|
+
USE_GROK_MODEL: Optional[bool] = Field(
|
|
514
|
+
None, description="Select Grok as the active LLM provider."
|
|
515
|
+
)
|
|
516
|
+
GROK_API_KEY: Optional[SecretStr] = Field(None, description="Grok API key.")
|
|
517
|
+
GROK_MODEL_NAME: Optional[str] = Field(None, description="Grok model name.")
|
|
518
|
+
GROK_COST_PER_INPUT_TOKEN: Optional[float] = Field(
|
|
519
|
+
None, description="Grok input token cost (used for cost reporting)."
|
|
520
|
+
)
|
|
521
|
+
GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
|
|
522
|
+
None, description="Grok output token cost (used for cost reporting)."
|
|
523
|
+
)
|
|
362
524
|
# LiteLLM
|
|
363
|
-
USE_LITELLM: Optional[bool] =
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
525
|
+
USE_LITELLM: Optional[bool] = Field(
|
|
526
|
+
None, description="Select LiteLLM as the active LLM provider."
|
|
527
|
+
)
|
|
528
|
+
LITELLM_API_KEY: Optional[SecretStr] = Field(
|
|
529
|
+
None,
|
|
530
|
+
description="LiteLLM API key (if required by your LiteLLM deployment).",
|
|
531
|
+
)
|
|
532
|
+
LITELLM_MODEL_NAME: Optional[str] = Field(
|
|
533
|
+
None,
|
|
534
|
+
description="LiteLLM model name (as exposed by your LiteLLM endpoint).",
|
|
535
|
+
)
|
|
536
|
+
LITELLM_API_BASE: Optional[AnyUrl] = Field(
|
|
537
|
+
None, description="LiteLLM API base URL (direct)."
|
|
538
|
+
)
|
|
539
|
+
LITELLM_PROXY_API_BASE: Optional[AnyUrl] = Field(
|
|
540
|
+
None, description="LiteLLM proxy base URL (if using proxy mode)."
|
|
541
|
+
)
|
|
542
|
+
LITELLM_PROXY_API_KEY: Optional[SecretStr] = Field(
|
|
543
|
+
None, description="LiteLLM proxy API key (if required)."
|
|
544
|
+
)
|
|
369
545
|
# LM Studio
|
|
370
|
-
LM_STUDIO_API_KEY: Optional[SecretStr] =
|
|
371
|
-
|
|
546
|
+
LM_STUDIO_API_KEY: Optional[SecretStr] = Field(
|
|
547
|
+
None, description="LM Studio API key (if configured)."
|
|
548
|
+
)
|
|
549
|
+
LM_STUDIO_MODEL_NAME: Optional[str] = Field(
|
|
550
|
+
None, description="LM Studio model name."
|
|
551
|
+
)
|
|
372
552
|
# Local Model
|
|
373
|
-
USE_LOCAL_MODEL: Optional[bool] =
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
553
|
+
USE_LOCAL_MODEL: Optional[bool] = Field(
|
|
554
|
+
None,
|
|
555
|
+
description="Select a local/self-hosted model as the active LLM provider.",
|
|
556
|
+
)
|
|
557
|
+
LOCAL_MODEL_API_KEY: Optional[SecretStr] = Field(
|
|
558
|
+
None,
|
|
559
|
+
description="API key for a local/self-hosted LLM endpoint (if required).",
|
|
560
|
+
)
|
|
561
|
+
LOCAL_EMBEDDING_API_KEY: Optional[SecretStr] = Field(
|
|
562
|
+
None,
|
|
563
|
+
description="API key for a local/self-hosted embedding endpoint (if required).",
|
|
564
|
+
)
|
|
565
|
+
LOCAL_MODEL_NAME: Optional[str] = Field(
|
|
566
|
+
None,
|
|
567
|
+
description="Local/self-hosted model name (informational / routing).",
|
|
568
|
+
)
|
|
569
|
+
LOCAL_MODEL_BASE_URL: Optional[AnyUrl] = Field(
|
|
570
|
+
None, description="Base URL for a local/self-hosted LLM endpoint."
|
|
571
|
+
)
|
|
572
|
+
LOCAL_MODEL_FORMAT: Optional[str] = Field(
|
|
573
|
+
None,
|
|
574
|
+
description="Local model API format identifier (implementation-specific).",
|
|
575
|
+
)
|
|
379
576
|
# Moonshot
|
|
380
|
-
USE_MOONSHOT_MODEL: Optional[bool] =
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
577
|
+
USE_MOONSHOT_MODEL: Optional[bool] = Field(
|
|
578
|
+
None, description="Select Moonshot as the active LLM provider."
|
|
579
|
+
)
|
|
580
|
+
MOONSHOT_API_KEY: Optional[SecretStr] = Field(
|
|
581
|
+
None, description="Moonshot API key."
|
|
582
|
+
)
|
|
583
|
+
MOONSHOT_MODEL_NAME: Optional[str] = Field(
|
|
584
|
+
None, description="Moonshot model name."
|
|
585
|
+
)
|
|
586
|
+
MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = Field(
|
|
587
|
+
None, description="Moonshot input token cost (used for cost reporting)."
|
|
588
|
+
)
|
|
589
|
+
MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
|
|
590
|
+
None,
|
|
591
|
+
description="Moonshot output token cost (used for cost reporting).",
|
|
592
|
+
)
|
|
385
593
|
# Ollama
|
|
386
|
-
OLLAMA_MODEL_NAME: Optional[str] =
|
|
594
|
+
OLLAMA_MODEL_NAME: Optional[str] = Field(
|
|
595
|
+
None,
|
|
596
|
+
description="Ollama model name (used when running via Ollama integration).",
|
|
597
|
+
)
|
|
387
598
|
# OpenAI
|
|
388
|
-
USE_OPENAI_MODEL: Optional[bool] =
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
599
|
+
USE_OPENAI_MODEL: Optional[bool] = Field(
|
|
600
|
+
None, description="Select OpenAI as the active LLM provider."
|
|
601
|
+
)
|
|
602
|
+
OPENAI_API_KEY: Optional[SecretStr] = Field(
|
|
603
|
+
None, description="OpenAI API key."
|
|
604
|
+
)
|
|
605
|
+
OPENAI_MODEL_NAME: Optional[str] = Field(
|
|
606
|
+
None, description="OpenAI model name (e.g. 'gpt-4.1')."
|
|
607
|
+
)
|
|
608
|
+
OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = Field(
|
|
609
|
+
None, description="OpenAI input token cost (used for cost reporting)."
|
|
610
|
+
)
|
|
611
|
+
OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = Field(
|
|
612
|
+
None, description="OpenAI output token cost (used for cost reporting)."
|
|
613
|
+
)
|
|
393
614
|
# PortKey
|
|
394
|
-
USE_PORTKEY_MODEL: Optional[bool] =
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
615
|
+
USE_PORTKEY_MODEL: Optional[bool] = Field(
|
|
616
|
+
None, description="Select Portkey as the active LLM provider."
|
|
617
|
+
)
|
|
618
|
+
PORTKEY_API_KEY: Optional[SecretStr] = Field(
|
|
619
|
+
None, description="Portkey API key."
|
|
620
|
+
)
|
|
621
|
+
PORTKEY_MODEL_NAME: Optional[str] = Field(
|
|
622
|
+
None, description="Portkey model name (as configured in Portkey)."
|
|
623
|
+
)
|
|
624
|
+
PORTKEY_BASE_URL: Optional[AnyUrl] = Field(
|
|
625
|
+
None, description="Portkey base URL (if using a custom endpoint)."
|
|
626
|
+
)
|
|
627
|
+
PORTKEY_PROVIDER_NAME: Optional[str] = Field(
|
|
628
|
+
None, description="Provider name/routing hint for Portkey."
|
|
629
|
+
)
|
|
399
630
|
# Vertex AI
|
|
400
|
-
VERTEX_AI_MODEL_NAME: Optional[str] =
|
|
631
|
+
VERTEX_AI_MODEL_NAME: Optional[str] = Field(
|
|
632
|
+
None,
|
|
633
|
+
description="Vertex AI model name (used by some Google integrations).",
|
|
634
|
+
)
|
|
401
635
|
# VLLM
|
|
402
|
-
VLLM_API_KEY: Optional[SecretStr] =
|
|
403
|
-
|
|
636
|
+
VLLM_API_KEY: Optional[SecretStr] = Field(
|
|
637
|
+
None, description="vLLM API key (if required by your vLLM gateway)."
|
|
638
|
+
)
|
|
639
|
+
VLLM_MODEL_NAME: Optional[str] = Field(None, description="vLLM model name.")
|
|
404
640
|
|
|
405
641
|
#
|
|
406
642
|
# Embedding Keys
|
|
407
643
|
#
|
|
408
644
|
|
|
409
645
|
# Azure OpenAI
|
|
410
|
-
USE_AZURE_OPENAI_EMBEDDING: Optional[bool] =
|
|
411
|
-
|
|
412
|
-
|
|
646
|
+
USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = Field(
|
|
647
|
+
None, description="Use Azure OpenAI for embeddings."
|
|
648
|
+
)
|
|
649
|
+
AZURE_EMBEDDING_MODEL_NAME: Optional[str] = Field(
|
|
650
|
+
None, description="Azure embedding model name label."
|
|
651
|
+
)
|
|
652
|
+
AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = Field(
|
|
653
|
+
None, description="Azure embedding deployment name."
|
|
654
|
+
)
|
|
655
|
+
|
|
413
656
|
# Local
|
|
414
|
-
USE_LOCAL_EMBEDDINGS: Optional[bool] =
|
|
415
|
-
|
|
416
|
-
|
|
657
|
+
USE_LOCAL_EMBEDDINGS: Optional[bool] = Field(
|
|
658
|
+
None, description="Use a local/self-hosted embeddings endpoint."
|
|
659
|
+
)
|
|
660
|
+
LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = Field(
|
|
661
|
+
None,
|
|
662
|
+
description="Local embedding model name (informational / routing).",
|
|
663
|
+
)
|
|
664
|
+
LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = Field(
|
|
665
|
+
None,
|
|
666
|
+
description="Base URL for a local/self-hosted embeddings endpoint.",
|
|
667
|
+
)
|
|
417
668
|
|
|
418
669
|
#
|
|
419
670
|
# Retry Policy
|
|
@@ -425,60 +676,133 @@ class Settings(BaseSettings):
|
|
|
425
676
|
# contribution is ~ JITTER/2 per sleep.
|
|
426
677
|
# - logging levels are looked up dynamically each attempt, so if you change LOG_LEVEL at runtime,
|
|
427
678
|
# the retry loggers will honor it without restart.
|
|
428
|
-
DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = (
|
|
429
|
-
None
|
|
679
|
+
DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = Field(
|
|
680
|
+
None,
|
|
681
|
+
description="Providers for which retries should be delegated to the provider SDK (use ['*'] for all).",
|
|
682
|
+
)
|
|
683
|
+
DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = Field(
|
|
684
|
+
None,
|
|
685
|
+
description="Log level for 'before retry' logs (defaults to LOG_LEVEL if set, else INFO).",
|
|
430
686
|
)
|
|
431
|
-
|
|
432
|
-
None
|
|
687
|
+
DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = Field(
|
|
688
|
+
None,
|
|
689
|
+
description="Log level for 'after retry' logs (defaults to ERROR).",
|
|
433
690
|
)
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
691
|
+
DEEPEVAL_RETRY_MAX_ATTEMPTS: conint(ge=1) = Field(
|
|
692
|
+
2,
|
|
693
|
+
description="Max attempts per provider call (includes the first call; 1 = no retries).",
|
|
437
694
|
)
|
|
438
|
-
DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = (
|
|
439
|
-
1.0
|
|
695
|
+
DEEPEVAL_RETRY_INITIAL_SECONDS: confloat(ge=0) = Field(
|
|
696
|
+
1.0,
|
|
697
|
+
description="Initial backoff sleep (seconds) before the first retry.",
|
|
440
698
|
)
|
|
441
|
-
DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = (
|
|
442
|
-
2.0
|
|
699
|
+
DEEPEVAL_RETRY_EXP_BASE: confloat(ge=1) = Field(
|
|
700
|
+
2.0, description="Exponential backoff growth factor."
|
|
443
701
|
)
|
|
444
|
-
DEEPEVAL_RETRY_JITTER: confloat(ge=0) =
|
|
445
|
-
|
|
446
|
-
|
|
702
|
+
DEEPEVAL_RETRY_JITTER: confloat(ge=0) = Field(
|
|
703
|
+
2.0, description="Uniform jitter added to each retry sleep (seconds)."
|
|
704
|
+
)
|
|
705
|
+
DEEPEVAL_RETRY_CAP_SECONDS: confloat(ge=0) = Field(
|
|
706
|
+
5.0, description="Maximum backoff sleep per retry (seconds)."
|
|
447
707
|
)
|
|
448
708
|
|
|
449
709
|
#
|
|
450
710
|
# Telemetry and Debug
|
|
451
711
|
#
|
|
452
|
-
DEEPEVAL_DEBUG_ASYNC: Optional[bool] =
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
712
|
+
DEEPEVAL_DEBUG_ASYNC: Optional[bool] = Field(
|
|
713
|
+
None, description="Enable extra async debugging logs/behavior."
|
|
714
|
+
)
|
|
715
|
+
DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = Field(
|
|
716
|
+
None,
|
|
717
|
+
description="Opt out of DeepEval telemetry (OFF wins if conflicting legacy flags are set).",
|
|
718
|
+
)
|
|
719
|
+
DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = Field(
|
|
720
|
+
None,
|
|
721
|
+
description="Opt in to update warnings in the CLI/runtime when new versions are available.",
|
|
722
|
+
)
|
|
723
|
+
DEEPEVAL_GRPC_LOGGING: Optional[bool] = Field(
|
|
724
|
+
None,
|
|
725
|
+
description="Enable extra gRPC logging for Confident transport/debugging.",
|
|
726
|
+
)
|
|
727
|
+
GRPC_VERBOSITY: Optional[str] = Field(
|
|
728
|
+
None, description="gRPC verbosity (grpc env var passthrough)."
|
|
729
|
+
)
|
|
730
|
+
GRPC_TRACE: Optional[str] = Field(
|
|
731
|
+
None, description="gRPC trace categories (grpc env var passthrough)."
|
|
732
|
+
)
|
|
733
|
+
ERROR_REPORTING: Optional[bool] = Field(
|
|
734
|
+
None,
|
|
735
|
+
description="Enable/disable error reporting (implementation/integration dependent).",
|
|
736
|
+
)
|
|
737
|
+
IGNORE_DEEPEVAL_ERRORS: Optional[bool] = Field(
|
|
738
|
+
None,
|
|
739
|
+
description="Continue execution when DeepEval encounters certain recoverable errors.",
|
|
740
|
+
)
|
|
741
|
+
SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = Field(
|
|
742
|
+
None,
|
|
743
|
+
description="Skip metrics/test cases with missing required params instead of raising.",
|
|
744
|
+
)
|
|
745
|
+
DEEPEVAL_VERBOSE_MODE: Optional[bool] = Field(
|
|
746
|
+
None, description="Enable verbose logging and additional warnings."
|
|
747
|
+
)
|
|
748
|
+
DEEPEVAL_LOG_STACK_TRACES: Optional[bool] = Field(
|
|
749
|
+
None, description="Include stack traces in certain DeepEval error logs."
|
|
750
|
+
)
|
|
751
|
+
ENABLE_DEEPEVAL_CACHE: Optional[bool] = Field(
|
|
752
|
+
None,
|
|
753
|
+
description="Enable DeepEval caching where supported (may improve performance).",
|
|
754
|
+
)
|
|
755
|
+
|
|
756
|
+
CONFIDENT_TRACE_FLUSH: Optional[bool] = Field(
|
|
757
|
+
None,
|
|
758
|
+
description="Flush traces eagerly (useful for debugging; may add overhead).",
|
|
759
|
+
)
|
|
760
|
+
CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = Field(
|
|
761
|
+
"development",
|
|
762
|
+
description="Trace environment label (e.g. development/staging/production).",
|
|
763
|
+
)
|
|
764
|
+
CONFIDENT_TRACE_VERBOSE: Optional[bool] = Field(
|
|
765
|
+
True, description="Enable verbose trace logging for Confident tracing."
|
|
766
|
+
)
|
|
767
|
+
CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = Field(
|
|
768
|
+
1.0, description="Trace sampling rate (0–1). Lower to reduce overhead."
|
|
769
|
+
)
|
|
770
|
+
|
|
771
|
+
CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = Field(
|
|
772
|
+
None,
|
|
773
|
+
description="Flush metric logs eagerly (useful for debugging; may add overhead).",
|
|
774
|
+
)
|
|
775
|
+
CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = Field(
|
|
776
|
+
True, description="Enable verbose metric logging."
|
|
777
|
+
)
|
|
778
|
+
CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = Field(
|
|
779
|
+
1.0,
|
|
780
|
+
description="Metric logging sampling rate (0–1). Lower to reduce overhead.",
|
|
781
|
+
)
|
|
782
|
+
CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = Field(
|
|
783
|
+
True, description="Enable metric logging to Confident where supported."
|
|
784
|
+
)
|
|
785
|
+
|
|
786
|
+
CONFIDENT_OTEL_URL: Optional[AnyUrl] = Field(
|
|
787
|
+
"https://otel.confident-ai.com",
|
|
788
|
+
description="OpenTelemetry OTLP exporter endpoint (if using OTEL export).",
|
|
789
|
+
)
|
|
476
790
|
|
|
477
791
|
#
|
|
478
792
|
# Network
|
|
479
793
|
#
|
|
480
|
-
MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float =
|
|
481
|
-
|
|
794
|
+
MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = Field(
|
|
795
|
+
3.05,
|
|
796
|
+
description="Connect timeout (seconds) when fetching remote images for multimodal inputs.",
|
|
797
|
+
)
|
|
798
|
+
MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = Field(
|
|
799
|
+
10.0,
|
|
800
|
+
description="Read timeout (seconds) when fetching remote images for multimodal inputs.",
|
|
801
|
+
)
|
|
802
|
+
DEEPEVAL_DISABLE_TIMEOUTS: Optional[bool] = Field(
|
|
803
|
+
None,
|
|
804
|
+
description="Disable DeepEval-enforced timeouts (per-attempt, per-task, gather). Provider SDK timeouts may still apply.",
|
|
805
|
+
)
|
|
482
806
|
# DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE
|
|
483
807
|
# Per-attempt timeout (seconds) for provider calls used by the retry policy.
|
|
484
808
|
# This is an OVERRIDE setting. The effective value you should rely on at runtime is
|
|
@@ -491,20 +815,31 @@ class Settings(BaseSettings):
|
|
|
491
815
|
#
|
|
492
816
|
# Tip: Set this OR the outer override, but generally not both
|
|
493
817
|
DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(gt=0)] = (
|
|
494
|
-
|
|
818
|
+
Field(
|
|
819
|
+
None,
|
|
820
|
+
description="Override per-attempt provider call timeout (seconds). Leave unset to derive from task timeout.",
|
|
821
|
+
)
|
|
495
822
|
)
|
|
496
823
|
|
|
497
824
|
#
|
|
498
825
|
# Async Document Pipelines
|
|
499
826
|
#
|
|
500
827
|
|
|
501
|
-
DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) =
|
|
828
|
+
DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = Field(
|
|
829
|
+
2, description="Max concurrent async document processing tasks."
|
|
830
|
+
)
|
|
502
831
|
|
|
503
832
|
#
|
|
504
833
|
# Async Task Configuration
|
|
505
834
|
#
|
|
506
|
-
DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) =
|
|
507
|
-
|
|
835
|
+
DEEPEVAL_TIMEOUT_THREAD_LIMIT: conint(ge=1) = Field(
|
|
836
|
+
128,
|
|
837
|
+
description="Max worker threads used for timeout enforcement in async execution.",
|
|
838
|
+
)
|
|
839
|
+
DEEPEVAL_TIMEOUT_SEMAPHORE_WARN_AFTER_SECONDS: confloat(ge=0) = Field(
|
|
840
|
+
5.0,
|
|
841
|
+
description="Warn if waiting on the timeout semaphore longer than this many seconds.",
|
|
842
|
+
)
|
|
508
843
|
# DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE
|
|
509
844
|
# Outer time budget (seconds) for a single metric/test-case, including retries and backoff.
|
|
510
845
|
# This is an OVERRIDE setting. If None or 0 the DEEPEVAL_PER_TASK_TIMEOUT_SECONDS field is computed:
|
|
@@ -517,7 +852,12 @@ class Settings(BaseSettings):
|
|
|
517
852
|
# usage:
|
|
518
853
|
# - set DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE along with DEEPEVAL_RETRY_MAX_ATTEMPTS, or
|
|
519
854
|
# - set DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE alone.
|
|
520
|
-
DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] =
|
|
855
|
+
DEEPEVAL_PER_TASK_TIMEOUT_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
|
|
856
|
+
Field(
|
|
857
|
+
None,
|
|
858
|
+
description="Override outer per-test-case timeout budget (seconds), including retries/backoff. Leave unset to auto-derive.",
|
|
859
|
+
)
|
|
860
|
+
)
|
|
521
861
|
|
|
522
862
|
# Buffer time for gathering results from all tasks, added to the longest task duration
|
|
523
863
|
# Increase if many tasks are running concurrently
|
|
@@ -525,7 +865,10 @@ class Settings(BaseSettings):
|
|
|
525
865
|
# 30 # 15s seemed like not enough. we may make this computed later.
|
|
526
866
|
# )
|
|
527
867
|
DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE: Optional[confloat(ge=0)] = (
|
|
528
|
-
|
|
868
|
+
Field(
|
|
869
|
+
None,
|
|
870
|
+
description="Override buffer added to the longest task duration when gathering async results (seconds).",
|
|
871
|
+
)
|
|
529
872
|
)
|
|
530
873
|
|
|
531
874
|
###################
|
|
@@ -619,10 +962,16 @@ class Settings(BaseSettings):
|
|
|
619
962
|
##############
|
|
620
963
|
|
|
621
964
|
@field_validator(
|
|
965
|
+
"CONFIDENT_METRIC_LOGGING_ENABLED",
|
|
966
|
+
"CONFIDENT_METRIC_LOGGING_VERBOSE",
|
|
967
|
+
"CONFIDENT_METRIC_LOGGING_FLUSH",
|
|
622
968
|
"CONFIDENT_OPEN_BROWSER",
|
|
623
969
|
"CONFIDENT_TRACE_FLUSH",
|
|
624
970
|
"CONFIDENT_TRACE_VERBOSE",
|
|
625
971
|
"CUDA_LAUNCH_BLOCKING",
|
|
972
|
+
"DEEPEVAL_DEBUG_ASYNC",
|
|
973
|
+
"DEEPEVAL_LOG_STACK_TRACES",
|
|
974
|
+
"DEEPEVAL_DISABLE_TIMEOUTS",
|
|
626
975
|
"DEEPEVAL_VERBOSE_MODE",
|
|
627
976
|
"DEEPEVAL_GRPC_LOGGING",
|
|
628
977
|
"DEEPEVAL_DISABLE_DOTENV",
|
|
@@ -951,6 +1300,7 @@ class Settings(BaseSettings):
|
|
|
951
1300
|
self._save = save
|
|
952
1301
|
self._persist = persist
|
|
953
1302
|
self._before: Dict[str, Any] = {}
|
|
1303
|
+
self._touched: set[str] = set()
|
|
954
1304
|
self.result: Optional[PersistResult] = None
|
|
955
1305
|
|
|
956
1306
|
@property
|
|
@@ -959,122 +1309,168 @@ class Settings(BaseSettings):
|
|
|
959
1309
|
|
|
960
1310
|
def __enter__(self) -> "Settings._SettingsEditCtx":
|
|
961
1311
|
# snapshot current state
|
|
1312
|
+
self._token = _ACTIVE_SETTINGS_EDIT_CTX.set(self)
|
|
962
1313
|
self._before = {
|
|
963
1314
|
k: getattr(self._s, k) for k in type(self._s).model_fields
|
|
964
1315
|
}
|
|
965
1316
|
return self
|
|
966
1317
|
|
|
967
1318
|
def __exit__(self, exc_type, exc, tb):
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
from deepeval.config.settings_manager import (
|
|
972
|
-
update_settings_and_persist,
|
|
973
|
-
_normalize_for_env,
|
|
974
|
-
)
|
|
975
|
-
|
|
976
|
-
# lazy import legacy JSON store deps
|
|
977
|
-
from deepeval.key_handler import KEY_FILE_HANDLER
|
|
978
|
-
|
|
979
|
-
model_fields = type(self._s).model_fields
|
|
980
|
-
# Exclude computed fields from persistence
|
|
981
|
-
|
|
982
|
-
# compute diff of changed fields
|
|
983
|
-
after = {k: getattr(self._s, k) for k in model_fields}
|
|
984
|
-
|
|
985
|
-
before_norm = {
|
|
986
|
-
k: _normalize_for_env(v) for k, v in self._before.items()
|
|
987
|
-
}
|
|
988
|
-
after_norm = {k: _normalize_for_env(v) for k, v in after.items()}
|
|
1319
|
+
try:
|
|
1320
|
+
if exc_type is not None:
|
|
1321
|
+
return False # don’t persist on error
|
|
989
1322
|
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
1323
|
+
from deepeval.config.settings_manager import (
|
|
1324
|
+
update_settings_and_persist,
|
|
1325
|
+
_normalize_for_env,
|
|
1326
|
+
_resolve_save_path,
|
|
1327
|
+
)
|
|
994
1328
|
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1329
|
+
# lazy import legacy JSON store deps
|
|
1330
|
+
from deepeval.key_handler import KEY_FILE_HANDLER
|
|
1331
|
+
|
|
1332
|
+
model_fields = type(self._s).model_fields
|
|
1333
|
+
# Exclude computed fields from persistence
|
|
1334
|
+
|
|
1335
|
+
# compute diff of changed fields
|
|
1336
|
+
after = {k: getattr(self._s, k) for k in model_fields}
|
|
1337
|
+
|
|
1338
|
+
before_norm = {
|
|
1339
|
+
k: _normalize_for_env(v) for k, v in self._before.items()
|
|
1340
|
+
}
|
|
1341
|
+
after_norm = {
|
|
1342
|
+
k: _normalize_for_env(v) for k, v in after.items()
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
changed_keys = {
|
|
1346
|
+
k for k in after_norm if after_norm[k] != before_norm.get(k)
|
|
1347
|
+
}
|
|
1348
|
+
changed_keys -= self.COMPUTED_FIELDS
|
|
1349
|
+
touched_keys = set(self._touched) - self.COMPUTED_FIELDS
|
|
1350
|
+
|
|
1351
|
+
# dotenv should persist union(changed, touched)
|
|
1352
|
+
persist_dotenv = self._persist is not False
|
|
1353
|
+
ok, resolved_path = _resolve_save_path(self._save)
|
|
1354
|
+
|
|
1355
|
+
existing_dotenv = {}
|
|
1356
|
+
if persist_dotenv and ok and resolved_path is not None:
|
|
1357
|
+
existing_dotenv = read_dotenv_file(resolved_path)
|
|
1358
|
+
|
|
1359
|
+
candidate_keys_for_dotenv = (
|
|
1360
|
+
changed_keys | touched_keys
|
|
1361
|
+
) - self.COMPUTED_FIELDS
|
|
1362
|
+
|
|
1363
|
+
keys_for_dotenv: set[str] = set()
|
|
1364
|
+
for key in candidate_keys_for_dotenv:
|
|
1365
|
+
desired = after_norm.get(key) # normalized string or None
|
|
1366
|
+
if desired is None:
|
|
1367
|
+
# only need to unset if it's actually present in dotenv
|
|
1368
|
+
# if key in existing_dotenv:
|
|
1369
|
+
# keys_for_dotenv.add(key)
|
|
1370
|
+
keys_for_dotenv.add(key)
|
|
1371
|
+
else:
|
|
1372
|
+
if existing_dotenv.get(key) != desired:
|
|
1373
|
+
keys_for_dotenv.add(key)
|
|
998
1374
|
|
|
999
|
-
|
|
1375
|
+
updates_for_dotenv = {
|
|
1376
|
+
key: after[key] for key in keys_for_dotenv
|
|
1377
|
+
}
|
|
1000
1378
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1379
|
+
if not changed_keys and not updates_for_dotenv:
|
|
1380
|
+
if self._persist is False:
|
|
1381
|
+
# we report handled so that the cli does not mistakenly report invalid save option
|
|
1382
|
+
self.result = PersistResult(True, None, {})
|
|
1383
|
+
return False
|
|
1005
1384
|
|
|
1006
|
-
|
|
1385
|
+
ok, resolved_path = _resolve_save_path(self._save)
|
|
1386
|
+
self.result = PersistResult(ok, resolved_path, {})
|
|
1387
|
+
return False
|
|
1007
1388
|
|
|
1008
|
-
|
|
1009
|
-
# .deepeval JSON support
|
|
1010
|
-
#
|
|
1389
|
+
updates = {k: after[k] for k in changed_keys}
|
|
1011
1390
|
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
continue # skip if not a defined as legacy field
|
|
1391
|
+
if "LOG_LEVEL" in updates:
|
|
1392
|
+
from deepeval.config.logging import (
|
|
1393
|
+
apply_deepeval_log_level,
|
|
1394
|
+
)
|
|
1017
1395
|
|
|
1018
|
-
|
|
1019
|
-
# Remove from JSON if unset
|
|
1020
|
-
if val is None:
|
|
1021
|
-
KEY_FILE_HANDLER.remove_key(legacy_member)
|
|
1022
|
-
continue
|
|
1396
|
+
apply_deepeval_log_level()
|
|
1023
1397
|
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1398
|
+
#
|
|
1399
|
+
# .deepeval JSON support
|
|
1400
|
+
#
|
|
1027
1401
|
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1402
|
+
if self._persist is not False:
|
|
1403
|
+
for k in changed_keys:
|
|
1404
|
+
legacy_member = _find_legacy_enum(k)
|
|
1405
|
+
if legacy_member is None:
|
|
1406
|
+
continue # skip if not a defined as legacy field
|
|
1407
|
+
|
|
1408
|
+
val = updates[k]
|
|
1409
|
+
# Remove from JSON if unset
|
|
1410
|
+
if val is None:
|
|
1411
|
+
KEY_FILE_HANDLER.remove_key(legacy_member)
|
|
1412
|
+
continue
|
|
1413
|
+
|
|
1414
|
+
# Never store secrets in the JSON keystore
|
|
1415
|
+
if _is_secret_key(k):
|
|
1416
|
+
continue
|
|
1417
|
+
|
|
1418
|
+
# For booleans, the legacy store expects "YES"/"NO"
|
|
1419
|
+
if isinstance(val, bool):
|
|
1420
|
+
KEY_FILE_HANDLER.write_key(
|
|
1421
|
+
legacy_member, "YES" if val else "NO"
|
|
1422
|
+
)
|
|
1423
|
+
else:
|
|
1424
|
+
# store as string
|
|
1425
|
+
KEY_FILE_HANDLER.write_key(legacy_member, str(val))
|
|
1426
|
+
|
|
1427
|
+
#
|
|
1428
|
+
# dotenv store
|
|
1429
|
+
#
|
|
1430
|
+
|
|
1431
|
+
# defer import to avoid cyclics
|
|
1432
|
+
handled, path = update_settings_and_persist(
|
|
1433
|
+
updates_for_dotenv,
|
|
1434
|
+
save=self._save,
|
|
1435
|
+
persist_dotenv=persist_dotenv,
|
|
1436
|
+
)
|
|
1437
|
+
self.result = PersistResult(handled, path, updates_for_dotenv)
|
|
1438
|
+
return False
|
|
1439
|
+
finally:
|
|
1440
|
+
if self._token is not None:
|
|
1441
|
+
_ACTIVE_SETTINGS_EDIT_CTX.reset(self._token)
|
|
1049
1442
|
|
|
1050
1443
|
def switch_model_provider(self, target) -> None:
|
|
1051
1444
|
"""
|
|
1052
|
-
Flip
|
|
1053
|
-
Also, mirror this change into the legacy JSON keystore as "YES"/"NO".
|
|
1054
|
-
|
|
1055
|
-
`target` may be an Enum with `.value`, such as ModelKeyValues.USE_OPENAI_MODEL
|
|
1056
|
-
or a plain string like "USE_OPENAI_MODEL".
|
|
1445
|
+
Flip USE_* settings within the target's provider family (LLM vs embeddings).
|
|
1057
1446
|
"""
|
|
1058
1447
|
from deepeval.key_handler import KEY_FILE_HANDLER
|
|
1059
1448
|
|
|
1060
|
-
# Target key is the env style string, such as "USE_OPENAI_MODEL"
|
|
1061
1449
|
target_key = getattr(target, "value", str(target))
|
|
1062
1450
|
|
|
1451
|
+
def _is_embedding_flag(k: str) -> bool:
|
|
1452
|
+
return "EMBEDDING" in k
|
|
1453
|
+
|
|
1454
|
+
target_is_embedding = _is_embedding_flag(target_key)
|
|
1455
|
+
|
|
1063
1456
|
use_fields = [
|
|
1064
|
-
|
|
1457
|
+
field
|
|
1458
|
+
for field in type(self._s).model_fields
|
|
1459
|
+
if field.startswith("USE_")
|
|
1460
|
+
and _is_embedding_flag(field) == target_is_embedding
|
|
1065
1461
|
]
|
|
1462
|
+
|
|
1066
1463
|
if target_key not in use_fields:
|
|
1067
1464
|
raise ValueError(
|
|
1068
1465
|
f"{target_key} is not a recognized USE_* field"
|
|
1069
1466
|
)
|
|
1070
1467
|
|
|
1071
|
-
for
|
|
1072
|
-
on =
|
|
1073
|
-
|
|
1074
|
-
|
|
1468
|
+
for field in use_fields:
|
|
1469
|
+
on = field == target_key
|
|
1470
|
+
setattr(self._s, field, on)
|
|
1471
|
+
|
|
1075
1472
|
if self._persist is not False:
|
|
1076
|
-
|
|
1077
|
-
legacy_member = _find_legacy_enum(k)
|
|
1473
|
+
legacy_member = _find_legacy_enum(field)
|
|
1078
1474
|
if legacy_member is not None:
|
|
1079
1475
|
KEY_FILE_HANDLER.write_key(
|
|
1080
1476
|
legacy_member, "YES" if on else "NO"
|
|
@@ -1123,7 +1519,7 @@ class Settings(BaseSettings):
|
|
|
1123
1519
|
|
|
1124
1520
|
|
|
1125
1521
|
_settings_singleton: Optional[Settings] = None
|
|
1126
|
-
_settings_env_fingerprint:
|
|
1522
|
+
_settings_env_fingerprint: Optional[str] = None
|
|
1127
1523
|
_settings_lock = threading.RLock()
|
|
1128
1524
|
|
|
1129
1525
|
|