deepeval 3.5.4__py3-none-any.whl → 3.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
1
- __version__: str = "3.5.4"
1
+ __version__: str = "3.5.6"
deepeval/cli/main.py CHANGED
@@ -28,8 +28,6 @@ import typer
28
28
  from enum import Enum
29
29
  from pydantic import SecretStr
30
30
  from deepeval.key_handler import (
31
- KEY_FILE_HANDLER,
32
- KeyValues,
33
31
  EmbeddingKeyValues,
34
32
  ModelKeyValues,
35
33
  )
@@ -46,16 +44,9 @@ from deepeval.cli.utils import (
46
44
  render_login_message,
47
45
  upload_and_open_link,
48
46
  PROD,
49
- resolve_save_target,
50
- save_environ_to_store,
51
- unset_environ_in_store,
52
- switch_model_provider,
53
47
  )
54
48
  from deepeval.confident.api import (
55
- get_confident_api_key,
56
49
  is_confident,
57
- set_confident_api_key,
58
- CONFIDENT_API_KEY_ENV_VAR,
59
50
  )
60
51
 
61
52
  app = typer.Typer(name="deepeval")
@@ -109,7 +100,7 @@ def set_confident_region_command(
109
100
  # Add flag emojis based on region
110
101
  flag = "🇺🇸" if region == Regions.US else "🇪🇺"
111
102
 
112
- setting = get_settings()
103
+ settings = get_settings()
113
104
  with settings.edit(save=save) as edit_ctx:
114
105
  settings.CONFIDENT_REGION = region.value
115
106
 
@@ -282,23 +273,196 @@ def view():
282
273
  upload_and_open_link(_span=span)
283
274
 
284
275
 
285
- @app.command(name="enable-grpc-logging")
286
- def enable_grpc_logging(save: Optional[str] = None):
276
+ @app.command(name="set-debug")
277
+ def set_debug(
278
+ # Core verbosity
279
+ log_level: Optional[str] = typer.Option(
280
+ None,
281
+ "--log-level",
282
+ help="Global LOG_LEVEL (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET).",
283
+ ),
284
+ verbose: Optional[bool] = typer.Option(
285
+ None, "--verbose/--no-verbose", help="Toggle DEEPEVAL_VERBOSE_MODE."
286
+ ),
287
+ # Retry logging dials
288
+ retry_before_level: Optional[str] = typer.Option(
289
+ None,
290
+ "--retry-before-level",
291
+ help="Log level before a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
292
+ ),
293
+ retry_after_level: Optional[str] = typer.Option(
294
+ None,
295
+ "--retry-after-level",
296
+ help="Log level after a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
297
+ ),
298
+ # gRPC visibility
299
+ grpc: Optional[bool] = typer.Option(
300
+ None, "--grpc/--no-grpc", help="Toggle DEEPEVAL_GRPC_LOGGING."
301
+ ),
302
+ grpc_verbosity: Optional[str] = typer.Option(
303
+ None,
304
+ "--grpc-verbosity",
305
+ help="Set GRPC_VERBOSITY (DEBUG|INFO|ERROR|NONE).",
306
+ ),
307
+ grpc_trace: Optional[str] = typer.Option(
308
+ None,
309
+ "--grpc-trace",
310
+ help=(
311
+ "Set GRPC_TRACE to comma-separated tracer names or glob patterns "
312
+ "(e.g. 'tcp,http,secure_endpoint', '*' for all, 'list_tracers' to print available)."
313
+ ),
314
+ ),
315
+ # Confident tracing
316
+ trace_verbose: Optional[bool] = typer.Option(
317
+ None,
318
+ "--trace-verbose/--no-trace-verbose",
319
+ help="Enable / disable CONFIDENT_TRACE_VERBOSE.",
320
+ ),
321
+ trace_env: Optional[str] = typer.Option(
322
+ None,
323
+ "--trace-env",
324
+ help='Set CONFIDENT_TRACE_ENVIRONMENT ("development", "staging", "production", etc).',
325
+ ),
326
+ trace_flush: Optional[bool] = typer.Option(
327
+ None,
328
+ "--trace-flush/--no-trace-flush",
329
+ help="Enable / disable CONFIDENT_TRACE_FLUSH.",
330
+ ),
331
+ # Advanced / potentially surprising
332
+ error_reporting: Optional[bool] = typer.Option(
333
+ None,
334
+ "--error-reporting/--no-error-reporting",
335
+ help="Enable / disable ERROR_REPORTING.",
336
+ ),
337
+ ignore_errors: Optional[bool] = typer.Option(
338
+ None,
339
+ "--ignore-errors/--no-ignore-errors",
340
+ help="Enable / disable IGNORE_DEEPEVAL_ERRORS (not recommended in normal debugging).",
341
+ ),
342
+ # Persistence
343
+ save: Optional[str] = typer.Option(
344
+ None,
345
+ "--save",
346
+ help="Persist CLI parameters as environment variables in a dotenv file. "
347
+ "Usage: --save=dotenv[:path] (default: .env.local)",
348
+ ),
349
+ ):
287
350
  """
288
- Enable verbose gRPC logging for the current process.
289
- Pass --save=dotenv[:path] to persist it (optional).
351
+ Configure verbose debug behavior for DeepEval.
352
+
353
+ This command lets you mix-and-match verbosity flags (global LOG_LEVEL, verbose mode),
354
+ retry logger levels, gRPC wire logging, and Confident trace toggles. Values apply
355
+ immediately to the current process and can be persisted to a dotenv file with --save.
356
+
357
+ Examples:
358
+ deepeval set-debug --log-level DEBUG --verbose --grpc --retry-before-level DEBUG --retry-after-level INFO
359
+ deepeval set-debug --trace-verbose --trace-env staging --save dotenv:.env.local
290
360
  """
291
361
  settings = get_settings()
292
362
  with settings.edit(save=save) as edit_ctx:
293
- settings.DEEPEVAL_GRPC_LOGGING = True
363
+ # Core verbosity
364
+ if log_level is not None:
365
+ settings.LOG_LEVEL = log_level
366
+ if verbose is not None:
367
+ settings.DEEPEVAL_VERBOSE_MODE = verbose
368
+
369
+ # Retry logging
370
+ if retry_before_level is not None:
371
+ settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = retry_before_level
372
+ if retry_after_level is not None:
373
+ settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = retry_after_level
374
+
375
+ # gRPC
376
+ if grpc is not None:
377
+ settings.DEEPEVAL_GRPC_LOGGING = grpc
378
+ if grpc_verbosity is not None:
379
+ settings.GRPC_VERBOSITY = grpc_verbosity
380
+ if grpc_trace is not None:
381
+ settings.GRPC_TRACE = grpc_trace
382
+
383
+ # Confident tracing
384
+ if trace_verbose is not None:
385
+ settings.CONFIDENT_TRACE_VERBOSE = trace_verbose
386
+ if trace_env is not None:
387
+ settings.CONFIDENT_TRACE_ENVIRONMENT = trace_env
388
+ if trace_flush is not None:
389
+ settings.CONFIDENT_TRACE_FLUSH = trace_flush
390
+
391
+ # Advanced
392
+ if error_reporting is not None:
393
+ settings.ERROR_REPORTING = error_reporting
394
+ if ignore_errors is not None:
395
+ settings.IGNORE_DEEPEVAL_ERRORS = ignore_errors
396
+
397
+ handled, path, updated = edit_ctx.result
398
+
399
+ if not updated:
400
+ # no changes were made, so there is nothing to do.
401
+ return
402
+
403
+ if not handled and save is not None:
404
+ print("Unsupported --save option. Use --save=dotenv[:path].")
405
+ elif path:
406
+ print(
407
+ f"Saved environment variables to {path} (ensure it's git-ignored)."
408
+ )
409
+ else:
410
+ print(
411
+ "Settings updated for this session. To persist, use --save=dotenv[:path] "
412
+ "(default .env.local) or set DEEPEVAL_DEFAULT_SAVE=dotenv:.env.local"
413
+ )
414
+
415
+ print(":loud_sound: Debug options updated.")
416
+
417
+
418
+ @app.command(name="unset-debug")
419
+ def unset_debug(
420
+ save: Optional[str] = typer.Option(
421
+ None,
422
+ "--save",
423
+ help="Remove only the debug-related environment variables from a dotenv file. "
424
+ "Usage: --save=dotenv[:path] (default: .env.local)",
425
+ ),
426
+ ):
427
+ """
428
+ Restore default behavior by unsetting debug related variables.
429
+
430
+ Behavior:
431
+ - Resets LOG_LEVEL back to 'info'.
432
+ - Unsets DEEPEVAL_VERBOSE_MODE, retry log-level overrides, gRPC and Confident trace flags.
433
+ - If --save is provided (or DEEPEVAL_DEFAULT_SAVE is set), removes these keys from the target dotenv file.
434
+ """
435
+ settings = get_settings()
436
+ with settings.edit(save=save) as edit_ctx:
437
+ # Back to normal global level
438
+ settings.LOG_LEVEL = "info"
439
+ settings.CONFIDENT_TRACE_ENVIRONMENT = "development"
440
+ settings.CONFIDENT_TRACE_VERBOSE = True
441
+
442
+ # Clear optional toggles/overrides
443
+ settings.DEEPEVAL_VERBOSE_MODE = None
444
+ settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = None
445
+ settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = None
446
+
447
+ settings.DEEPEVAL_GRPC_LOGGING = None
448
+ settings.GRPC_VERBOSITY = None
449
+ settings.GRPC_TRACE = None
450
+
451
+ settings.CONFIDENT_TRACE_FLUSH = None
452
+
453
+ settings.ERROR_REPORTING = None
454
+ settings.IGNORE_DEEPEVAL_ERRORS = None
294
455
 
295
456
  handled, path, _ = edit_ctx.result
296
457
 
297
458
  if not handled and save is not None:
298
- # invalid --save format (unsupported)
299
459
  print("Unsupported --save option. Use --save=dotenv[:path].")
460
+ elif path:
461
+ print(f"Removed debug-related environment variables from {path}.")
300
462
  else:
301
- print("gRPC logging enabled.")
463
+ print("Debug settings reverted to defaults for this session.")
464
+
465
+ print(":mute: Debug options unset.")
302
466
 
303
467
 
304
468
  #############################################
@@ -1336,7 +1500,7 @@ def set_gemini_model_env(
1336
1500
  )
1337
1501
  else:
1338
1502
  print(
1339
- f":raising_hands: Congratulations! You're now using Gemini's model for all evals that require an LLM."
1503
+ ":raising_hands: Congratulations! You're now using Gemini's model for all evals that require an LLM."
1340
1504
  )
1341
1505
 
1342
1506
 
deepeval/config/settings.py CHANGED
@@ -281,6 +281,7 @@ class Settings(BaseSettings):
281
281
  #
282
282
  # Telemetry and Debug
283
283
  #
284
+ DEEPEVAL_DEBUG_ASYNC: Optional[bool] = None
284
285
  DEEPEVAL_TELEMETRY_OPT_OUT: Optional[bool] = None
285
286
  DEEPEVAL_UPDATE_WARNING_OPT_IN: Optional[bool] = None
286
287
  DEEPEVAL_GRPC_LOGGING: Optional[bool] = None
@@ -303,6 +304,19 @@ class Settings(BaseSettings):
303
304
  MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
304
305
  MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
305
306
 
307
+ #
308
+ # Async Task Configuration
309
+ #
310
+
311
+ # Maximum time allowed for a single task to complete
312
+ DEEPEVAL_PER_TASK_TIMEOUT_SECONDS: int = (
313
+ 300 # Set to float('inf') to disable timeout
314
+ )
315
+
316
+ # Buffer time for gathering results from all tasks, added to the longest task duration
317
+ # Increase if many tasks are running concurrently
318
+ DEEPEVAL_TASK_GATHER_BUFFER_SECONDS: int = 60
319
+
306
320
  ##############
307
321
  # Validators #
308
322
  ##############
deepeval/constants.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from enum import Enum
2
+ from typing import Union
2
3
 
3
4
  KEY_FILE: str = ".deepeval"
4
5
  HIDDEN_DIR: str = ".deepeval"
@@ -29,7 +30,7 @@ class ProviderSlug(str, Enum):
29
30
  OLLAMA = "ollama"
30
31
 
31
32
 
32
- def slugify(value: str | ProviderSlug) -> str:
33
+ def slugify(value: Union[str, ProviderSlug]) -> str:
33
34
  return (
34
35
  value.value
35
36
  if isinstance(value, ProviderSlug)
deepeval/dataset/dataset.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from asyncio import Task
2
- from typing import Iterator, List, Optional, Union, Literal
2
+ from typing import TYPE_CHECKING, Iterator, List, Optional, Union, Literal
3
3
  from dataclasses import dataclass, field
4
4
  from opentelemetry.trace import Tracer
5
5
  from opentelemetry.context import Context, attach, detach
@@ -7,7 +7,6 @@ from rich.console import Console
7
7
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
8
8
  import json
9
9
  import csv
10
- import webbrowser
11
10
  import os
12
11
  import datetime
13
12
  import time
@@ -17,6 +16,7 @@ from opentelemetry import baggage
17
16
 
18
17
  from deepeval.confident.api import Api, Endpoints, HttpMethods
19
18
  from deepeval.dataset.utils import (
19
+ coerce_to_task,
20
20
  convert_test_cases_to_goldens,
21
21
  convert_goldens_to_test_cases,
22
22
  convert_convo_goldens_to_convo_test_cases,
@@ -49,11 +49,18 @@ from deepeval.utils import (
49
49
  from deepeval.test_run import (
50
50
  global_test_run_manager,
51
51
  )
52
- from deepeval.dataset.types import global_evaluation_tasks
53
52
  from deepeval.openai.utils import openai_test_case_pairs
54
53
  from deepeval.tracing import trace_manager
55
54
  from deepeval.tracing.tracing import EVAL_DUMMY_SPAN_NAME
56
55
 
56
+ if TYPE_CHECKING:
57
+ from deepeval.evaluate.configs import (
58
+ AsyncConfig,
59
+ DisplayConfig,
60
+ CacheConfig,
61
+ ErrorConfig,
62
+ )
63
+
57
64
 
58
65
  valid_file_types = ["csv", "json", "jsonl"]
59
66
 
@@ -1230,7 +1237,7 @@ class EvaluationDataset:
1230
1237
  )
1231
1238
 
1232
1239
  def evaluate(self, task: Task):
1233
- global_evaluation_tasks.append(task)
1240
+ coerce_to_task(task)
1234
1241
 
1235
1242
  def _start_otel_test_run(self, tracer: Optional[Tracer] = None) -> Context:
1236
1243
  _tracer = check_tracer(tracer)
deepeval/dataset/types.py CHANGED
@@ -1,17 +1,25 @@
1
- class EvaluationTasks:
2
- tasks: list = []
1
+ import asyncio
3
2
 
4
- def append(self, t):
5
- self.tasks.append(t)
3
+ from typing import Any
4
+ from deepeval.dataset.utils import coerce_to_task
6
5
 
7
- def get_tasks(self):
8
- return self.tasks
9
6
 
10
- def num_tasks(self):
11
- return len(self.tasks)
7
+ class EvaluationTasks:
12
8
 
13
- def clear_tasks(self):
14
- self.tasks.clear()
9
+ def __init__(self):
10
+ self._tasks: list[asyncio.Future] = []
15
11
 
12
+ def append(self, obj: Any):
13
+ self._tasks.append(coerce_to_task(obj))
14
+
15
+ def get_tasks(self) -> list[asyncio.Future]:
16
+ return list(self._tasks)
17
+
18
+ def num_tasks(self):
19
+ return len(self._tasks)
16
20
 
17
- global_evaluation_tasks = EvaluationTasks()
21
+ def clear_tasks(self) -> None:
22
+ for t in self._tasks:
23
+ if not t.done():
24
+ t.cancel()
25
+ self._tasks.clear()
deepeval/dataset/utils.py CHANGED
@@ -1,10 +1,10 @@
1
- from typing import List, Optional, Any
1
+ import asyncio
2
+ import inspect
2
3
  import json
3
4
  import re
4
5
 
6
+ from typing import List, Optional, Any
5
7
  from opentelemetry.trace import Tracer
6
- from opentelemetry import trace
7
- from opentelemetry.trace import NoOpTracerProvider
8
8
 
9
9
  from deepeval.dataset.api import Golden
10
10
  from deepeval.dataset.golden import ConversationalGolden
@@ -174,3 +174,31 @@ def check_tracer(tracer: Optional[Tracer] = None) -> Tracer:
174
174
  )
175
175
 
176
176
  return GLOBAL_TEST_RUN_TRACER
177
+
178
+
179
+ def coerce_to_task(obj: Any) -> asyncio.Future[Any]:
180
+ # already a Task so just return it
181
+ if isinstance(obj, asyncio.Task):
182
+ return obj
183
+
184
+ # If it is a future, it is already scheduled, so just return it
185
+ if asyncio.isfuture(obj):
186
+ # type: ignore[return-value] # it is an awaitable, gather accepts it
187
+ return obj
188
+
189
+ # bare coroutine must be explicitly scheduled using create_task to bind to loop & track
190
+ if asyncio.iscoroutine(obj):
191
+ return asyncio.create_task(obj)
192
+
193
+ # generic awaitable (any object with __await__) will need to be wrapped so create_task accepts it
194
+ if inspect.isawaitable(obj):
195
+
196
+ async def _wrap(awaitable):
197
+ return await awaitable
198
+
199
+ return asyncio.create_task(_wrap(obj))
200
+
201
+ # not awaitable, so time to sound the alarm!
202
+ raise TypeError(
203
+ f"Expected Task/Future/coroutine/awaitable, got {type(obj).__name__}"
204
+ )