deepeval 3.7.6__py3-none-any.whl → 3.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/main.py +2022 -759
  3. deepeval/cli/utils.py +208 -36
  4. deepeval/config/dotenv_handler.py +19 -0
  5. deepeval/config/settings.py +658 -262
  6. deepeval/config/utils.py +9 -1
  7. deepeval/dataset/test_run_tracer.py +4 -6
  8. deepeval/evaluate/execute.py +153 -94
  9. deepeval/integrations/pydantic_ai/instrumentator.py +4 -2
  10. deepeval/integrations/pydantic_ai/otel.py +5 -1
  11. deepeval/key_handler.py +121 -51
  12. deepeval/metrics/base_metric.py +9 -3
  13. deepeval/metrics/g_eval/g_eval.py +6 -1
  14. deepeval/metrics/indicator.py +8 -4
  15. deepeval/metrics/mcp/mcp_task_completion.py +15 -16
  16. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +15 -15
  17. deepeval/metrics/mcp/schema.py +4 -0
  18. deepeval/metrics/mcp/template.py +8 -1
  19. deepeval/metrics/prompt_alignment/prompt_alignment.py +6 -3
  20. deepeval/metrics/tool_use/schema.py +4 -0
  21. deepeval/metrics/tool_use/template.py +16 -2
  22. deepeval/metrics/tool_use/tool_use.py +30 -28
  23. deepeval/metrics/topic_adherence/schema.py +4 -0
  24. deepeval/metrics/topic_adherence/template.py +8 -1
  25. deepeval/metrics/topic_adherence/topic_adherence.py +15 -14
  26. deepeval/metrics/turn_contextual_precision/template.py +8 -1
  27. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +44 -86
  28. deepeval/metrics/turn_contextual_recall/template.py +8 -1
  29. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +44 -82
  30. deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
  31. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +48 -92
  32. deepeval/metrics/turn_faithfulness/template.py +8 -1
  33. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +76 -130
  34. deepeval/metrics/utils.py +16 -1
  35. deepeval/models/__init__.py +2 -0
  36. deepeval/models/llms/__init__.py +2 -0
  37. deepeval/models/llms/amazon_bedrock_model.py +5 -4
  38. deepeval/models/llms/anthropic_model.py +4 -3
  39. deepeval/models/llms/azure_model.py +4 -3
  40. deepeval/models/llms/deepseek_model.py +5 -8
  41. deepeval/models/llms/grok_model.py +5 -8
  42. deepeval/models/llms/kimi_model.py +5 -8
  43. deepeval/models/llms/litellm_model.py +2 -0
  44. deepeval/models/llms/local_model.py +1 -1
  45. deepeval/models/llms/openai_model.py +4 -3
  46. deepeval/models/retry_policy.py +10 -5
  47. deepeval/models/utils.py +1 -5
  48. deepeval/simulator/conversation_simulator.py +6 -2
  49. deepeval/simulator/template.py +3 -1
  50. deepeval/synthesizer/synthesizer.py +19 -17
  51. deepeval/test_run/test_run.py +6 -1
  52. deepeval/utils.py +26 -0
  53. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/METADATA +3 -3
  54. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/RECORD +57 -56
  55. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/LICENSE.md +0 -0
  56. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/WHEEL +0 -0
  57. {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/entry_points.txt +0 -0
deepeval/cli/utils.py CHANGED
@@ -1,18 +1,30 @@
1
1
  from __future__ import annotations
2
-
2
+ import json
3
3
  import os
4
- import webbrowser
5
4
  import pyfiglet
5
+ import typer
6
+ import webbrowser
6
7
 
8
+ from pydantic import ValidationError
9
+ from pydantic.fields import FieldInfo
7
10
  from enum import Enum
8
11
  from pathlib import Path
9
12
  from rich import print
10
- from typing import Optional, Dict, Iterable, List, Tuple, Union
13
+ from typing import (
14
+ Any,
15
+ Dict,
16
+ Iterable,
17
+ Tuple,
18
+ Optional,
19
+ get_args,
20
+ get_origin,
21
+ Union,
22
+ )
11
23
  from opentelemetry.trace import Span
12
24
 
25
+ from deepeval.config.settings import Settings, get_settings
13
26
  from deepeval.key_handler import (
14
27
  KEY_FILE_HANDLER,
15
- KeyValues,
16
28
  ModelKeyValues,
17
29
  EmbeddingKeyValues,
18
30
  )
@@ -26,24 +38,21 @@ from deepeval.cli.dotenv_handler import DotenvHandler
26
38
  StrOrEnum = Union[str, "Enum"]
27
39
  PROD = "https://app.confident-ai.com"
28
40
  # List all mutually exclusive USE_* keys
29
- USE_MODEL_KEYS: List[ModelKeyValues | EmbeddingKeyValues] = [
30
- ModelKeyValues.USE_OPENAI_MODEL,
31
- ModelKeyValues.USE_AZURE_OPENAI,
32
- ModelKeyValues.USE_LOCAL_MODEL,
33
- ModelKeyValues.USE_GROK_MODEL,
34
- ModelKeyValues.USE_MOONSHOT_MODEL,
35
- ModelKeyValues.USE_DEEPSEEK_MODEL,
36
- ModelKeyValues.USE_GEMINI_MODEL,
37
- ModelKeyValues.USE_LITELLM,
38
- EmbeddingKeyValues.USE_AZURE_OPENAI_EMBEDDING,
39
- EmbeddingKeyValues.USE_LOCAL_EMBEDDINGS,
40
- # MAINTENANCE: add more if new USE_* keys appear
41
# Names of USE_* flags that are both Settings fields and ModelKeyValues
# members, kept in Settings.model_fields iteration order.
USE_LLM_KEYS = [
    field_name
    for field_name in Settings.model_fields
    if field_name in ModelKeyValues.__members__
    and field_name.startswith("USE_")
]

# Same selection, but for flags that are EmbeddingKeyValues members.
USE_EMBED_KEYS = [
    field_name
    for field_name in Settings.model_fields
    if field_name in EmbeddingKeyValues.__members__
    and field_name.startswith("USE_")
]
42
51
 
43
52
 
44
53
  def render_login_message():
45
54
  print(
46
- f"🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the DeepEval cloud platform 🏡❤️"
55
+ "🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the DeepEval cloud platform 🏡❤️"
47
56
  )
48
57
  print("")
49
58
  print(pyfiglet.Figlet(font="big_money-ne").renderText("DeepEval Cloud"))
@@ -72,7 +81,7 @@ def upload_and_open_link(_span: Span):
72
81
  else:
73
82
  print("❌ API Key cannot be empty. Please try again.\n")
74
83
 
75
- print(f"📤 Uploading test run to Confident AI...")
84
+ print("📤 Uploading test run to Confident AI...")
76
85
  global_test_run_manager.post_test_run(last_test_run_data)
77
86
  else:
78
87
  print(
@@ -91,7 +100,7 @@ def clear_embedding_model_keys():
91
100
 
92
101
 
93
102
  def _to_str_key(k: StrOrEnum) -> str:
94
- return k.value if hasattr(k, "value") else str(k)
103
+ return k.name if hasattr(k, "name") else str(k)
95
104
 
96
105
 
97
106
  def _normalize_kv(updates: Dict[StrOrEnum, str]) -> Dict[str, str]:
@@ -102,9 +111,14 @@ def _normalize_keys(keys: Iterable[StrOrEnum]) -> list[str]:
102
111
  return [_to_str_key(k) for k in keys]
103
112
 
104
113
 
114
+ def _normalize_setting_key(raw_key: str) -> str:
115
+ """Normalize CLI keys like 'log-level' / 'LOG_LEVEL' to model field names."""
116
+ return raw_key.strip().lower().replace("-", "_")
117
+
118
+
105
119
  def _parse_save_option(
106
- save_opt: str | None, default_path: str = ".env.local"
107
- ) -> Tuple[bool, str | None]:
120
+ save_opt: Optional[str] = None, default_path: str = ".env.local"
121
+ ) -> Tuple[bool, Optional[str]]:
108
122
  if not save_opt:
109
123
  return False, None
110
124
  kind, *rest = save_opt.split(":", 1)
@@ -133,8 +147,8 @@ def resolve_save_target(save_opt: Optional[str]) -> Optional[str]:
133
147
 
134
148
 
135
149
  def save_environ_to_store(
136
- save_opt: str | None, updates: Dict[StrOrEnum, str]
137
- ) -> Tuple[bool, str | None]:
150
+ updates: Dict[StrOrEnum, str], save_opt: Optional[str] = None
151
+ ) -> Tuple[bool, Optional[str]]:
138
152
  """
139
153
  Save 'updates' into the selected store (currently only dotenv). Idempotent upsert.
140
154
  Returns (handled, path).
@@ -148,8 +162,8 @@ def save_environ_to_store(
148
162
 
149
163
 
150
164
  def unset_environ_in_store(
151
- save_opt: str | None, keys: Iterable[StrOrEnum]
152
- ) -> Tuple[bool, str | None]:
165
+ keys: Iterable[StrOrEnum], save_opt: Optional[str] = None
166
+ ) -> Tuple[bool, Optional[str]]:
153
167
  """
154
168
  Remove keys from the selected store (currently only dotenv).
155
169
  Returns (handled, path).
@@ -163,19 +177,177 @@ def unset_environ_in_store(
163
177
  return True, path
164
178
 
165
179
 
166
- def switch_model_provider(target: ModelKeyValues, save: str = None) -> None:
180
def _as_legacy_use_key(
    k: str,
) -> Union[ModelKeyValues, EmbeddingKeyValues, None]:
    """
    Look up the enum member whose name is `k`.

    ModelKeyValues is consulted before EmbeddingKeyValues, so a name present
    in both resolves to the ModelKeyValues member. Returns None when neither
    enum has a member with that name.
    """
    for enum_cls in (ModelKeyValues, EmbeddingKeyValues):
        if k in enum_cls.__members__:
            return enum_cls[k]
    return None
188
+
189
+
190
def switch_model_provider(
    target: Union[ModelKeyValues, EmbeddingKeyValues],
    save: Optional[str] = None,
) -> Tuple[bool, Optional[str]]:
    """
    Ensure exactly one USE_* flag is enabled within the target's key family.

    A ModelKeyValues target is checked against USE_LLM_KEYS; any other target
    is checked against USE_EMBED_KEYS. The other USE_* keys are *unset*
    (instead of being written as an explicit "NO") to:
      - keep dotenv clean
      - preserve Optional[bool] semantics (unset vs explicit false)

    Returns:
        (True, None) when `save` is falsy (only the JSON key store is touched),
        (False, None) when the save target is not handled by
        unset_environ_in_store, otherwise whatever save_environ_to_store
        returns for the target flag.

    Raises:
        ValueError: if the target's name is not in its family's USE_* list.
    """
    if isinstance(target, ModelKeyValues):
        family_keys = USE_LLM_KEYS
    else:
        family_keys = USE_EMBED_KEYS

    if target.name not in family_keys:
        raise ValueError(f"{target} is not a recognized USE_* model key")

    # Clear legacy JSON store entries for the whole family, then enable the
    # requested flag. The generator is lazy, so lookup and removal still
    # alternate key by key.
    legacy_members = (_as_legacy_use_key(name) for name in family_keys)
    for member in legacy_members:
        if member is not None:
            KEY_FILE_HANDLER.remove_key(member)
    KEY_FILE_HANDLER.write_key(target, "YES")

    if not save:
        return True, None

    # Mirror the change in the requested store: drop every family flag first
    # (the target included), then write the target back as "true".
    handled, _ = unset_environ_in_store(family_keys, save)
    if handled:
        return save_environ_to_store({target: "true"}, save)
    return False, None
223
+
224
+
225
def coerce_blank_to_none(value: Optional[str]) -> Optional[str]:
    """
    Collapse missing or blank input to None.

    Returns the whitespace-stripped string when anything remains after
    stripping; returns None for None, "" and whitespace-only input.
    """
    stripped = "" if value is None else value.strip()
    return stripped if stripped else None
231
+
232
+
233
def load_service_account_key_file(path: Path) -> str:
    """
    Read a service account key file and return its JSON as one compact line.

    Args:
        path: Location of the key file. It is read as UTF-8 text.

    Returns:
        The file's JSON re-serialized with compact separators (no spaces
        after "," or ":"), so it fits on a single dotenv line.

    Raises:
        typer.BadParameter: if the file cannot be read or is not valid UTF-8,
            is empty or whitespace-only, or does not contain valid JSON.
    """
    try:
        raw = path.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError. Without catching
        # it, a binary or wrongly encoded file would surface as a raw
        # traceback instead of a CLI parameter error.
        raise typer.BadParameter(
            f"Could not read service account file: {path}",
            param_hint="--service-account-file",
        ) from e

    if not raw:
        raise typer.BadParameter(
            f"Service account file is empty: {path}",
            param_hint="--service-account-file",
        )

    # Validate it's JSON and normalize to a single-line string for dotenv.
    try:
        obj = json.loads(raw)
    except json.JSONDecodeError as e:
        raise typer.BadParameter(
            f"Service account file does not contain valid JSON: {path}",
            param_hint="--service-account-file",
        ) from e

    return json.dumps(obj, separators=(",", ":"))
258
+
177
259
 
178
- if save:
179
- handled, path = save_environ_to_store(save, {key: value})
180
- if not handled:
181
- print("Unsupported --save option. Use --save=dotenv[:path].")
260
def unwrap_optional(annotation: Any) -> Any:
    """
    If `annotation` is Optional[T] (i.e. Union[T, None] or `T | None`), return T.
    Otherwise return `annotation` unchanged.

    Note: If it's a Union with multiple non-None members, we leave it unchanged.
    """
    import types  # local import: only needed to recognise PEP 604 unions

    # get_origin() reports typing.Union for Optional[T] / Union[...], but
    # types.UnionType for `T | None` on Python 3.10-3.13. types.UnionType
    # does not exist before 3.10, so fall back to Union there.
    pep604_union = getattr(types, "UnionType", Union)

    origin = get_origin(annotation)
    if origin is Union or origin is pep604_union:
        non_none = [a for a in get_args(annotation) if a is not type(None)]
        if len(non_none) == 1:
            return non_none[0]
    return annotation
273
+
274
+
275
def looks_like_json_container_literal(raw_value: str) -> bool:
    """
    Report whether the stripped text is delimited like a JSON object or array.

    This is a purely textual check on the first and last characters
    ('{'...'}' or '['...']'); the content in between is not parsed.
    """
    text = raw_value.strip()
    bracket_pairs = (("{", "}"), ("[", "]"))
    return any(
        text.startswith(opener) and text.endswith(closer)
        for opener, closer in bracket_pairs
    )
280
+
281
+
282
def should_parse_json_for_field(field_info: FieldInfo) -> bool:
    """
    Report whether the field's annotation is a list, dict, tuple or set.

    An Optional[...] wrapper is removed first, and a parameterized generic
    such as List[int] is reduced to its origin (list) before the comparison.
    """
    inner = unwrap_optional(field_info.annotation)
    container_type = get_origin(inner) or inner
    json_container_types = (list, dict, tuple, set)
    return container_type in json_container_types
286
+
287
+
288
def maybe_parse_json_literal(raw_value: str, field_info) -> object:
    """
    Decode `raw_value` as JSON when both the value and the field call for it.

    The value is decoded only if it is a string, is delimited like a JSON
    object/array, and the field's annotation is a container type. In every
    other case it is returned untouched.

    Raises:
        typer.BadParameter: if decoding is attempted and json.loads fails
            for any reason.
    """
    wants_json = (
        isinstance(raw_value, str)
        and looks_like_json_container_literal(raw_value)
        and should_parse_json_for_field(field_info)
    )
    if not wants_json:
        return raw_value

    try:
        parsed = json.loads(raw_value)
    except Exception as e:
        raise typer.BadParameter(f"Invalid JSON for {field_info}: {e}") from e
    return parsed
299
+
300
+
301
def resolve_field_names(settings, query: str) -> list[str]:
    """
    Return the Settings field names matching a case-insensitive query.

    Both the query and each field name are compared in normalized form
    (see _normalize_setting_key). Exact matches win: if any field equals the
    query, only those are returned. Otherwise every field whose normalized
    name contains the query as a substring is returned, in field order.
    """
    fields = type(settings).model_fields
    needle = _normalize_setting_key(query)
    normalized = {name: _normalize_setting_key(name) for name in fields}

    exact_hits = [name for name, norm in normalized.items() if norm == needle]
    if exact_hits:
        return exact_hits

    return [name for name, norm in normalized.items() if needle in norm]
317
+
318
+
319
def is_optional(annotation) -> bool:
    """
    Report whether `annotation` is a union that includes None.

    Both spellings are recognised: Optional[T] / Union[..., None] and the
    PEP 604 form `T | None`. Non-union annotations always yield False.
    """
    import types  # local import: only needed to recognise PEP 604 unions

    # `T | None` has origin types.UnionType on Python 3.10-3.13, not
    # typing.Union. The attribute is absent before 3.10, hence the fallback.
    pep604_union = getattr(types, "UnionType", Union)

    origin = get_origin(annotation)
    if origin is Union or origin is pep604_union:
        return type(None) in get_args(annotation)
    return False
324
+
325
+
326
def parse_and_validate(field_name: str, field_info, raw: str):
    """
    Validate and coerce a CLI value by delegating to the Settings model.

    The current settings are dumped, `field_name` is overridden with the
    (possibly JSON-decoded) raw value, and the whole payload is re-validated,
    so field validators like LOG_LEVEL coercion (e.g. 'error' -> numeric log
    level) are applied.

    Returns:
        The validated value of `field_name` as stored on the model.

    Raises:
        typer.BadParameter: if validation fails. Messages for errors located
            at `field_name` are preferred; if none are found, the full
            validation error text is used.
    """
    settings = get_settings()
    settings_cls = type(settings)
    value: object = maybe_parse_json_literal(raw, field_info)

    payload = settings.model_dump(mode="python")
    payload[field_name] = value

    try:
        validated = settings_cls.model_validate(payload)
    except ValidationError as e:
        # Surface field-specific error(s) if possible: keep only the errors
        # whose location starts with this field.
        field_errors: list[str] = [
            err.get("msg") or str(err)
            for err in e.errors()
            if (loc := err.get("loc") or ()) and loc[0] == field_name
        ]
        detail = "; ".join(field_errors) if field_errors else str(e)
        raise typer.BadParameter(
            f"Invalid value for {field_name}: {raw!r}. {detail}"
        ) from e

    return getattr(validated, field_name)
@@ -0,0 +1,19 @@
1
+ from dotenv import set_key, unset_key
2
+ from pathlib import Path
3
+
4
+
5
class DotenvHandler:
    """Write and remove keys in a single dotenv file via dotenv's set_key/unset_key."""

    def __init__(self, path: Path):
        # Always stored as a Path, whatever path-like value was passed in.
        self.path = Path(path)

    def upsert(self, mapping: dict[str, str]) -> None:
        """
        Set every key in `mapping` in the dotenv file.

        The parent directories and the file itself are created first if they
        are missing. Each entry is written with quote_mode="always".
        """
        env_file = self.path
        env_file.parent.mkdir(parents=True, exist_ok=True)
        env_file.touch(exist_ok=True)

        target = str(env_file)
        for name, val in mapping.items():
            set_key(target, name, val, quote_mode="always")

    def unset(self, keys: set[str]) -> None:
        """Remove each key from the dotenv file; do nothing if the file does not exist."""
        if self.path.exists():
            target = str(self.path)
            for name in keys:
                unset_key(target, name)