deepeval 3.7.6__py3-none-any.whl → 3.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +2022 -759
- deepeval/cli/utils.py +208 -36
- deepeval/config/dotenv_handler.py +19 -0
- deepeval/config/settings.py +658 -262
- deepeval/config/utils.py +9 -1
- deepeval/dataset/test_run_tracer.py +4 -6
- deepeval/evaluate/execute.py +153 -94
- deepeval/integrations/pydantic_ai/instrumentator.py +4 -2
- deepeval/integrations/pydantic_ai/otel.py +5 -1
- deepeval/key_handler.py +121 -51
- deepeval/metrics/base_metric.py +9 -3
- deepeval/metrics/g_eval/g_eval.py +6 -1
- deepeval/metrics/indicator.py +8 -4
- deepeval/metrics/mcp/mcp_task_completion.py +15 -16
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +15 -15
- deepeval/metrics/mcp/schema.py +4 -0
- deepeval/metrics/mcp/template.py +8 -1
- deepeval/metrics/prompt_alignment/prompt_alignment.py +6 -3
- deepeval/metrics/tool_use/schema.py +4 -0
- deepeval/metrics/tool_use/template.py +16 -2
- deepeval/metrics/tool_use/tool_use.py +30 -28
- deepeval/metrics/topic_adherence/schema.py +4 -0
- deepeval/metrics/topic_adherence/template.py +8 -1
- deepeval/metrics/topic_adherence/topic_adherence.py +15 -14
- deepeval/metrics/turn_contextual_precision/template.py +8 -1
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +44 -86
- deepeval/metrics/turn_contextual_recall/template.py +8 -1
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +44 -82
- deepeval/metrics/turn_contextual_relevancy/template.py +8 -1
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +48 -92
- deepeval/metrics/turn_faithfulness/template.py +8 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +76 -130
- deepeval/metrics/utils.py +16 -1
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/amazon_bedrock_model.py +5 -4
- deepeval/models/llms/anthropic_model.py +4 -3
- deepeval/models/llms/azure_model.py +4 -3
- deepeval/models/llms/deepseek_model.py +5 -8
- deepeval/models/llms/grok_model.py +5 -8
- deepeval/models/llms/kimi_model.py +5 -8
- deepeval/models/llms/litellm_model.py +2 -0
- deepeval/models/llms/local_model.py +1 -1
- deepeval/models/llms/openai_model.py +4 -3
- deepeval/models/retry_policy.py +10 -5
- deepeval/models/utils.py +1 -5
- deepeval/simulator/conversation_simulator.py +6 -2
- deepeval/simulator/template.py +3 -1
- deepeval/synthesizer/synthesizer.py +19 -17
- deepeval/test_run/test_run.py +6 -1
- deepeval/utils.py +26 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/METADATA +3 -3
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/RECORD +57 -56
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/WHEEL +0 -0
- {deepeval-3.7.6.dist-info → deepeval-3.7.8.dist-info}/entry_points.txt +0 -0
deepeval/cli/utils.py
CHANGED
|
@@ -1,18 +1,30 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
import json
|
|
3
3
|
import os
|
|
4
|
-
import webbrowser
|
|
5
4
|
import pyfiglet
|
|
5
|
+
import typer
|
|
6
|
+
import webbrowser
|
|
6
7
|
|
|
8
|
+
from pydantic import ValidationError
|
|
9
|
+
from pydantic.fields import FieldInfo
|
|
7
10
|
from enum import Enum
|
|
8
11
|
from pathlib import Path
|
|
9
12
|
from rich import print
|
|
10
|
-
from typing import
|
|
13
|
+
from typing import (
|
|
14
|
+
Any,
|
|
15
|
+
Dict,
|
|
16
|
+
Iterable,
|
|
17
|
+
Tuple,
|
|
18
|
+
Optional,
|
|
19
|
+
get_args,
|
|
20
|
+
get_origin,
|
|
21
|
+
Union,
|
|
22
|
+
)
|
|
11
23
|
from opentelemetry.trace import Span
|
|
12
24
|
|
|
25
|
+
from deepeval.config.settings import Settings, get_settings
|
|
13
26
|
from deepeval.key_handler import (
|
|
14
27
|
KEY_FILE_HANDLER,
|
|
15
|
-
KeyValues,
|
|
16
28
|
ModelKeyValues,
|
|
17
29
|
EmbeddingKeyValues,
|
|
18
30
|
)
|
|
@@ -26,24 +38,21 @@ from deepeval.cli.dotenv_handler import DotenvHandler
|
|
|
26
38
|
StrOrEnum = Union[str, "Enum"]
|
|
27
39
|
PROD = "https://app.confident-ai.com"
|
|
28
40
|
# List all mutually exclusive USE_* keys
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
ModelKeyValues.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
EmbeddingKeyValues.USE_AZURE_OPENAI_EMBEDDING,
|
|
39
|
-
EmbeddingKeyValues.USE_LOCAL_EMBEDDINGS,
|
|
40
|
-
# MAINTENANCE: add more if new USE_* keys appear
|
|
41
|
+
# Provider-selection switches, derived from the Settings model rather than
# hand-maintained: every Settings field that is named USE_* and is also a
# member of the corresponding key enum.
USE_LLM_KEYS = [
    field_name
    for field_name in Settings.model_fields
    if field_name in ModelKeyValues.__members__
    and field_name.startswith("USE_")
]
USE_EMBED_KEYS = [
    field_name
    for field_name in Settings.model_fields
    if field_name in EmbeddingKeyValues.__members__
    and field_name.startswith("USE_")
]
|
|
42
51
|
|
|
43
52
|
|
|
44
53
|
def render_login_message():
|
|
45
54
|
print(
|
|
46
|
-
|
|
55
|
+
"🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the DeepEval cloud platform 🏡❤️"
|
|
47
56
|
)
|
|
48
57
|
print("")
|
|
49
58
|
print(pyfiglet.Figlet(font="big_money-ne").renderText("DeepEval Cloud"))
|
|
@@ -72,7 +81,7 @@ def upload_and_open_link(_span: Span):
|
|
|
72
81
|
else:
|
|
73
82
|
print("❌ API Key cannot be empty. Please try again.\n")
|
|
74
83
|
|
|
75
|
-
print(
|
|
84
|
+
print("📤 Uploading test run to Confident AI...")
|
|
76
85
|
global_test_run_manager.post_test_run(last_test_run_data)
|
|
77
86
|
else:
|
|
78
87
|
print(
|
|
@@ -91,7 +100,7 @@ def clear_embedding_model_keys():
|
|
|
91
100
|
|
|
92
101
|
|
|
93
102
|
def _to_str_key(k: StrOrEnum) -> str:
|
|
94
|
-
return k.
|
|
103
|
+
return k.name if hasattr(k, "name") else str(k)
|
|
95
104
|
|
|
96
105
|
|
|
97
106
|
def _normalize_kv(updates: Dict[StrOrEnum, str]) -> Dict[str, str]:
|
|
@@ -102,9 +111,14 @@ def _normalize_keys(keys: Iterable[StrOrEnum]) -> list[str]:
|
|
|
102
111
|
return [_to_str_key(k) for k in keys]
|
|
103
112
|
|
|
104
113
|
|
|
114
|
+
def _normalize_setting_key(raw_key: str) -> str:
|
|
115
|
+
"""Normalize CLI keys like 'log-level' / 'LOG_LEVEL' to model field names."""
|
|
116
|
+
return raw_key.strip().lower().replace("-", "_")
|
|
117
|
+
|
|
118
|
+
|
|
105
119
|
def _parse_save_option(
|
|
106
|
-
save_opt: str
|
|
107
|
-
) -> Tuple[bool, str
|
|
120
|
+
save_opt: Optional[str] = None, default_path: str = ".env.local"
|
|
121
|
+
) -> Tuple[bool, Optional[str]]:
|
|
108
122
|
if not save_opt:
|
|
109
123
|
return False, None
|
|
110
124
|
kind, *rest = save_opt.split(":", 1)
|
|
@@ -133,8 +147,8 @@ def resolve_save_target(save_opt: Optional[str]) -> Optional[str]:
|
|
|
133
147
|
|
|
134
148
|
|
|
135
149
|
def save_environ_to_store(
|
|
136
|
-
|
|
137
|
-
) -> Tuple[bool, str
|
|
150
|
+
updates: Dict[StrOrEnum, str], save_opt: Optional[str] = None
|
|
151
|
+
) -> Tuple[bool, Optional[str]]:
|
|
138
152
|
"""
|
|
139
153
|
Save 'updates' into the selected store (currently only dotenv). Idempotent upsert.
|
|
140
154
|
Returns (handled, path).
|
|
@@ -148,8 +162,8 @@ def save_environ_to_store(
|
|
|
148
162
|
|
|
149
163
|
|
|
150
164
|
def unset_environ_in_store(
|
|
151
|
-
|
|
152
|
-
) -> Tuple[bool, str
|
|
165
|
+
keys: Iterable[StrOrEnum], save_opt: Optional[str] = None
|
|
166
|
+
) -> Tuple[bool, Optional[str]]:
|
|
153
167
|
"""
|
|
154
168
|
Remove keys from the selected store (currently only dotenv).
|
|
155
169
|
Returns (handled, path).
|
|
@@ -163,19 +177,177 @@ def unset_environ_in_store(
|
|
|
163
177
|
return True, path
|
|
164
178
|
|
|
165
179
|
|
|
166
|
-
def
|
|
180
|
+
def _as_legacy_use_key(
    k: str,
) -> Union[ModelKeyValues, EmbeddingKeyValues, None]:
    """Look up the enum member named ``k``.

    ModelKeyValues is consulted first, then EmbeddingKeyValues. Returns
    None when neither enum has a member with that name.
    """
    for key_enum in (ModelKeyValues, EmbeddingKeyValues):
        if k in key_enum.__members__:
            return key_enum[k]
    return None
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def switch_model_provider(
    target: Union[ModelKeyValues, EmbeddingKeyValues],
    save: Optional[str] = None,
) -> Tuple[bool, Optional[str]]:
    """
    Make ``target`` the only enabled USE_* flag within its family.

    The other USE_* keys are *unset* rather than written as an explicit "NO",
    which keeps the dotenv file clean and preserves Optional[bool] semantics
    (unset vs explicit false).

    Returns the (handled, path) pair: (True, None) when no ``save`` option is
    given, (False, None) when the store did not handle the unset, otherwise
    whatever save_environ_to_store returns.

    Raises ValueError when ``target`` is not one of the known USE_* keys.
    """
    # Pick the family of switches that ``target`` belongs to.
    if isinstance(target, ModelKeyValues):
        family_keys = USE_LLM_KEYS
    else:
        family_keys = USE_EMBED_KEYS

    if target.name not in family_keys:
        raise ValueError(f"{target} is not a recognized USE_* model key")

    # Drop every family switch from the legacy JSON store, then enable the target.
    for key_name in family_keys:
        legacy_member = _as_legacy_use_key(key_name)
        if legacy_member is None:
            continue
        KEY_FILE_HANDLER.remove_key(legacy_member)

    KEY_FILE_HANDLER.write_key(target, "YES")

    if not save:
        return True, None

    # Mirror the same state into the selected store: unset all, then set target.
    handled, _path = unset_environ_in_store(family_keys, save)
    if handled:
        return save_environ_to_store({target: "true"}, save)
    return False, None
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def coerce_blank_to_none(value: Optional[str]) -> Optional[str]:
    """Strip ``value``; return None for None, empty or whitespace-only input."""
    stripped = "" if value is None else value.strip()
    if stripped:
        return stripped
    return None
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def load_service_account_key_file(path: Path) -> str:
    """Read a service account key file and return its JSON on a single line.

    Args:
        path: Location of the JSON key file.

    Returns:
        The file's JSON re-serialized with compact separators, suitable for
        storing as one dotenv value.

    Raises:
        typer.BadParameter: If the file cannot be read or decoded, is empty,
            or does not contain valid JSON.
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
        # json.loads would otherwise reject.
        raw = path.read_text(encoding="utf-8-sig").strip()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError; without it a non-UTF-8 file
        # would surface as a raw traceback instead of a CLI parameter error.
        raise typer.BadParameter(
            f"Could not read service account file: {path}",
            param_hint="--service-account-file",
        ) from e

    if not raw:
        raise typer.BadParameter(
            f"Service account file is empty: {path}",
            param_hint="--service-account-file",
        )

    # Validate it's JSON and normalize to a single-line string for dotenv.
    try:
        obj = json.loads(raw)
    except json.JSONDecodeError as e:
        raise typer.BadParameter(
            f"Service account file does not contain valid JSON: {path}",
            param_hint="--service-account-file",
        ) from e

    return json.dumps(obj, separators=(",", ":"))
|
|
258
|
+
|
|
177
259
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
260
|
+
def unwrap_optional(annotation: Any) -> Any:
    """
    If `annotation` is Optional[T] (i.e. Union[T, None] or `T | None`), return T.
    Otherwise return `annotation` unchanged.

    Note: If it's a Union with multiple non-None members, we leave it unchanged.
    """
    import types  # function-scope import: only needed for the PEP 604 check

    origin = get_origin(annotation)
    # PEP 604 unions (`T | None`) report types.UnionType as their origin on
    # Python 3.10-3.13, not typing.Union. Fall back to Union where the
    # attribute does not exist so the comparison stays a no-op there.
    pep604_union = getattr(types, "UnionType", Union)
    if origin is Union or origin is pep604_union:
        non_none = [a for a in get_args(annotation) if a is not type(None)]
        if len(non_none) == 1:
            return non_none[0]
    return annotation
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def looks_like_json_container_literal(raw_value: str) -> bool:
    """Tell whether the stripped text is wrapped in ``{...}`` or ``[...]``.

    Only the first and last characters are inspected; the content in between
    is not validated as JSON.
    """
    text = raw_value.strip()
    for opener, closer in (("{", "}"), ("[", "]")):
        if text.startswith(opener) and text.endswith(closer):
            return True
    return False
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def should_parse_json_for_field(field_info: FieldInfo) -> bool:
    """Tell whether the field's annotation is a list, dict, tuple or set.

    An Optional wrapper is removed first, and parametrized generics are
    reduced to their origin before the comparison.
    """
    bare = unwrap_optional(field_info.annotation)
    container = get_origin(bare)
    if not container:
        # Not a parametrized generic: compare the annotation itself.
        container = bare
    return container in (list, dict, tuple, set)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def maybe_parse_json_literal(raw_value: str, field_info) -> object:
    """Decode ``raw_value`` as JSON when it targets a container-typed field.

    The value is returned untouched unless it is a string, is wrapped in
    ``{}`` or ``[]``, and ``field_info`` describes a list/dict/tuple/set field.

    Raises:
        typer.BadParameter: If decoding is attempted and fails.
    """
    wants_json = (
        isinstance(raw_value, str)
        and looks_like_json_container_literal(raw_value)
        and should_parse_json_for_field(field_info)
    )
    if not wants_json:
        return raw_value

    try:
        parsed = json.loads(raw_value)
    except Exception as exc:
        raise typer.BadParameter(
            f"Invalid JSON for {field_info}: {exc}"
        ) from exc
    return parsed
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def resolve_field_names(settings, query: str) -> list[str]:
    """Find the fields of ``settings``' model that match ``query``.

    Both sides are compared in normalized form. Fields whose normalized name
    equals the query win; only when there are none is the result widened to
    fields whose normalized name contains the query.
    """
    model_fields = type(settings).model_fields
    needle = _normalize_setting_key(query)

    normalized_names = [
        (name, _normalize_setting_key(name)) for name in model_fields
    ]

    exact_hits = [name for name, norm in normalized_names if norm == needle]
    if exact_hits:
        return exact_hits

    # No exact hit: fall back to substring matching.
    return [name for name, norm in normalized_names if needle in norm]
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def is_optional(annotation) -> bool:
    """Return True when ``annotation`` is a union that includes ``None``.

    Covers both ``Optional[T]`` / ``Union[..., None]`` and the PEP 604
    spelling ``T | None``.
    """
    import types  # function-scope import: only needed for the PEP 604 check

    origin = get_origin(annotation)
    # `T | None` has types.UnionType as its origin on Python 3.10-3.13, so
    # checking typing.Union alone would report such annotations as required.
    pep604_union = getattr(types, "UnionType", Union)
    if origin is Union or origin is pep604_union:
        return type(None) in get_args(annotation)
    return False
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def parse_and_validate(field_name: str, field_info, raw: str):
    """
    Coerce a raw CLI value by running it through the Settings model.

    The current settings are dumped, ``field_name`` is overwritten with the
    (possibly JSON-decoded) value and the whole payload is re-validated, so
    the model's own field validators are applied (e.g. LOG_LEVEL coercion,
    'error' -> numeric log level). The validated attribute is returned.

    Raises:
        typer.BadParameter: If validation fails. Errors located at
            ``field_name`` are reported when present, otherwise the full
            validation error text.
    """
    current = get_settings()
    candidate: object = maybe_parse_json_literal(raw, field_info)

    payload = current.model_dump(mode="python")
    payload[field_name] = candidate

    settings_cls = type(current)
    try:
        validated = settings_cls.model_validate(payload)
    except ValidationError as exc:
        # Keep only the messages that belong to the field being set.
        messages: list[str] = []
        for err in exc.errors():
            loc = err.get("loc") or ()
            if not loc or loc[0] != field_name:
                continue
            messages.append(err.get("msg") or str(err))

        if messages:
            detail = "; ".join(messages)
        else:
            detail = str(exc)
        raise typer.BadParameter(
            f"Invalid value for {field_name}: {raw!r}. {detail}"
        ) from exc

    return getattr(validated, field_name)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from dotenv import set_key, unset_key
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DotenvHandler:
    """Thin wrapper around python-dotenv's set_key/unset_key for one file."""

    def __init__(self, path: Path):
        # Accept anything Path() accepts and keep a Path internally.
        self.path = Path(path)

    def upsert(self, mapping: dict[str, str]) -> None:
        """Write every key/value pair of ``mapping`` into the dotenv file.

        The parent directory and the file itself are created when missing.
        Each pair is written with ``quote_mode="always"``.
        """
        env_file = self.path
        env_file.parent.mkdir(parents=True, exist_ok=True)
        env_file.touch(exist_ok=True)

        dotenv_path = str(env_file)
        for name in mapping:
            set_key(dotenv_path, name, mapping[name], quote_mode="always")

    def unset(self, keys: set[str]) -> None:
        """Remove ``keys`` from the dotenv file; do nothing if it is absent."""
        if self.path.exists():
            dotenv_path = str(self.path)
            for name in keys:
                unset_key(dotenv_path, name)
|