deepeval 3.4.7__py3-none-any.whl → 3.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/cli/utils.py CHANGED
@@ -1,7 +1,13 @@
1
- from rich import print
1
+ from __future__ import annotations
2
+
3
+ import os
2
4
  import webbrowser
3
5
  import pyfiglet
4
- from typing import Optional
6
+
7
+ from enum import Enum
8
+ from pathlib import Path
9
+ from rich import print
10
+ from typing import Optional, Dict, Iterable, List, Tuple, Union
5
11
  from opentelemetry.trace import Span
6
12
 
7
13
  from deepeval.key_handler import (
@@ -14,8 +20,25 @@ from deepeval.test_run.test_run import (
14
20
  global_test_run_manager,
15
21
  )
16
22
  from deepeval.confident.api import get_confident_api_key, set_confident_api_key
23
+ from deepeval.cli.dotenv_handler import DotenvHandler
17
24
 
25
+
26
# A config key may be a plain string or an Enum member.
StrOrEnum = Union[str, Enum]

PROD = "https://app.confident-ai.com"

# All mutually exclusive USE_* provider flags: exactly one should be "YES".
# MAINTENANCE: extend this list whenever a new USE_* key is introduced.
USE_MODEL_KEYS: List[Union[ModelKeyValues, EmbeddingKeyValues]] = [
    ModelKeyValues.USE_OPENAI_MODEL,
    ModelKeyValues.USE_AZURE_OPENAI,
    ModelKeyValues.USE_LOCAL_MODEL,
    ModelKeyValues.USE_GROK_MODEL,
    ModelKeyValues.USE_MOONSHOT_MODEL,
    ModelKeyValues.USE_DEEPSEEK_MODEL,
    ModelKeyValues.USE_GEMINI_MODEL,
    ModelKeyValues.USE_LITELLM,
    EmbeddingKeyValues.USE_AZURE_OPENAI_EMBEDDING,
    EmbeddingKeyValues.USE_LOCAL_EMBEDDINGS,
]
19
42
 
20
43
 
21
44
  def render_login_message():
@@ -65,3 +88,94 @@ def clear_evaluation_model_keys():
65
88
  def clear_embedding_model_keys():
66
89
  for key in EmbeddingKeyValues:
67
90
  KEY_FILE_HANDLER.remove_key(key)
91
+
92
+
93
+ def _to_str_key(k: StrOrEnum) -> str:
94
+ return k.value if hasattr(k, "value") else str(k)
95
+
96
+
97
def _normalize_kv(updates: "Dict[StrOrEnum, str]") -> "Dict[str, str]":
    """Return ``updates`` with every key converted to its plain string form."""
    normalized = {}
    for key, value in updates.items():
        normalized[_to_str_key(key)] = value
    return normalized
99
+
100
+
101
def _normalize_keys(keys: "Iterable[StrOrEnum]") -> "list[str]":
    """Return ``keys`` as a list of plain string key names."""
    return list(map(_to_str_key, keys))
103
+
104
+
105
+ def _parse_save_option(
106
+ save_opt: str | None, default_path: str = ".env.local"
107
+ ) -> Tuple[bool, str | None]:
108
+ if not save_opt:
109
+ return False, None
110
+ kind, *rest = save_opt.split(":", 1)
111
+ if kind != "dotenv":
112
+ return False, None
113
+ path = rest[0] if rest else default_path
114
+ return True, path
115
+
116
+
117
def resolve_save_target(save_opt: Optional[str]) -> Optional[str]:
    """Return a normalized save target such as ``'dotenv:.env.local'``, or None.

    Precedence:
        1. An explicit ``--save=...`` value.
        2. The ``DEEPEVAL_DEFAULT_SAVE`` environment variable (opt-in
           project default), stripped of surrounding whitespace.
        3. None (no save).
    """
    if save_opt:
        return save_opt
    # Whitespace-only env values count as unset.
    fallback = (os.getenv("DEEPEVAL_DEFAULT_SAVE") or "").strip()
    return fallback or None
133
+
134
+
135
def save_environ_to_store(
    save_opt: "str | None", updates: "Dict[StrOrEnum, str]"
) -> "Tuple[bool, str | None]":
    """Upsert ``updates`` into the selected store (currently dotenv only).

    Idempotent: re-running with the same updates leaves the store unchanged.

    Returns:
        ``(handled, path)`` — whether the option targeted a supported store,
        and the dotenv file path when it did.
    """
    handled, path = _parse_save_option(save_opt)
    if not handled:
        return False, None
    # Nothing to write for an empty update map; still report as handled.
    if updates:
        handler = DotenvHandler(path)
        handler.upsert(_normalize_kv(updates))
    return True, path
148
+
149
+
150
def unset_environ_in_store(
    save_opt: "str | None", keys: "Iterable[StrOrEnum]"
) -> "Tuple[bool, str | None]":
    """Remove ``keys`` from the selected store (currently dotenv only).

    Returns:
        ``(handled, path)`` — whether the option targeted a supported store,
        and the dotenv file path when it did.
    """
    handled, path = _parse_save_option(save_opt)
    if not handled:
        return False, None
    key_names = _normalize_keys(keys)
    # Skip the file touch entirely when there is nothing to remove.
    if key_names:
        DotenvHandler(path).unset(key_names)
    return True, path
164
+
165
+
166
def switch_model_provider(
    target: ModelKeyValues, save: Optional[str] = None
) -> None:
    """Ensure exactly one USE_* model flag is "YES" and the rest are "NO",
    both in the .deepeval json store and in a dotenv file (if ``save`` is
    provided).

    Args:
        target: The USE_* flag to enable; must be listed in USE_MODEL_KEYS.
        save: Optional save target, e.g. ``"dotenv:.env.local"``.

    Raises:
        ValueError: if ``target`` is not a recognized USE_* key.
    """
    if target not in USE_MODEL_KEYS:
        raise ValueError(f"{target} is not a recognized USE_* model key")

    # Build the complete flag map once so BOTH stores receive every key.
    # (Previously only the last loop iteration's key/value pair was passed
    # to the dotenv store, leaving stale USE_* flags behind.)
    flags = {key: ("YES" if key == target else "NO") for key in USE_MODEL_KEYS}

    for key, value in flags.items():
        KEY_FILE_HANDLER.write_key(key, value)

    if save:
        handled, _path = save_environ_to_store(save, flags)
        if not handled:
            print("Unsupported --save option. Use --save=dotenv[:path].")
deepeval/key_handler.py CHANGED
@@ -1,12 +1,42 @@
1
1
  """File for handling API key"""
2
2
 
3
+ import os
3
4
  import json
5
+ import logging
6
+
4
7
  from enum import Enum
5
8
  from typing import Union
6
9
 
7
10
  from .constants import KEY_FILE, HIDDEN_DIR
8
11
 
9
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ SECRET_KEYS = {
17
+ # General providers
18
+ "OPENAI_API_KEY",
19
+ "ANTHROPIC_API_KEY",
20
+ # Azure OpenAI
21
+ "AZURE_OPENAI_API_KEY",
22
+ # Google / Gemini
23
+ "GOOGLE_API_KEY",
24
+ # xAI Grok
25
+ "GROK_API_KEY",
26
+ # Moonshot
27
+ "MOONSHOT_API_KEY",
28
+ # DeepSeek
29
+ "DEEPSEEK_API_KEY",
30
+ # LiteLLM
31
+ "LITELLM_API_KEY",
32
+ # Local gateways (if any require keys)
33
+ "LOCAL_MODEL_API_KEY",
34
+ "LOCAL_EMBEDDING_API_KEY",
35
+ }
36
+
37
+ _WARNED_SECRET_KEYS = set()
38
+
39
+
10
40
  class KeyValues(Enum):
11
41
  # Confident AI
12
42
  API_KEY = "api_key"
@@ -79,10 +109,21 @@ class KeyFileHandler:
79
109
  def __init__(self):
80
110
  self.data = {}
81
111
 
112
+ def _ensure_dir(self):
113
+ os.makedirs(HIDDEN_DIR, exist_ok=True)
114
+
82
115
  def write_key(
83
116
  self, key: Union[KeyValues, ModelKeyValues, EmbeddingKeyValues], value
84
117
  ):
85
118
  """Appends or updates data in the hidden file"""
119
+
120
+ # hard stop on secrets: never write to disk
121
+ if key.value in SECRET_KEYS:
122
+ logger.warning(
123
+ f"{key} is blacklisted, refusing to persist. Keep your secrets in .env or .env.local instead"
124
+ )
125
+ return
126
+
86
127
  try:
87
128
  with open(f"{HIDDEN_DIR}/{KEY_FILE}", "r") as f:
88
129
  # Load existing data
@@ -99,13 +140,15 @@ class KeyFileHandler:
99
140
  self.data[key.value] = value
100
141
 
101
142
  # Write the updated data back to the file
143
+ self._ensure_dir()
102
144
  with open(f"{HIDDEN_DIR}/{KEY_FILE}", "w") as f:
103
145
  json.dump(self.data, f)
104
146
 
105
147
  def fetch_data(
106
148
  self, key: Union[KeyValues, ModelKeyValues, EmbeddingKeyValues]
107
149
  ):
108
- """Fetches the data from the hidden file"""
150
+ """Fetches the data from the hidden file.
151
+ NOTE: secrets in this file are deprecated; prefer env/.env."""
109
152
  try:
110
153
  with open(f"{HIDDEN_DIR}/{KEY_FILE}", "r") as f:
111
154
  try:
@@ -116,7 +159,24 @@ class KeyFileHandler:
116
159
  except FileNotFoundError:
117
160
  # Handle the case when the file doesn't exist
118
161
  self.data = {}
119
- return self.data.get(key.value)
162
+
163
+ value = self.data.get(key.value)
164
+
165
+ # Deprecation: warn only if we're actually returning a secret
166
+ if (
167
+ value is not None
168
+ and key.value in SECRET_KEYS
169
+ and key.value not in _WARNED_SECRET_KEYS
170
+ ):
171
+ logger.warning(
172
+ f"Reading secret '{key.value}' from legacy {HIDDEN_DIR}/{KEY_FILE}. "
173
+ "Persisting API keys in plaintext is deprecated. "
174
+ "Move this to your environment (.env / .env.local). "
175
+ "This fallback will be removed in a future release."
176
+ )
177
+ _WARNED_SECRET_KEYS.add(key.value)
178
+
179
+ return value
120
180
 
121
181
  def remove_key(
122
182
  self, key: Union[KeyValues, ModelKeyValues, EmbeddingKeyValues]
@@ -130,6 +190,7 @@ class KeyFileHandler:
130
190
  # Handle corrupted JSON file
131
191
  self.data = {}
132
192
  self.data.pop(key.value, None) # Remove the key if it exists
193
+ self._ensure_dir()
133
194
  with open(f"{HIDDEN_DIR}/{KEY_FILE}", "w") as f:
134
195
  json.dump(self.data, f)
135
196
  except FileNotFoundError:
@@ -5,7 +5,8 @@ from .base_metric import (
5
5
  BaseArenaMetric,
6
6
  )
7
7
 
8
- from .dag.dag import DAGMetric
8
+ from .dag.dag import DAGMetric, DeepAcyclicGraph
9
+ from .conversational_dag.conversational_dag import ConversationalDAGMetric
9
10
  from .bias.bias import BiasMetric
10
11
  from .toxicity.toxicity import ToxicityMetric
11
12
  from .pii_leakage.pii_leakage import PIILeakageMetric
@@ -67,6 +68,8 @@ __all__ = [
67
68
  "ArenaGEval",
68
69
  "ConversationalGEval",
69
70
  "DAGMetric",
71
+ "DeepAcyclicGraph",
72
+ "ConversationalDAGMetric",
70
73
  # RAG metrics
71
74
  "AnswerRelevancyMetric",
72
75
  "FaithfulnessMetric",
@@ -0,0 +1,7 @@
1
+ from .nodes import (
2
+ ConversationalBaseNode,
3
+ ConversationalVerdictNode,
4
+ ConversationalTaskNode,
5
+ ConversationalBinaryJudgementNode,
6
+ ConversationalNonBinaryJudgementNode,
7
+ )
@@ -0,0 +1,139 @@
1
+ from typing import Optional, Union
2
+ from deepeval.metrics import BaseConversationalMetric
3
+ from deepeval.test_case import (
4
+ ConversationalTestCase,
5
+ )
6
+ from deepeval.utils import get_or_create_event_loop
7
+ from deepeval.metrics.utils import (
8
+ check_conversational_test_case_params,
9
+ construct_verbose_logs,
10
+ initialize_model,
11
+ )
12
+ from deepeval.models import DeepEvalBaseLLM
13
+ from deepeval.metrics.indicator import metric_progress_indicator
14
+ from deepeval.metrics.g_eval.schema import *
15
+ from deepeval.metrics import DeepAcyclicGraph
16
+ from deepeval.metrics.dag.utils import (
17
+ is_valid_dag_from_roots,
18
+ extract_required_params,
19
+ copy_graph,
20
+ )
21
+
22
+
23
class ConversationalDAGMetric(BaseConversationalMetric):
    """Metric that scores a multi-turn conversation by executing a DAG of
    judgement nodes.

    The graph is executed node-by-node against the test case; the terminal
    verdict node is responsible for setting ``self.score`` and
    ``self.reason``.
    """

    def __init__(
        self,
        name: str,
        dag: DeepAcyclicGraph,
        model: Optional[Union[str, DeepEvalBaseLLM]] = None,
        threshold: float = 0.5,
        include_reason: bool = True,
        async_mode: bool = True,
        strict_mode: bool = False,
        verbose_mode: bool = False,
        _include_dag_suffix: bool = True,
    ):
        """Validate the graph and configure the metric.

        Raises:
            ValueError: if the supplied graph contains a cycle.
        """
        # Reject cyclic graphs up front; executing one would never terminate.
        if not is_valid_dag_from_roots(
            root_nodes=dag.root_nodes, multiturn=dag.multiturn
        ):
            raise ValueError("Cycle detected in DAG graph.")

        # Builtin generic avoids a NameError: ``typing.List`` is not
        # imported in this module and annotations on attribute targets are
        # evaluated at runtime.
        self._verbose_steps: list[str] = []
        # Work on a copy so node execution never mutates the caller's graph.
        self.dag = copy_graph(dag)
        self.name = name
        self.model, self.using_native_model = initialize_model(model)
        self.evaluation_model = self.model.get_model_name()
        # strict_mode requires a perfect score to pass.
        self.threshold = 1 if strict_mode else threshold
        self.include_reason = include_reason
        self.strict_mode = strict_mode
        self.async_mode = async_mode
        self.verbose_mode = verbose_mode
        self._include_dag_suffix = _include_dag_suffix

    def measure(
        self,
        test_case: ConversationalTestCase,
        _show_indicator: bool = True,
        _in_component: bool = False,
    ) -> float:
        """Synchronously evaluate ``test_case`` and return the score.

        When ``async_mode`` is enabled this delegates to :meth:`a_measure`
        on an event loop.
        """
        check_conversational_test_case_params(
            test_case,
            extract_required_params(self.dag.root_nodes, multiturn=True),
            self,
        )

        self.evaluation_cost = 0 if self.using_native_model else None
        with metric_progress_indicator(
            self, _show_indicator=_show_indicator, _in_component=_in_component
        ):
            if self.async_mode:
                loop = get_or_create_event_loop()
                loop.run_until_complete(
                    self.a_measure(
                        test_case,
                        _show_indicator=False,
                        _in_component=_in_component,
                    )
                )
            else:
                self.dag._execute(metric=self, test_case=test_case)
                self.success = self.is_successful()
                self.verbose_logs = construct_verbose_logs(
                    self,
                    steps=[
                        *self._verbose_steps,
                        f"Score: {self.score}\nReason: {self.reason}",
                    ],
                )
            return self.score

    async def a_measure(
        self,
        test_case: ConversationalTestCase,
        _show_indicator: bool = True,
        _in_component: bool = False,
    ) -> float:
        """Asynchronously evaluate ``test_case`` and return the score."""
        check_conversational_test_case_params(
            test_case,
            extract_required_params(self.dag.root_nodes, multiturn=True),
            self,
        )

        self.evaluation_cost = 0 if self.using_native_model else None
        with metric_progress_indicator(
            self,
            async_mode=True,
            _show_indicator=_show_indicator,
            _in_component=_in_component,
        ):
            await self.dag._a_execute(metric=self, test_case=test_case)
            self.success = self.is_successful()
            self.verbose_logs = construct_verbose_logs(
                self,
                steps=[
                    *self._verbose_steps,
                    f"Score: {self.score}\nReason: {self.reason}",
                ],
            )
            return self.score

    def is_successful(self) -> bool:
        """Return True when the last run produced a passing score."""
        # NOTE(review): self.error is presumably initialized by the base
        # metric class — confirm.
        if self.error is not None:
            self.success = False
        else:
            try:
                self.success = self.score >= self.threshold
            except TypeError:
                # score can be None when execution never reached a verdict
                self.success = False
        return self.success

    @property
    def __name__(self):
        """Display name, optionally tagged with the metric type."""
        if self._include_dag_suffix:
            return f"{self.name} [ConversationalDAG]"
        return self.name