deepeval 3.7.0__py3-none-any.whl → 3.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,21 @@
1
+ from time import perf_counter
2
+
1
3
  from deepeval.tracing.tracing import (
2
4
  Observer,
3
5
  current_span_context,
4
6
  trace_manager,
5
7
  )
6
- from deepeval.openai_agents.extractors import *
8
+ from deepeval.openai_agents.extractors import (
9
+ update_span_properties,
10
+ update_trace_properties_from_span_data,
11
+ )
7
12
  from deepeval.tracing.context import current_trace_context
8
13
  from deepeval.tracing.utils import make_json_serializable
9
- from time import perf_counter
10
- from deepeval.tracing.types import TraceSpanStatus
14
+ from deepeval.tracing.types import (
15
+ BaseSpan,
16
+ LlmSpan,
17
+ TraceSpanStatus,
18
+ )
11
19
 
12
20
  try:
13
21
  from agents.tracing import Span, Trace, TracingProcessor
@@ -18,6 +26,7 @@ try:
18
26
  GenerationSpanData,
19
27
  GuardrailSpanData,
20
28
  HandoffSpanData,
29
+ MCPListToolsSpanData,
21
30
  ResponseSpanData,
22
31
  SpanData,
23
32
  )
deepeval/prompt/prompt.py CHANGED
@@ -1,3 +1,8 @@
1
+ import logging
2
+ import time
3
+ import json
4
+ import os
5
+
1
6
  from enum import Enum
2
7
  from typing import Optional, List, Dict, Type, Literal
3
8
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
@@ -5,12 +10,11 @@ from rich.console import Console
5
10
  import time
6
11
  import json
7
12
  import os
8
- from pydantic import BaseModel, ValidationError, ConfigDict
13
+ from pydantic import BaseModel, ValidationError
9
14
  import asyncio
10
- import portalocker
11
15
  import threading
12
16
 
13
- from deepeval.utils import make_model_config
17
+ from deepeval.utils import make_model_config, is_read_only_env
14
18
 
15
19
  from deepeval.prompt.api import (
16
20
  PromptHttpResponse,
@@ -24,9 +28,6 @@ from deepeval.prompt.api import (
24
28
  ModelSettings,
25
29
  OutputSchema,
26
30
  OutputType,
27
- ReasoningEffort,
28
- Verbosity,
29
- ModelProvider,
30
31
  )
31
32
  from deepeval.prompt.utils import (
32
33
  interpolate_text,
@@ -36,6 +37,18 @@ from deepeval.prompt.utils import (
36
37
  from deepeval.confident.api import Api, Endpoints, HttpMethods
37
38
  from deepeval.constants import HIDDEN_DIR
38
39
 
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ portalocker = None
44
+ if not is_read_only_env():
45
+ try:
46
+ import portalocker
47
+ except Exception as e:
48
+ logger.warning("failed to import portalocker: %s", e)
49
+ else:
50
+ logger.warning("READ_ONLY filesystem: skipping disk cache for prompts.")
51
+
39
52
  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
40
53
  VERSION_CACHE_KEY = "version"
41
54
  LABEL_CACHE_KEY = "label"
@@ -165,7 +178,7 @@ class Prompt:
165
178
  content = f.read()
166
179
  try:
167
180
  data = json.loads(content)
168
- except:
181
+ except (json.JSONDecodeError, TypeError):
169
182
  self.text_template = content
170
183
  return content
171
184
 
@@ -203,7 +216,6 @@ class Prompt:
203
216
  "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
204
217
  )
205
218
 
206
- print("@@@@@")
207
219
  return interpolate_text(interpolation_type, text_template, **kwargs)
208
220
 
209
221
  elif prompt_type == PromptType.LIST:
@@ -248,7 +260,7 @@ class Prompt:
248
260
  version: Optional[str] = None,
249
261
  label: Optional[str] = None,
250
262
  ) -> Optional[CachedPrompt]:
251
- if not os.path.exists(CACHE_FILE_NAME):
263
+ if portalocker is None or not os.path.exists(CACHE_FILE_NAME):
252
264
  return None
253
265
 
254
266
  try:
@@ -296,13 +308,12 @@ class Prompt:
296
308
  output_type: Optional[OutputType] = None,
297
309
  output_schema: Optional[OutputSchema] = None,
298
310
  ):
299
- if not self.alias:
311
+ if portalocker is None or not self.alias:
300
312
  return
301
313
 
302
- # Ensure directory exists
303
- os.makedirs(HIDDEN_DIR, exist_ok=True)
304
-
305
314
  try:
315
+ # Ensure directory exists
316
+ os.makedirs(HIDDEN_DIR, exist_ok=True)
306
317
  # Use r+ mode if file exists, w mode if it doesn't
307
318
  mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
308
319
 
@@ -481,7 +492,7 @@ class Prompt:
481
492
  cached_prompt.output_schema
482
493
  )
483
494
  return
484
- except:
495
+ except Exception:
485
496
  pass
486
497
 
487
498
  api = Api()
@@ -112,7 +112,7 @@ class ConversationSimulatorTemplate:
112
112
  ]
113
113
  Example JSON Output:
114
114
  {{
115
- "is_complete": False,
115
+ "is_complete": false,
116
116
  "reason": "The assistant explained how to forget password but ahas not confirmed that the user successfully set a new password."
117
117
  }}
118
118
 
@@ -10,7 +10,7 @@ from .conversational_test_case import (
10
10
  TurnParams,
11
11
  )
12
12
  from .mllm_test_case import MLLMTestCase, MLLMTestCaseParams, MLLMImage
13
- from .arena_test_case import ArenaTestCase
13
+ from .arena_test_case import ArenaTestCase, Contestant
14
14
  from .mcp import (
15
15
  MCPServer,
16
16
  MCPPromptCall,
@@ -35,4 +35,5 @@ __all__ = [
35
35
  "MLLMTestCaseParams",
36
36
  "MLLMImage",
37
37
  "ArenaTestCase",
38
+ "Contestant",
38
39
  ]
@@ -1,20 +1,31 @@
1
+ from typing import List, Dict, Optional, Union
1
2
  from dataclasses import dataclass
2
- from typing import List, Dict
3
+ from pydantic import BaseModel
4
+
3
5
  from deepeval.test_case import (
4
6
  LLMTestCase,
5
7
  )
8
+ from deepeval.prompt import Prompt
9
+
10
+
11
+ class Contestant(BaseModel):
12
+ name: str
13
+ test_case: LLMTestCase
14
+ hyperparameters: Optional[Dict[str, Union[str, int, float, Prompt]]] = None
15
+
16
+ model_config = {"arbitrary_types_allowed": True}
6
17
 
7
18
 
8
19
  @dataclass
9
20
  class ArenaTestCase:
10
- contestants: Dict[str, LLMTestCase]
21
+ contestants: List[Contestant]
11
22
 
12
23
  def __post_init__(self):
13
- contestant_names = list(self.contestants.keys())
24
+ contestant_names = [contestant.name for contestant in self.contestants]
14
25
  if len(contestant_names) != len(set(contestant_names)):
15
26
  raise ValueError("All contestant names must be unique.")
16
27
 
17
- cases = list(self.contestants.values())
28
+ cases = [contestant.test_case for contestant in self.contestants]
18
29
  ref_input = cases[0].input
19
30
  for case in cases[1:]:
20
31
  if case.input != ref_input:
@@ -11,33 +11,50 @@ from deepeval.test_case import ToolCall
11
11
 
12
12
  @dataclass
13
13
  class MLLMImage:
14
- url: str
14
+ dataBase64: Optional[str] = None
15
+ mimeType: Optional[str] = None
16
+ url: Optional[str] = None
15
17
  local: Optional[bool] = None
16
- filename: Optional[str] = field(default=None, init=False, repr=False)
17
- mimeType: Optional[str] = field(default=None, init=False, repr=False)
18
- dataBase64: Optional[str] = field(default=None, init=False, repr=False)
18
+ filename: Optional[str] = None
19
19
 
20
20
  def __post_init__(self):
21
- is_local = self.is_local_path(self.url)
22
- if self.local is not None:
23
- assert self.local == is_local, "Local path mismatch"
24
- else:
25
- self.local = is_local
26
-
27
- # compute filename, mime_type, and Base64 data
28
- if self.local:
29
- path = self.process_url(self.url)
30
- self.filename = os.path.basename(path)
31
- self.mimeType = (
32
- mimetypes.guess_type(path)[0] or "application/octet-stream"
21
+
22
+ if self.url and self.dataBase64:
23
+ raise ValueError(
24
+ "You cannot provide both 'url' and 'dataBase64' at the same time when creating an MLLMImage."
25
+ )
26
+
27
+ if not self.url and not self.dataBase64:
28
+ raise ValueError(
29
+ "You must provide either a 'url' or both 'dataBase64' and 'mimeType' to create an MLLMImage."
33
30
  )
34
- with open(path, "rb") as f:
35
- raw = f.read()
36
- self.dataBase64 = base64.b64encode(raw).decode("ascii")
31
+
32
+ if self.dataBase64 is not None:
33
+ if self.mimeType is None:
34
+ raise ValueError(
35
+ "mimeType must be provided when initializing from Base64 data."
36
+ )
37
37
  else:
38
- self.filename = None
39
- self.mimeType = None
40
- self.dataBase64 = None
38
+ is_local = self.is_local_path(self.url)
39
+ if self.local is not None:
40
+ assert self.local == is_local, "Local path mismatch"
41
+ else:
42
+ self.local = is_local
43
+
44
+ # compute filename, mime_type, and Base64 data
45
+ if self.local:
46
+ path = self.process_url(self.url)
47
+ self.filename = os.path.basename(path)
48
+ self.mimeType = (
49
+ mimetypes.guess_type(path)[0] or "application/octet-stream"
50
+ )
51
+ with open(path, "rb") as f:
52
+ raw = f.read()
53
+ self.dataBase64 = base64.b64encode(raw).decode("ascii")
54
+ else:
55
+ self.filename = None
56
+ self.mimeType = None
57
+ self.dataBase64 = None
41
58
 
42
59
  @staticmethod
43
60
  def process_url(url: str) -> str:
@@ -69,6 +86,12 @@ class MLLMImage:
69
86
  return os.path.exists(path)
70
87
  return False
71
88
 
89
+ def as_data_uri(self) -> Optional[str]:
90
+ """Return the image as a data URI string, if Base64 data is available."""
91
+ if not self.dataBase64 or not self.mimeType:
92
+ return None
93
+ return f"data:{self.mimeType};base64,{self.dataBase64}"
94
+
72
95
 
73
96
  class MLLMTestCaseParams(Enum):
74
97
  INPUT = "input"
@@ -1,8 +1,8 @@
1
- import portalocker
1
+ import logging
2
2
  import sys
3
3
  import json
4
4
  import os
5
- from typing import List, Optional, Union, Dict, Union
5
+ from typing import List, Optional, Dict, Union
6
6
  from enum import Enum
7
7
  from pydantic import BaseModel, Field
8
8
 
@@ -12,11 +12,26 @@ from deepeval.test_case import LLMTestCaseParams, LLMTestCase, ToolCallParams
12
12
  from deepeval.test_run.api import MetricData
13
13
  from deepeval.utils import (
14
14
  delete_file_if_exists,
15
+ is_read_only_env,
15
16
  serialize,
16
17
  )
17
18
  from deepeval.metrics import BaseMetric
18
19
  from deepeval.constants import HIDDEN_DIR
19
20
 
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ portalocker = None
26
+ if not is_read_only_env():
27
+ try:
28
+ import portalocker
29
+ except Exception as e:
30
+ logger.warning("failed to import portalocker: %s", e)
31
+ else:
32
+ logger.warning("READ_ONLY filesystem: skipping disk cache for test runs.")
33
+
34
+
20
35
  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-cache.json"
21
36
  TEMP_CACHE_FILE_NAME = f"{HIDDEN_DIR}/.temp-deepeval-cache.json"
22
37
 
@@ -97,7 +112,7 @@ class TestRunCacheManager:
97
112
  def get_cached_test_case(
98
113
  self, test_case: LLMTestCase, hyperparameters: Union[Dict, None]
99
114
  ) -> Union[CachedTestCase, None]:
100
- if self.disable_write_cache:
115
+ if self.disable_write_cache or portalocker is None:
101
116
  return None
102
117
 
103
118
  cached_test_run = self.get_cached_test_run()
@@ -122,7 +137,7 @@ class TestRunCacheManager:
122
137
  hyperparameters: Union[Dict, None],
123
138
  to_temp: bool = False,
124
139
  ):
125
- if self.disable_write_cache:
140
+ if self.disable_write_cache or portalocker is None:
126
141
  return
127
142
  cache_dict = {
128
143
  LLMTestCaseParams.INPUT.value: test_case.input,
@@ -142,7 +157,7 @@ class TestRunCacheManager:
142
157
  def set_cached_test_run(
143
158
  self, cached_test_run: CachedTestRun, temp: bool = False
144
159
  ):
145
- if self.disable_write_cache:
160
+ if self.disable_write_cache or portalocker is None:
146
161
  return
147
162
 
148
163
  if temp:
@@ -151,7 +166,7 @@ class TestRunCacheManager:
151
166
  self.cached_test_run = cached_test_run
152
167
 
153
168
  def save_cached_test_run(self, to_temp: bool = False):
154
- if self.disable_write_cache:
169
+ if self.disable_write_cache or portalocker is None:
155
170
  return
156
171
 
157
172
  if to_temp:
@@ -178,7 +193,7 @@ class TestRunCacheManager:
178
193
  )
179
194
 
180
195
  def create_cached_test_run(self, temp: bool = False):
181
- if self.disable_write_cache:
196
+ if self.disable_write_cache or portalocker is None:
182
197
  return
183
198
 
184
199
  cached_test_run = CachedTestRun()
@@ -188,7 +203,7 @@ class TestRunCacheManager:
188
203
  def get_cached_test_run(
189
204
  self, from_temp: bool = False
190
205
  ) -> Union[CachedTestRun, None]:
191
- if self.disable_write_cache:
206
+ if self.disable_write_cache or portalocker is None:
192
207
  return
193
208
 
194
209
  should_create_cached_test_run = False
@@ -209,7 +224,7 @@ class TestRunCacheManager:
209
224
  try:
210
225
  data = json.loads(content)
211
226
  self.temp_cached_test_run = CachedTestRun.load(data)
212
- except Exception as e:
227
+ except Exception:
213
228
  should_create_cached_test_run = True
214
229
  except portalocker.exceptions.LockException as e:
215
230
  print(
@@ -217,6 +232,9 @@ class TestRunCacheManager:
217
232
  file=sys.stderr,
218
233
  )
219
234
 
235
+ if should_create_cached_test_run:
236
+ self.create_cached_test_run(temp=from_temp)
237
+
220
238
  return self.temp_cached_test_run
221
239
  else:
222
240
  if self.cached_test_run:
@@ -250,6 +268,9 @@ class TestRunCacheManager:
250
268
  return self.cached_test_run
251
269
 
252
270
  def wrap_up_cached_test_run(self):
271
+ if portalocker is None:
272
+ return
273
+
253
274
  if self.disable_write_cache:
254
275
  # Clear cache if write cache is disabled
255
276
  delete_file_if_exists(self.cache_file_name)
@@ -330,7 +351,7 @@ class Cache:
330
351
  if criteria_value != cached_criteria_value:
331
352
  return False
332
353
  continue
333
- except:
354
+ except Exception:
334
355
  # For non-GEval
335
356
  continue
336
357
 
@@ -33,7 +33,11 @@ def process_hyperparameters(
33
33
  )
34
34
 
35
35
  if isinstance(value, Prompt):
36
- prompt_key = f"{value.alias}_{value.version}"
36
+ try:
37
+ prompt_key = f"{value.alias}_{value.version}"
38
+ except AttributeError:
39
+ prompt_key = f"{value.alias}_00.00.01"
40
+
37
41
  if value._prompt_version_id is not None and value.type is not None:
38
42
  processed_hyperparameters[key] = PromptApi(
39
43
  id=value._prompt_version_id,
@@ -6,11 +6,11 @@ from typing import Any, Optional, List, Dict, Union, Tuple
6
6
  import shutil
7
7
  import sys
8
8
  import datetime
9
- import portalocker
10
9
  from rich.table import Table
11
10
  from rich.console import Console
12
11
  from rich import print
13
12
 
13
+
14
14
  from deepeval.metrics import BaseMetric
15
15
  from deepeval.confident.api import Api, Endpoints, HttpMethods, is_confident
16
16
  from deepeval.test_run.api import (
@@ -25,6 +25,7 @@ from deepeval.test_case import LLMTestCase, ConversationalTestCase, MLLMTestCase
25
25
  from deepeval.utils import (
26
26
  delete_file_if_exists,
27
27
  get_is_running_deepeval,
28
+ is_read_only_env,
28
29
  open_browser,
29
30
  shorten,
30
31
  format_turn,
@@ -42,6 +43,21 @@ from rich.panel import Panel
42
43
  from rich.columns import Columns
43
44
 
44
45
 
46
+ portalocker = None
47
+ if not is_read_only_env():
48
+ try:
49
+ import portalocker
50
+ except Exception as e:
51
+ print(
52
+ f"Warning: failed to import portalocker: {e}",
53
+ file=sys.stderr,
54
+ )
55
+ else:
56
+ print(
57
+ "Warning: DeepEval is configured for read only environment. Test runs will not be written to disk."
58
+ )
59
+
60
+
45
61
  TEMP_FILE_PATH = f"{HIDDEN_DIR}/.temp_test_run_data.json"
46
62
  LATEST_TEST_RUN_FILE_PATH = f"{HIDDEN_DIR}/.latest_test_run.json"
47
63
  LATEST_TEST_RUN_DATA_KEY = "testRunData"
@@ -456,7 +472,7 @@ class TestRunManager:
456
472
  if self.test_run is None:
457
473
  self.create_test_run(identifier=identifier)
458
474
 
459
- if self.save_to_disk:
475
+ if portalocker and self.save_to_disk:
460
476
  try:
461
477
  with portalocker.Lock(
462
478
  self.temp_file_path,
@@ -479,7 +495,7 @@ class TestRunManager:
479
495
  return self.test_run
480
496
 
481
497
  def save_test_run(self, path: str, save_under_key: Optional[str] = None):
482
- if self.save_to_disk:
498
+ if portalocker and self.save_to_disk:
483
499
  try:
484
500
  # ensure parent directory exists
485
501
  parent = os.path.dirname(path)
@@ -505,11 +521,14 @@ class TestRunManager:
505
521
  pass
506
522
 
507
523
  def save_final_test_run_link(self, link: str):
508
- try:
509
- with portalocker.Lock(LATEST_TEST_RUN_FILE_PATH, mode="w") as file:
510
- json.dump({LATEST_TEST_RUN_LINK_KEY: link}, file)
511
- except portalocker.exceptions.LockException:
512
- pass
524
+ if portalocker:
525
+ try:
526
+ with portalocker.Lock(
527
+ LATEST_TEST_RUN_FILE_PATH, mode="w"
528
+ ) as file:
529
+ json.dump({LATEST_TEST_RUN_LINK_KEY: link}, file)
530
+ except portalocker.exceptions.LockException:
531
+ pass
513
532
 
514
533
  def update_test_run(
515
534
  self,
@@ -523,7 +542,7 @@ class TestRunManager:
523
542
  ):
524
543
  return
525
544
 
526
- if self.save_to_disk:
545
+ if portalocker and self.save_to_disk:
527
546
  try:
528
547
  with portalocker.Lock(
529
548
  self.temp_file_path,
@@ -144,7 +144,7 @@ class TraceManager:
144
144
 
145
145
  def mask(self, data: Any):
146
146
  if self.custom_mask_fn is not None:
147
- self.custom_mask_fn(data)
147
+ return self.custom_mask_fn(data)
148
148
  else:
149
149
  return data
150
150
 
deepeval/utils.py CHANGED
@@ -810,3 +810,7 @@ def format_error_text(
810
810
  text += " (Run with LOG_LEVEL=DEBUG for stack trace.)"
811
811
 
812
812
  return text
813
+
814
+
815
+ def is_read_only_env():
816
+ return get_settings().DEEPEVAL_FILE_SYSTEM == "READ_ONLY"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.7.0
3
+ Version: 3.7.1
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -32,7 +32,7 @@ Requires-Dist: pyfiglet
32
32
  Requires-Dist: pytest
33
33
  Requires-Dist: pytest-asyncio
34
34
  Requires-Dist: pytest-repeat
35
- Requires-Dist: pytest-rerunfailures (>=12.0,<13.0)
35
+ Requires-Dist: pytest-rerunfailures
36
36
  Requires-Dist: pytest-xdist
37
37
  Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
38
38
  Requires-Dist: requests (>=2.31.0,<3.0.0)