deepeval-3.7.0-py3-none-any.whl → deepeval-3.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/__init__.py +0 -4
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +7 -0
- deepeval/confident/api.py +6 -1
- deepeval/config/settings.py +5 -0
- deepeval/evaluate/compare.py +215 -4
- deepeval/evaluate/types.py +6 -0
- deepeval/evaluate/utils.py +30 -0
- deepeval/key_handler.py +1 -0
- deepeval/metrics/arena_g_eval/arena_g_eval.py +5 -1
- deepeval/metrics/arena_g_eval/utils.py +5 -5
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +9 -18
- deepeval/metrics/g_eval/g_eval.py +5 -1
- deepeval/metrics/g_eval/utils.py +1 -1
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +5 -1
- deepeval/metrics/utils.py +1 -1
- deepeval/models/llms/gemini_model.py +27 -5
- deepeval/openai_agents/callback_handler.py +12 -3
- deepeval/prompt/prompt.py +25 -14
- deepeval/simulator/template.py +1 -1
- deepeval/test_case/__init__.py +2 -1
- deepeval/test_case/arena_test_case.py +15 -4
- deepeval/test_case/mllm_test_case.py +45 -22
- deepeval/test_run/cache.py +31 -10
- deepeval/test_run/hyperparameters.py +5 -1
- deepeval/test_run/test_run.py +28 -9
- deepeval/tracing/tracing.py +1 -1
- deepeval/utils.py +4 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/METADATA +2 -2
- {deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/RECORD +33 -33
- {deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/WHEEL +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/entry_points.txt +0 -0
|
deepeval/openai_agents/callback_handler.py
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
|
+
from time import perf_counter
|
|
2
|
+
|
|
1
3
|
from deepeval.tracing.tracing import (
|
|
2
4
|
Observer,
|
|
3
5
|
current_span_context,
|
|
4
6
|
trace_manager,
|
|
5
7
|
)
|
|
6
|
-
from deepeval.openai_agents.extractors import
|
|
8
|
+
from deepeval.openai_agents.extractors import (
|
|
9
|
+
update_span_properties,
|
|
10
|
+
update_trace_properties_from_span_data,
|
|
11
|
+
)
|
|
7
12
|
from deepeval.tracing.context import current_trace_context
|
|
8
13
|
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
-
from
|
|
10
|
-
|
|
14
|
+
from deepeval.tracing.types import (
|
|
15
|
+
BaseSpan,
|
|
16
|
+
LlmSpan,
|
|
17
|
+
TraceSpanStatus,
|
|
18
|
+
)
|
|
11
19
|
|
|
12
20
|
try:
|
|
13
21
|
from agents.tracing import Span, Trace, TracingProcessor
|
|
@@ -18,6 +26,7 @@ try:
|
|
|
18
26
|
GenerationSpanData,
|
|
19
27
|
GuardrailSpanData,
|
|
20
28
|
HandoffSpanData,
|
|
29
|
+
MCPListToolsSpanData,
|
|
21
30
|
ResponseSpanData,
|
|
22
31
|
SpanData,
|
|
23
32
|
)
|
deepeval/prompt/prompt.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
1
6
|
from enum import Enum
|
|
2
7
|
from typing import Optional, List, Dict, Type, Literal
|
|
3
8
|
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
|
|
@@ -5,12 +10,11 @@ from rich.console import Console
|
|
|
5
10
|
import time
|
|
6
11
|
import json
|
|
7
12
|
import os
|
|
8
|
-
from pydantic import BaseModel, ValidationError
|
|
13
|
+
from pydantic import BaseModel, ValidationError
|
|
9
14
|
import asyncio
|
|
10
|
-
import portalocker
|
|
11
15
|
import threading
|
|
12
16
|
|
|
13
|
-
from deepeval.utils import make_model_config
|
|
17
|
+
from deepeval.utils import make_model_config, is_read_only_env
|
|
14
18
|
|
|
15
19
|
from deepeval.prompt.api import (
|
|
16
20
|
PromptHttpResponse,
|
|
@@ -24,9 +28,6 @@ from deepeval.prompt.api import (
|
|
|
24
28
|
ModelSettings,
|
|
25
29
|
OutputSchema,
|
|
26
30
|
OutputType,
|
|
27
|
-
ReasoningEffort,
|
|
28
|
-
Verbosity,
|
|
29
|
-
ModelProvider,
|
|
30
31
|
)
|
|
31
32
|
from deepeval.prompt.utils import (
|
|
32
33
|
interpolate_text,
|
|
@@ -36,6 +37,18 @@ from deepeval.prompt.utils import (
|
|
|
36
37
|
from deepeval.confident.api import Api, Endpoints, HttpMethods
|
|
37
38
|
from deepeval.constants import HIDDEN_DIR
|
|
38
39
|
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
portalocker = None
|
|
44
|
+
if not is_read_only_env():
|
|
45
|
+
try:
|
|
46
|
+
import portalocker
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logger.warning("failed to import portalocker: %s", e)
|
|
49
|
+
else:
|
|
50
|
+
logger.warning("READ_ONLY filesystem: skipping disk cache for prompts.")
|
|
51
|
+
|
|
39
52
|
CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
|
|
40
53
|
VERSION_CACHE_KEY = "version"
|
|
41
54
|
LABEL_CACHE_KEY = "label"
|
|
@@ -165,7 +178,7 @@ class Prompt:
|
|
|
165
178
|
content = f.read()
|
|
166
179
|
try:
|
|
167
180
|
data = json.loads(content)
|
|
168
|
-
except:
|
|
181
|
+
except (json.JSONDecodeError, TypeError):
|
|
169
182
|
self.text_template = content
|
|
170
183
|
return content
|
|
171
184
|
|
|
@@ -203,7 +216,6 @@ class Prompt:
|
|
|
203
216
|
"Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
|
|
204
217
|
)
|
|
205
218
|
|
|
206
|
-
print("@@@@@")
|
|
207
219
|
return interpolate_text(interpolation_type, text_template, **kwargs)
|
|
208
220
|
|
|
209
221
|
elif prompt_type == PromptType.LIST:
|
|
@@ -248,7 +260,7 @@ class Prompt:
|
|
|
248
260
|
version: Optional[str] = None,
|
|
249
261
|
label: Optional[str] = None,
|
|
250
262
|
) -> Optional[CachedPrompt]:
|
|
251
|
-
if not os.path.exists(CACHE_FILE_NAME):
|
|
263
|
+
if portalocker is None or not os.path.exists(CACHE_FILE_NAME):
|
|
252
264
|
return None
|
|
253
265
|
|
|
254
266
|
try:
|
|
@@ -296,13 +308,12 @@ class Prompt:
|
|
|
296
308
|
output_type: Optional[OutputType] = None,
|
|
297
309
|
output_schema: Optional[OutputSchema] = None,
|
|
298
310
|
):
|
|
299
|
-
if not self.alias:
|
|
311
|
+
if portalocker is None or not self.alias:
|
|
300
312
|
return
|
|
301
313
|
|
|
302
|
-
# Ensure directory exists
|
|
303
|
-
os.makedirs(HIDDEN_DIR, exist_ok=True)
|
|
304
|
-
|
|
305
314
|
try:
|
|
315
|
+
# Ensure directory exists
|
|
316
|
+
os.makedirs(HIDDEN_DIR, exist_ok=True)
|
|
306
317
|
# Use r+ mode if file exists, w mode if it doesn't
|
|
307
318
|
mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
|
|
308
319
|
|
|
@@ -481,7 +492,7 @@ class Prompt:
|
|
|
481
492
|
cached_prompt.output_schema
|
|
482
493
|
)
|
|
483
494
|
return
|
|
484
|
-
except:
|
|
495
|
+
except Exception:
|
|
485
496
|
pass
|
|
486
497
|
|
|
487
498
|
api = Api()
|
deepeval/simulator/template.py
CHANGED
|
@@ -112,7 +112,7 @@ class ConversationSimulatorTemplate:
|
|
|
112
112
|
]
|
|
113
113
|
Example JSON Output:
|
|
114
114
|
{{
|
|
115
|
-
"is_complete":
|
|
115
|
+
"is_complete": false,
|
|
116
116
|
"reason": "The assistant explained how to forget password but ahas not confirmed that the user successfully set a new password."
|
|
117
117
|
}}
|
|
118
118
|
|
deepeval/test_case/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ from .conversational_test_case import (
|
|
|
10
10
|
TurnParams,
|
|
11
11
|
)
|
|
12
12
|
from .mllm_test_case import MLLMTestCase, MLLMTestCaseParams, MLLMImage
|
|
13
|
-
from .arena_test_case import ArenaTestCase
|
|
13
|
+
from .arena_test_case import ArenaTestCase, Contestant
|
|
14
14
|
from .mcp import (
|
|
15
15
|
MCPServer,
|
|
16
16
|
MCPPromptCall,
|
|
@@ -35,4 +35,5 @@ __all__ = [
|
|
|
35
35
|
"MLLMTestCaseParams",
|
|
36
36
|
"MLLMImage",
|
|
37
37
|
"ArenaTestCase",
|
|
38
|
+
"Contestant",
|
|
38
39
|
]
|
deepeval/test_case/arena_test_case.py
CHANGED
|
@@ -1,20 +1,31 @@
|
|
|
1
|
+
from typing import List, Dict, Optional, Union
|
|
1
2
|
from dataclasses import dataclass
|
|
2
|
-
from
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
3
5
|
from deepeval.test_case import (
|
|
4
6
|
LLMTestCase,
|
|
5
7
|
)
|
|
8
|
+
from deepeval.prompt import Prompt
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Contestant(BaseModel):
|
|
12
|
+
name: str
|
|
13
|
+
test_case: LLMTestCase
|
|
14
|
+
hyperparameters: Optional[Dict[str, Union[str, int, float, Prompt]]] = None
|
|
15
|
+
|
|
16
|
+
model_config = {"arbitrary_types_allowed": True}
|
|
6
17
|
|
|
7
18
|
|
|
8
19
|
@dataclass
|
|
9
20
|
class ArenaTestCase:
|
|
10
|
-
contestants:
|
|
21
|
+
contestants: List[Contestant]
|
|
11
22
|
|
|
12
23
|
def __post_init__(self):
|
|
13
|
-
contestant_names =
|
|
24
|
+
contestant_names = [contestant.name for contestant in self.contestants]
|
|
14
25
|
if len(contestant_names) != len(set(contestant_names)):
|
|
15
26
|
raise ValueError("All contestant names must be unique.")
|
|
16
27
|
|
|
17
|
-
cases =
|
|
28
|
+
cases = [contestant.test_case for contestant in self.contestants]
|
|
18
29
|
ref_input = cases[0].input
|
|
19
30
|
for case in cases[1:]:
|
|
20
31
|
if case.input != ref_input:
|
deepeval/test_case/mllm_test_case.py
CHANGED
|
@@ -11,33 +11,50 @@ from deepeval.test_case import ToolCall
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
13
13
|
class MLLMImage:
|
|
14
|
-
|
|
14
|
+
dataBase64: Optional[str] = None
|
|
15
|
+
mimeType: Optional[str] = None
|
|
16
|
+
url: Optional[str] = None
|
|
15
17
|
local: Optional[bool] = None
|
|
16
|
-
filename: Optional[str] =
|
|
17
|
-
mimeType: Optional[str] = field(default=None, init=False, repr=False)
|
|
18
|
-
dataBase64: Optional[str] = field(default=None, init=False, repr=False)
|
|
18
|
+
filename: Optional[str] = None
|
|
19
19
|
|
|
20
20
|
def __post_init__(self):
|
|
21
|
-
|
|
22
|
-
if self.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
self.filename = os.path.basename(path)
|
|
31
|
-
self.mimeType = (
|
|
32
|
-
mimetypes.guess_type(path)[0] or "application/octet-stream"
|
|
21
|
+
|
|
22
|
+
if self.url and self.dataBase64:
|
|
23
|
+
raise ValueError(
|
|
24
|
+
"You cannot provide both 'url' and 'dataBase64' at the same time when creating an MLLMImage."
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
if not self.url and not self.dataBase64:
|
|
28
|
+
raise ValueError(
|
|
29
|
+
"You must provide either a 'url' or both 'dataBase64' and 'mimeType' to create an MLLMImage."
|
|
33
30
|
)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
self.
|
|
31
|
+
|
|
32
|
+
if self.dataBase64 is not None:
|
|
33
|
+
if self.mimeType is None:
|
|
34
|
+
raise ValueError(
|
|
35
|
+
"mimeType must be provided when initializing from Base64 data."
|
|
36
|
+
)
|
|
37
37
|
else:
|
|
38
|
-
|
|
39
|
-
self.
|
|
40
|
-
|
|
38
|
+
is_local = self.is_local_path(self.url)
|
|
39
|
+
if self.local is not None:
|
|
40
|
+
assert self.local == is_local, "Local path mismatch"
|
|
41
|
+
else:
|
|
42
|
+
self.local = is_local
|
|
43
|
+
|
|
44
|
+
# compute filename, mime_type, and Base64 data
|
|
45
|
+
if self.local:
|
|
46
|
+
path = self.process_url(self.url)
|
|
47
|
+
self.filename = os.path.basename(path)
|
|
48
|
+
self.mimeType = (
|
|
49
|
+
mimetypes.guess_type(path)[0] or "application/octet-stream"
|
|
50
|
+
)
|
|
51
|
+
with open(path, "rb") as f:
|
|
52
|
+
raw = f.read()
|
|
53
|
+
self.dataBase64 = base64.b64encode(raw).decode("ascii")
|
|
54
|
+
else:
|
|
55
|
+
self.filename = None
|
|
56
|
+
self.mimeType = None
|
|
57
|
+
self.dataBase64 = None
|
|
41
58
|
|
|
42
59
|
@staticmethod
|
|
43
60
|
def process_url(url: str) -> str:
|
|
@@ -69,6 +86,12 @@ class MLLMImage:
|
|
|
69
86
|
return os.path.exists(path)
|
|
70
87
|
return False
|
|
71
88
|
|
|
89
|
+
def as_data_uri(self) -> Optional[str]:
|
|
90
|
+
"""Return the image as a data URI string, if Base64 data is available."""
|
|
91
|
+
if not self.dataBase64 or not self.mimeType:
|
|
92
|
+
return None
|
|
93
|
+
return f"data:{self.mimeType};base64,{self.dataBase64}"
|
|
94
|
+
|
|
72
95
|
|
|
73
96
|
class MLLMTestCaseParams(Enum):
|
|
74
97
|
INPUT = "input"
|
deepeval/test_run/cache.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
import logging
|
|
2
2
|
import sys
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
|
-
from typing import List, Optional,
|
|
5
|
+
from typing import List, Optional, Dict, Union
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
@@ -12,11 +12,26 @@ from deepeval.test_case import LLMTestCaseParams, LLMTestCase, ToolCallParams
|
|
|
12
12
|
from deepeval.test_run.api import MetricData
|
|
13
13
|
from deepeval.utils import (
|
|
14
14
|
delete_file_if_exists,
|
|
15
|
+
is_read_only_env,
|
|
15
16
|
serialize,
|
|
16
17
|
)
|
|
17
18
|
from deepeval.metrics import BaseMetric
|
|
18
19
|
from deepeval.constants import HIDDEN_DIR
|
|
19
20
|
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
portalocker = None
|
|
26
|
+
if not is_read_only_env():
|
|
27
|
+
try:
|
|
28
|
+
import portalocker
|
|
29
|
+
except Exception as e:
|
|
30
|
+
logger.warning("failed to import portalocker: %s", e)
|
|
31
|
+
else:
|
|
32
|
+
logger.warning("READ_ONLY filesystem: skipping disk cache for test runs.")
|
|
33
|
+
|
|
34
|
+
|
|
20
35
|
CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-cache.json"
|
|
21
36
|
TEMP_CACHE_FILE_NAME = f"{HIDDEN_DIR}/.temp-deepeval-cache.json"
|
|
22
37
|
|
|
@@ -97,7 +112,7 @@ class TestRunCacheManager:
|
|
|
97
112
|
def get_cached_test_case(
|
|
98
113
|
self, test_case: LLMTestCase, hyperparameters: Union[Dict, None]
|
|
99
114
|
) -> Union[CachedTestCase, None]:
|
|
100
|
-
if self.disable_write_cache:
|
|
115
|
+
if self.disable_write_cache or portalocker is None:
|
|
101
116
|
return None
|
|
102
117
|
|
|
103
118
|
cached_test_run = self.get_cached_test_run()
|
|
@@ -122,7 +137,7 @@ class TestRunCacheManager:
|
|
|
122
137
|
hyperparameters: Union[Dict, None],
|
|
123
138
|
to_temp: bool = False,
|
|
124
139
|
):
|
|
125
|
-
if self.disable_write_cache:
|
|
140
|
+
if self.disable_write_cache or portalocker is None:
|
|
126
141
|
return
|
|
127
142
|
cache_dict = {
|
|
128
143
|
LLMTestCaseParams.INPUT.value: test_case.input,
|
|
@@ -142,7 +157,7 @@ class TestRunCacheManager:
|
|
|
142
157
|
def set_cached_test_run(
|
|
143
158
|
self, cached_test_run: CachedTestRun, temp: bool = False
|
|
144
159
|
):
|
|
145
|
-
if self.disable_write_cache:
|
|
160
|
+
if self.disable_write_cache or portalocker is None:
|
|
146
161
|
return
|
|
147
162
|
|
|
148
163
|
if temp:
|
|
@@ -151,7 +166,7 @@ class TestRunCacheManager:
|
|
|
151
166
|
self.cached_test_run = cached_test_run
|
|
152
167
|
|
|
153
168
|
def save_cached_test_run(self, to_temp: bool = False):
|
|
154
|
-
if self.disable_write_cache:
|
|
169
|
+
if self.disable_write_cache or portalocker is None:
|
|
155
170
|
return
|
|
156
171
|
|
|
157
172
|
if to_temp:
|
|
@@ -178,7 +193,7 @@ class TestRunCacheManager:
|
|
|
178
193
|
)
|
|
179
194
|
|
|
180
195
|
def create_cached_test_run(self, temp: bool = False):
|
|
181
|
-
if self.disable_write_cache:
|
|
196
|
+
if self.disable_write_cache or portalocker is None:
|
|
182
197
|
return
|
|
183
198
|
|
|
184
199
|
cached_test_run = CachedTestRun()
|
|
@@ -188,7 +203,7 @@ class TestRunCacheManager:
|
|
|
188
203
|
def get_cached_test_run(
|
|
189
204
|
self, from_temp: bool = False
|
|
190
205
|
) -> Union[CachedTestRun, None]:
|
|
191
|
-
if self.disable_write_cache:
|
|
206
|
+
if self.disable_write_cache or portalocker is None:
|
|
192
207
|
return
|
|
193
208
|
|
|
194
209
|
should_create_cached_test_run = False
|
|
@@ -209,7 +224,7 @@ class TestRunCacheManager:
|
|
|
209
224
|
try:
|
|
210
225
|
data = json.loads(content)
|
|
211
226
|
self.temp_cached_test_run = CachedTestRun.load(data)
|
|
212
|
-
except Exception
|
|
227
|
+
except Exception:
|
|
213
228
|
should_create_cached_test_run = True
|
|
214
229
|
except portalocker.exceptions.LockException as e:
|
|
215
230
|
print(
|
|
@@ -217,6 +232,9 @@ class TestRunCacheManager:
|
|
|
217
232
|
file=sys.stderr,
|
|
218
233
|
)
|
|
219
234
|
|
|
235
|
+
if should_create_cached_test_run:
|
|
236
|
+
self.create_cached_test_run(temp=from_temp)
|
|
237
|
+
|
|
220
238
|
return self.temp_cached_test_run
|
|
221
239
|
else:
|
|
222
240
|
if self.cached_test_run:
|
|
@@ -250,6 +268,9 @@ class TestRunCacheManager:
|
|
|
250
268
|
return self.cached_test_run
|
|
251
269
|
|
|
252
270
|
def wrap_up_cached_test_run(self):
|
|
271
|
+
if portalocker is None:
|
|
272
|
+
return
|
|
273
|
+
|
|
253
274
|
if self.disable_write_cache:
|
|
254
275
|
# Clear cache if write cache is disabled
|
|
255
276
|
delete_file_if_exists(self.cache_file_name)
|
|
@@ -330,7 +351,7 @@ class Cache:
|
|
|
330
351
|
if criteria_value != cached_criteria_value:
|
|
331
352
|
return False
|
|
332
353
|
continue
|
|
333
|
-
except:
|
|
354
|
+
except Exception:
|
|
334
355
|
# For non-GEval
|
|
335
356
|
continue
|
|
336
357
|
|
deepeval/test_run/hyperparameters.py
CHANGED
|
@@ -33,7 +33,11 @@ def process_hyperparameters(
|
|
|
33
33
|
)
|
|
34
34
|
|
|
35
35
|
if isinstance(value, Prompt):
|
|
36
|
-
|
|
36
|
+
try:
|
|
37
|
+
prompt_key = f"{value.alias}_{value.version}"
|
|
38
|
+
except AttributeError:
|
|
39
|
+
prompt_key = f"{value.alias}_00.00.01"
|
|
40
|
+
|
|
37
41
|
if value._prompt_version_id is not None and value.type is not None:
|
|
38
42
|
processed_hyperparameters[key] = PromptApi(
|
|
39
43
|
id=value._prompt_version_id,
|
deepeval/test_run/test_run.py
CHANGED
|
@@ -6,11 +6,11 @@ from typing import Any, Optional, List, Dict, Union, Tuple
|
|
|
6
6
|
import shutil
|
|
7
7
|
import sys
|
|
8
8
|
import datetime
|
|
9
|
-
import portalocker
|
|
10
9
|
from rich.table import Table
|
|
11
10
|
from rich.console import Console
|
|
12
11
|
from rich import print
|
|
13
12
|
|
|
13
|
+
|
|
14
14
|
from deepeval.metrics import BaseMetric
|
|
15
15
|
from deepeval.confident.api import Api, Endpoints, HttpMethods, is_confident
|
|
16
16
|
from deepeval.test_run.api import (
|
|
@@ -25,6 +25,7 @@ from deepeval.test_case import LLMTestCase, ConversationalTestCase, MLLMTestCase
|
|
|
25
25
|
from deepeval.utils import (
|
|
26
26
|
delete_file_if_exists,
|
|
27
27
|
get_is_running_deepeval,
|
|
28
|
+
is_read_only_env,
|
|
28
29
|
open_browser,
|
|
29
30
|
shorten,
|
|
30
31
|
format_turn,
|
|
@@ -42,6 +43,21 @@ from rich.panel import Panel
|
|
|
42
43
|
from rich.columns import Columns
|
|
43
44
|
|
|
44
45
|
|
|
46
|
+
portalocker = None
|
|
47
|
+
if not is_read_only_env():
|
|
48
|
+
try:
|
|
49
|
+
import portalocker
|
|
50
|
+
except Exception as e:
|
|
51
|
+
print(
|
|
52
|
+
f"Warning: failed to import portalocker: {e}",
|
|
53
|
+
file=sys.stderr,
|
|
54
|
+
)
|
|
55
|
+
else:
|
|
56
|
+
print(
|
|
57
|
+
"Warning: DeepEval is configured for read only environment. Test runs will not be written to disk."
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
45
61
|
TEMP_FILE_PATH = f"{HIDDEN_DIR}/.temp_test_run_data.json"
|
|
46
62
|
LATEST_TEST_RUN_FILE_PATH = f"{HIDDEN_DIR}/.latest_test_run.json"
|
|
47
63
|
LATEST_TEST_RUN_DATA_KEY = "testRunData"
|
|
@@ -456,7 +472,7 @@ class TestRunManager:
|
|
|
456
472
|
if self.test_run is None:
|
|
457
473
|
self.create_test_run(identifier=identifier)
|
|
458
474
|
|
|
459
|
-
if self.save_to_disk:
|
|
475
|
+
if portalocker and self.save_to_disk:
|
|
460
476
|
try:
|
|
461
477
|
with portalocker.Lock(
|
|
462
478
|
self.temp_file_path,
|
|
@@ -479,7 +495,7 @@ class TestRunManager:
|
|
|
479
495
|
return self.test_run
|
|
480
496
|
|
|
481
497
|
def save_test_run(self, path: str, save_under_key: Optional[str] = None):
|
|
482
|
-
if self.save_to_disk:
|
|
498
|
+
if portalocker and self.save_to_disk:
|
|
483
499
|
try:
|
|
484
500
|
# ensure parent directory exists
|
|
485
501
|
parent = os.path.dirname(path)
|
|
@@ -505,11 +521,14 @@ class TestRunManager:
|
|
|
505
521
|
pass
|
|
506
522
|
|
|
507
523
|
def save_final_test_run_link(self, link: str):
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
524
|
+
if portalocker:
|
|
525
|
+
try:
|
|
526
|
+
with portalocker.Lock(
|
|
527
|
+
LATEST_TEST_RUN_FILE_PATH, mode="w"
|
|
528
|
+
) as file:
|
|
529
|
+
json.dump({LATEST_TEST_RUN_LINK_KEY: link}, file)
|
|
530
|
+
except portalocker.exceptions.LockException:
|
|
531
|
+
pass
|
|
513
532
|
|
|
514
533
|
def update_test_run(
|
|
515
534
|
self,
|
|
@@ -523,7 +542,7 @@ class TestRunManager:
|
|
|
523
542
|
):
|
|
524
543
|
return
|
|
525
544
|
|
|
526
|
-
if self.save_to_disk:
|
|
545
|
+
if portalocker and self.save_to_disk:
|
|
527
546
|
try:
|
|
528
547
|
with portalocker.Lock(
|
|
529
548
|
self.temp_file_path,
|
deepeval/tracing/tracing.py
CHANGED
deepeval/utils.py
CHANGED
|
{deepeval-3.7.0.dist-info → deepeval-3.7.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepeval
|
|
3
|
-
Version: 3.7.0
|
|
3
|
+
Version: 3.7.1
|
|
4
4
|
Summary: The LLM Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/confident-ai/deepeval
|
|
6
6
|
License: Apache-2.0
|
|
@@ -32,7 +32,7 @@ Requires-Dist: pyfiglet
|
|
|
32
32
|
Requires-Dist: pytest
|
|
33
33
|
Requires-Dist: pytest-asyncio
|
|
34
34
|
Requires-Dist: pytest-repeat
|
|
35
|
-
Requires-Dist: pytest-rerunfailures
|
|
35
|
+
Requires-Dist: pytest-rerunfailures
|
|
36
36
|
Requires-Dist: pytest-xdist
|
|
37
37
|
Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
|
|
38
38
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|