deepeval 3.7.0__py3-none-any.whl → 3.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/__init__.py +0 -4
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +7 -0
- deepeval/confident/api.py +6 -1
- deepeval/config/settings.py +5 -0
- deepeval/evaluate/compare.py +219 -4
- deepeval/evaluate/types.py +6 -0
- deepeval/evaluate/utils.py +30 -0
- deepeval/key_handler.py +1 -0
- deepeval/metrics/arena_g_eval/arena_g_eval.py +5 -1
- deepeval/metrics/arena_g_eval/utils.py +5 -5
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +9 -18
- deepeval/metrics/g_eval/g_eval.py +5 -1
- deepeval/metrics/g_eval/utils.py +1 -1
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +5 -1
- deepeval/metrics/utils.py +1 -1
- deepeval/models/llms/gemini_model.py +27 -5
- deepeval/openai_agents/callback_handler.py +12 -3
- deepeval/prompt/prompt.py +25 -14
- deepeval/simulator/template.py +1 -1
- deepeval/synthesizer/config.py +9 -0
- deepeval/synthesizer/schema.py +23 -0
- deepeval/synthesizer/synthesizer.py +1137 -2
- deepeval/synthesizer/templates/__init__.py +11 -2
- deepeval/synthesizer/templates/template.py +554 -1
- deepeval/synthesizer/templates/template_extraction.py +32 -0
- deepeval/synthesizer/templates/template_prompt.py +262 -0
- deepeval/test_case/__init__.py +2 -1
- deepeval/test_case/arena_test_case.py +15 -4
- deepeval/test_case/mllm_test_case.py +45 -22
- deepeval/test_run/cache.py +31 -10
- deepeval/test_run/hyperparameters.py +5 -1
- deepeval/test_run/test_run.py +28 -9
- deepeval/tracing/tracing.py +1 -1
- deepeval/utils.py +4 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/METADATA +3 -2
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/RECORD +40 -40
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/WHEEL +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/entry_points.txt +0 -0
deepeval/openai_agents/callback_handler.py
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
|
+
from time import perf_counter
|
|
2
|
+
|
|
1
3
|
from deepeval.tracing.tracing import (
|
|
2
4
|
Observer,
|
|
3
5
|
current_span_context,
|
|
4
6
|
trace_manager,
|
|
5
7
|
)
|
|
6
|
-
from deepeval.openai_agents.extractors import
|
|
8
|
+
from deepeval.openai_agents.extractors import (
|
|
9
|
+
update_span_properties,
|
|
10
|
+
update_trace_properties_from_span_data,
|
|
11
|
+
)
|
|
7
12
|
from deepeval.tracing.context import current_trace_context
|
|
8
13
|
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
-
from
|
|
10
|
-
|
|
14
|
+
from deepeval.tracing.types import (
|
|
15
|
+
BaseSpan,
|
|
16
|
+
LlmSpan,
|
|
17
|
+
TraceSpanStatus,
|
|
18
|
+
)
|
|
11
19
|
|
|
12
20
|
try:
|
|
13
21
|
from agents.tracing import Span, Trace, TracingProcessor
|
|
@@ -18,6 +26,7 @@ try:
|
|
|
18
26
|
GenerationSpanData,
|
|
19
27
|
GuardrailSpanData,
|
|
20
28
|
HandoffSpanData,
|
|
29
|
+
MCPListToolsSpanData,
|
|
21
30
|
ResponseSpanData,
|
|
22
31
|
SpanData,
|
|
23
32
|
)
|
deepeval/prompt/prompt.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
1
6
|
from enum import Enum
|
|
2
7
|
from typing import Optional, List, Dict, Type, Literal
|
|
3
8
|
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
|
|
@@ -5,12 +10,11 @@ from rich.console import Console
|
|
|
5
10
|
import time
|
|
6
11
|
import json
|
|
7
12
|
import os
|
|
8
|
-
from pydantic import BaseModel, ValidationError
|
|
13
|
+
from pydantic import BaseModel, ValidationError
|
|
9
14
|
import asyncio
|
|
10
|
-
import portalocker
|
|
11
15
|
import threading
|
|
12
16
|
|
|
13
|
-
from deepeval.utils import make_model_config
|
|
17
|
+
from deepeval.utils import make_model_config, is_read_only_env
|
|
14
18
|
|
|
15
19
|
from deepeval.prompt.api import (
|
|
16
20
|
PromptHttpResponse,
|
|
@@ -24,9 +28,6 @@ from deepeval.prompt.api import (
|
|
|
24
28
|
ModelSettings,
|
|
25
29
|
OutputSchema,
|
|
26
30
|
OutputType,
|
|
27
|
-
ReasoningEffort,
|
|
28
|
-
Verbosity,
|
|
29
|
-
ModelProvider,
|
|
30
31
|
)
|
|
31
32
|
from deepeval.prompt.utils import (
|
|
32
33
|
interpolate_text,
|
|
@@ -36,6 +37,18 @@ from deepeval.prompt.utils import (
|
|
|
36
37
|
from deepeval.confident.api import Api, Endpoints, HttpMethods
|
|
37
38
|
from deepeval.constants import HIDDEN_DIR
|
|
38
39
|
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
portalocker = None
|
|
44
|
+
if not is_read_only_env():
|
|
45
|
+
try:
|
|
46
|
+
import portalocker
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logger.warning("failed to import portalocker: %s", e)
|
|
49
|
+
else:
|
|
50
|
+
logger.warning("READ_ONLY filesystem: skipping disk cache for prompts.")
|
|
51
|
+
|
|
39
52
|
CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
|
|
40
53
|
VERSION_CACHE_KEY = "version"
|
|
41
54
|
LABEL_CACHE_KEY = "label"
|
|
@@ -165,7 +178,7 @@ class Prompt:
|
|
|
165
178
|
content = f.read()
|
|
166
179
|
try:
|
|
167
180
|
data = json.loads(content)
|
|
168
|
-
except:
|
|
181
|
+
except (json.JSONDecodeError, TypeError):
|
|
169
182
|
self.text_template = content
|
|
170
183
|
return content
|
|
171
184
|
|
|
@@ -203,7 +216,6 @@ class Prompt:
|
|
|
203
216
|
"Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
|
|
204
217
|
)
|
|
205
218
|
|
|
206
|
-
print("@@@@@")
|
|
207
219
|
return interpolate_text(interpolation_type, text_template, **kwargs)
|
|
208
220
|
|
|
209
221
|
elif prompt_type == PromptType.LIST:
|
|
@@ -248,7 +260,7 @@ class Prompt:
|
|
|
248
260
|
version: Optional[str] = None,
|
|
249
261
|
label: Optional[str] = None,
|
|
250
262
|
) -> Optional[CachedPrompt]:
|
|
251
|
-
if not os.path.exists(CACHE_FILE_NAME):
|
|
263
|
+
if portalocker is None or not os.path.exists(CACHE_FILE_NAME):
|
|
252
264
|
return None
|
|
253
265
|
|
|
254
266
|
try:
|
|
@@ -296,13 +308,12 @@ class Prompt:
|
|
|
296
308
|
output_type: Optional[OutputType] = None,
|
|
297
309
|
output_schema: Optional[OutputSchema] = None,
|
|
298
310
|
):
|
|
299
|
-
if not self.alias:
|
|
311
|
+
if portalocker is None or not self.alias:
|
|
300
312
|
return
|
|
301
313
|
|
|
302
|
-
# Ensure directory exists
|
|
303
|
-
os.makedirs(HIDDEN_DIR, exist_ok=True)
|
|
304
|
-
|
|
305
314
|
try:
|
|
315
|
+
# Ensure directory exists
|
|
316
|
+
os.makedirs(HIDDEN_DIR, exist_ok=True)
|
|
306
317
|
# Use r+ mode if file exists, w mode if it doesn't
|
|
307
318
|
mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
|
|
308
319
|
|
|
@@ -481,7 +492,7 @@ class Prompt:
|
|
|
481
492
|
cached_prompt.output_schema
|
|
482
493
|
)
|
|
483
494
|
return
|
|
484
|
-
except:
|
|
495
|
+
except Exception:
|
|
485
496
|
pass
|
|
486
497
|
|
|
487
498
|
api = Api()
|
deepeval/simulator/template.py
CHANGED
|
@@ -112,7 +112,7 @@ class ConversationSimulatorTemplate:
|
|
|
112
112
|
]
|
|
113
113
|
Example JSON Output:
|
|
114
114
|
{{
|
|
115
|
-
"is_complete":
|
|
115
|
+
"is_complete": false,
|
|
116
116
|
"reason": "The assistant explained how to forget password but ahas not confirmed that the user successfully set a new password."
|
|
117
117
|
}}
|
|
118
118
|
|
deepeval/synthesizer/config.py
CHANGED
|
@@ -41,6 +41,15 @@ class StylingConfig:
|
|
|
41
41
|
expected_output_format: Optional[str] = None
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
@dataclass
|
|
45
|
+
class ConversationalStylingConfig:
|
|
46
|
+
scenario_context: Optional[str] = None
|
|
47
|
+
conversational_task: Optional[str] = None
|
|
48
|
+
participant_roles: Optional[str] = None
|
|
49
|
+
scenario_format: Optional[str] = None
|
|
50
|
+
expected_outcome_format: Optional[str] = None
|
|
51
|
+
|
|
52
|
+
|
|
44
53
|
@dataclass
|
|
45
54
|
class ContextConstructionConfig:
|
|
46
55
|
embedder: Optional[Union[str, DeepEvalBaseEmbeddingModel]] = None
|
deepeval/synthesizer/schema.py
CHANGED
|
@@ -58,3 +58,26 @@ class PromptStyling(BaseModel):
|
|
|
58
58
|
scenario: str
|
|
59
59
|
task: str
|
|
60
60
|
input_format: str
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ConversationalScenario(BaseModel):
|
|
64
|
+
scenario: str
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ConversationalScenarioList(BaseModel):
|
|
68
|
+
data: List[ConversationalScenario]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class RewrittenScenario(BaseModel):
|
|
72
|
+
rewritten_scenario: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ScenarioFeedback(BaseModel):
|
|
76
|
+
score: float
|
|
77
|
+
feedback: str
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ConversationalPromptStyling(BaseModel):
|
|
81
|
+
scenario_context: str
|
|
82
|
+
conversational_task: str
|
|
83
|
+
participant_roles: str
|