deepeval 3.7.0__py3-none-any.whl → 3.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. deepeval/__init__.py +0 -4
  2. deepeval/_version.py +1 -1
  3. deepeval/cli/main.py +7 -0
  4. deepeval/confident/api.py +6 -1
  5. deepeval/config/settings.py +5 -0
  6. deepeval/evaluate/compare.py +219 -4
  7. deepeval/evaluate/types.py +6 -0
  8. deepeval/evaluate/utils.py +30 -0
  9. deepeval/key_handler.py +1 -0
  10. deepeval/metrics/arena_g_eval/arena_g_eval.py +5 -1
  11. deepeval/metrics/arena_g_eval/utils.py +5 -5
  12. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +9 -18
  13. deepeval/metrics/g_eval/g_eval.py +5 -1
  14. deepeval/metrics/g_eval/utils.py +1 -1
  15. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +5 -1
  16. deepeval/metrics/utils.py +1 -1
  17. deepeval/models/llms/gemini_model.py +27 -5
  18. deepeval/openai_agents/callback_handler.py +12 -3
  19. deepeval/prompt/prompt.py +25 -14
  20. deepeval/simulator/template.py +1 -1
  21. deepeval/synthesizer/config.py +9 -0
  22. deepeval/synthesizer/schema.py +23 -0
  23. deepeval/synthesizer/synthesizer.py +1137 -2
  24. deepeval/synthesizer/templates/__init__.py +11 -2
  25. deepeval/synthesizer/templates/template.py +554 -1
  26. deepeval/synthesizer/templates/template_extraction.py +32 -0
  27. deepeval/synthesizer/templates/template_prompt.py +262 -0
  28. deepeval/test_case/__init__.py +2 -1
  29. deepeval/test_case/arena_test_case.py +15 -4
  30. deepeval/test_case/mllm_test_case.py +45 -22
  31. deepeval/test_run/cache.py +31 -10
  32. deepeval/test_run/hyperparameters.py +5 -1
  33. deepeval/test_run/test_run.py +28 -9
  34. deepeval/tracing/tracing.py +1 -1
  35. deepeval/utils.py +4 -0
  36. {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/METADATA +3 -2
  37. {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/RECORD +40 -40
  38. {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/LICENSE.md +0 -0
  39. {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/WHEEL +0 -0
  40. {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/entry_points.txt +0 -0
deepeval/openai_agents/callback_handler.py CHANGED
@@ -1,13 +1,21 @@
1
+ from time import perf_counter
2
+
1
3
  from deepeval.tracing.tracing import (
2
4
  Observer,
3
5
  current_span_context,
4
6
  trace_manager,
5
7
  )
6
- from deepeval.openai_agents.extractors import *
8
+ from deepeval.openai_agents.extractors import (
9
+ update_span_properties,
10
+ update_trace_properties_from_span_data,
11
+ )
7
12
  from deepeval.tracing.context import current_trace_context
8
13
  from deepeval.tracing.utils import make_json_serializable
9
- from time import perf_counter
10
- from deepeval.tracing.types import TraceSpanStatus
14
+ from deepeval.tracing.types import (
15
+ BaseSpan,
16
+ LlmSpan,
17
+ TraceSpanStatus,
18
+ )
11
19
 
12
20
  try:
13
21
  from agents.tracing import Span, Trace, TracingProcessor
@@ -18,6 +26,7 @@ try:
18
26
  GenerationSpanData,
19
27
  GuardrailSpanData,
20
28
  HandoffSpanData,
29
+ MCPListToolsSpanData,
21
30
  ResponseSpanData,
22
31
  SpanData,
23
32
  )
deepeval/prompt/prompt.py CHANGED
@@ -1,3 +1,8 @@
1
+ import logging
2
+ import time
3
+ import json
4
+ import os
5
+
1
6
  from enum import Enum
2
7
  from typing import Optional, List, Dict, Type, Literal
3
8
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
@@ -5,12 +10,11 @@ from rich.console import Console
5
10
  import time
6
11
  import json
7
12
  import os
8
- from pydantic import BaseModel, ValidationError, ConfigDict
13
+ from pydantic import BaseModel, ValidationError
9
14
  import asyncio
10
- import portalocker
11
15
  import threading
12
16
 
13
- from deepeval.utils import make_model_config
17
+ from deepeval.utils import make_model_config, is_read_only_env
14
18
 
15
19
  from deepeval.prompt.api import (
16
20
  PromptHttpResponse,
@@ -24,9 +28,6 @@ from deepeval.prompt.api import (
24
28
  ModelSettings,
25
29
  OutputSchema,
26
30
  OutputType,
27
- ReasoningEffort,
28
- Verbosity,
29
- ModelProvider,
30
31
  )
31
32
  from deepeval.prompt.utils import (
32
33
  interpolate_text,
@@ -36,6 +37,18 @@ from deepeval.prompt.utils import (
36
37
  from deepeval.confident.api import Api, Endpoints, HttpMethods
37
38
  from deepeval.constants import HIDDEN_DIR
38
39
 
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ portalocker = None
44
+ if not is_read_only_env():
45
+ try:
46
+ import portalocker
47
+ except Exception as e:
48
+ logger.warning("failed to import portalocker: %s", e)
49
+ else:
50
+ logger.warning("READ_ONLY filesystem: skipping disk cache for prompts.")
51
+
39
52
  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
40
53
  VERSION_CACHE_KEY = "version"
41
54
  LABEL_CACHE_KEY = "label"
@@ -165,7 +178,7 @@ class Prompt:
165
178
  content = f.read()
166
179
  try:
167
180
  data = json.loads(content)
168
- except:
181
+ except (json.JSONDecodeError, TypeError):
169
182
  self.text_template = content
170
183
  return content
171
184
 
@@ -203,7 +216,6 @@ class Prompt:
203
216
  "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
204
217
  )
205
218
 
206
- print("@@@@@")
207
219
  return interpolate_text(interpolation_type, text_template, **kwargs)
208
220
 
209
221
  elif prompt_type == PromptType.LIST:
@@ -248,7 +260,7 @@ class Prompt:
248
260
  version: Optional[str] = None,
249
261
  label: Optional[str] = None,
250
262
  ) -> Optional[CachedPrompt]:
251
- if not os.path.exists(CACHE_FILE_NAME):
263
+ if portalocker is None or not os.path.exists(CACHE_FILE_NAME):
252
264
  return None
253
265
 
254
266
  try:
@@ -296,13 +308,12 @@ class Prompt:
296
308
  output_type: Optional[OutputType] = None,
297
309
  output_schema: Optional[OutputSchema] = None,
298
310
  ):
299
- if not self.alias:
311
+ if portalocker is None or not self.alias:
300
312
  return
301
313
 
302
- # Ensure directory exists
303
- os.makedirs(HIDDEN_DIR, exist_ok=True)
304
-
305
314
  try:
315
+ # Ensure directory exists
316
+ os.makedirs(HIDDEN_DIR, exist_ok=True)
306
317
  # Use r+ mode if file exists, w mode if it doesn't
307
318
  mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
308
319
 
@@ -481,7 +492,7 @@ class Prompt:
481
492
  cached_prompt.output_schema
482
493
  )
483
494
  return
484
- except:
495
+ except Exception:
485
496
  pass
486
497
 
487
498
  api = Api()
deepeval/simulator/template.py CHANGED
@@ -112,7 +112,7 @@ class ConversationSimulatorTemplate:
112
112
  ]
113
113
  Example JSON Output:
114
114
  {{
115
- "is_complete": False,
115
+ "is_complete": false,
116
116
  "reason": "The assistant explained how to forget password but ahas not confirmed that the user successfully set a new password."
117
117
  }}
118
118
 
deepeval/synthesizer/config.py CHANGED
@@ -41,6 +41,15 @@ class StylingConfig:
41
41
  expected_output_format: Optional[str] = None
42
42
 
43
43
 
44
+ @dataclass
45
+ class ConversationalStylingConfig:
46
+ scenario_context: Optional[str] = None
47
+ conversational_task: Optional[str] = None
48
+ participant_roles: Optional[str] = None
49
+ scenario_format: Optional[str] = None
50
+ expected_outcome_format: Optional[str] = None
51
+
52
+
44
53
  @dataclass
45
54
  class ContextConstructionConfig:
46
55
  embedder: Optional[Union[str, DeepEvalBaseEmbeddingModel]] = None
deepeval/synthesizer/schema.py CHANGED
@@ -58,3 +58,26 @@ class PromptStyling(BaseModel):
58
58
  scenario: str
59
59
  task: str
60
60
  input_format: str
61
+
62
+
63
+ class ConversationalScenario(BaseModel):
64
+ scenario: str
65
+
66
+
67
+ class ConversationalScenarioList(BaseModel):
68
+ data: List[ConversationalScenario]
69
+
70
+
71
+ class RewrittenScenario(BaseModel):
72
+ rewritten_scenario: str
73
+
74
+
75
+ class ScenarioFeedback(BaseModel):
76
+ score: float
77
+ feedback: str
78
+
79
+
80
+ class ConversationalPromptStyling(BaseModel):
81
+ scenario_context: str
82
+ conversational_task: str
83
+ participant_roles: str