deepeval 3.6.4__py3-none-any.whl → 3.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +13 -0
- deepeval/dataset/dataset.py +8 -2
- deepeval/evaluate/evaluate.py +8 -2
- deepeval/evaluate/execute.py +6 -11
- deepeval/evaluate/types.py +4 -1
- deepeval/evaluate/utils.py +46 -29
- deepeval/integrations/crewai/__init__.py +1 -2
- deepeval/integrations/crewai/handler.py +153 -81
- deepeval/integrations/crewai/wrapper.py +87 -0
- deepeval/integrations/pydantic_ai/instrumentator.py +48 -9
- deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- deepeval/metrics/faithfulness/faithfulness.py +8 -0
- deepeval/test_run/__init__.py +2 -1
- deepeval/test_run/api.py +1 -0
- deepeval/test_run/test_run.py +85 -9
- deepeval/tracing/__init__.py +2 -0
- deepeval/tracing/otel/test_exporter.py +35 -0
- deepeval/tracing/trace_context.py +14 -0
- deepeval/tracing/tracing.py +7 -6
- deepeval/tracing/utils.py +2 -86
- deepeval/utils.py +149 -1
- {deepeval-3.6.4.dist-info → deepeval-3.6.5.dist-info}/METADATA +1 -1
- {deepeval-3.6.4.dist-info → deepeval-3.6.5.dist-info}/RECORD +27 -25
- deepeval/integrations/crewai/agent.py +0 -98
- deepeval/integrations/crewai/patch.py +0 -41
- {deepeval-3.6.4.dist-info → deepeval-3.6.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.4.dist-info → deepeval-3.6.5.dist-info}/WHEEL +0 -0
- {deepeval-3.6.4.dist-info → deepeval-3.6.5.dist-info}/entry_points.txt +0 -0
deepeval/integrations/pydantic_ai/instrumentator.py CHANGED

@@ -1,7 +1,19 @@
 import json
+import logging
 import os
 from typing import Literal, Optional, List
 
+from deepeval.config.settings import get_settings
+from deepeval.confident.api import get_confident_api_key
+from deepeval.prompt import Prompt
+from deepeval.tracing.context import current_trace_context
+from deepeval.tracing.types import Trace
+from deepeval.tracing.otel.utils import to_hex_string
+
+
+logger = logging.getLogger(__name__)
+
+
 try:
     from pydantic_ai.models.instrumented import InstrumentationSettings
     from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
@@ -11,7 +23,20 @@ try:
     )
 
     dependency_installed = True
-except:
+except ImportError as e:
+    if get_settings().DEEPEVAL_VERBOSE_MODE:
+        if isinstance(e, ModuleNotFoundError):
+            logger.warning(
+                "Optional tracing dependency not installed: %s",
+                e.name,
+                stacklevel=2,
+            )
+        else:
+            logger.warning(
+                "Optional tracing import failed: %s",
+                e,
+                stacklevel=2,
+            )
     dependency_installed = False
 
 
@@ -25,6 +50,10 @@ def is_dependency_installed():
 
 from deepeval.confident.api import get_confident_api_key
 from deepeval.prompt import Prompt
+from deepeval.tracing.otel.test_exporter import test_exporter
+from deepeval.tracing.context import current_trace_context
+from deepeval.tracing.types import Trace
+from deepeval.tracing.otel.utils import to_hex_string
 
 # OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
 OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
@@ -37,6 +66,12 @@ class SpanInterceptor(SpanProcessor):
 
     def on_start(self, span, parent_context):
 
+        # set trace uuid
+        _current_trace_context = current_trace_context.get()
+        if _current_trace_context and isinstance(_current_trace_context, Trace):
+            _otel_trace_id = span.get_span_context().trace_id
+            _current_trace_context.uuid = to_hex_string(_otel_trace_id, 32)
+
         # set trace attributes
         if self.settings.thread_id:
             span.set_attribute(
@@ -148,8 +183,9 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
         confident_prompt: Optional[Prompt] = None,
         llm_metric_collection: Optional[str] = None,
         agent_metric_collection: Optional[str] = None,
-        tool_metric_collection_map: dict = {},
+        tool_metric_collection_map: Optional[dict] = None,
         trace_metric_collection: Optional[str] = None,
+        is_test_mode: Optional[bool] = False,
     ):
         is_dependency_installed()
 
@@ -162,7 +198,7 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
         ]:
             self.environment = _environment
 
-        self.tool_metric_collection_map = tool_metric_collection_map
+        self.tool_metric_collection_map = tool_metric_collection_map or {}
         self.name = name
         self.thread_id = thread_id
         self.user_id = user_id
@@ -185,12 +221,15 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
         span_interceptor = SpanInterceptor(self)
         trace_provider.add_span_processor(span_interceptor)
 
-        trace_provider.add_span_processor(
-            BatchSpanProcessor(
-                OTLPSpanExporter(
-                    endpoint=OTLP_ENDPOINT,
-                    headers={"x-confident-api-key": api_key},
+        if is_test_mode:
+            trace_provider.add_span_processor(BatchSpanProcessor(test_exporter))
+        else:
+            trace_provider.add_span_processor(
+                BatchSpanProcessor(
+                    OTLPSpanExporter(
+                        endpoint=OTLP_ENDPOINT,
+                        headers={"x-confident-api-key": api_key},
+                    )
                 )
             )
-        )
         super().__init__(tracer_provider=trace_provider)
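
Taken together, the instrumentator changes above add a test mode: when is_test_mode is truthy, the span processor wraps the in-memory test_exporter instead of the OTLP exporter, and on_start now copies the OpenTelemetry trace id onto the active deepeval Trace. Below is a minimal sketch of opting into test mode; it assumes the optional pydantic_ai/OpenTelemetry dependencies are installed and that the remaining configuration (Confident API key, names, metric collections) is supplied elsewhere, for example via environment variables.

    from deepeval.integrations.pydantic_ai.instrumentator import (
        ConfidentInstrumentationSettings,
    )

    # Hypothetical usage: with is_test_mode=True, spans are buffered by the
    # in-memory test_exporter rather than sent to the Confident OTLP endpoint.
    settings = ConfidentInstrumentationSettings(is_test_mode=True)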
deepeval/integrations/pydantic_ai/test_instrumentator.py: File without changes
deepeval/metrics/faithfulness/faithfulness.py CHANGED

@@ -41,6 +41,7 @@ class FaithfulnessMetric(BaseMetric):
         strict_mode: bool = False,
         verbose_mode: bool = False,
         truths_extraction_limit: Optional[int] = None,
+        penalize_ambiguous_claims: bool = False,
         evaluation_template: Type[FaithfulnessTemplate] = FaithfulnessTemplate,
     ):
         self.threshold = 1 if strict_mode else threshold
@@ -51,6 +52,7 @@
         self.strict_mode = strict_mode
         self.verbose_mode = verbose_mode
         self.evaluation_template = evaluation_template
+        self.penalize_ambiguous_claims = penalize_ambiguous_claims
 
         self.truths_extraction_limit = truths_extraction_limit
         if self.truths_extraction_limit is not None:
@@ -329,6 +331,12 @@
             if verdict.verdict.strip().lower() != "no":
                 faithfulness_count += 1
 
+            if (
+                self.penalize_ambiguous_claims
+                and verdict.verdict.strip().lower() == "idk"
+            ):
+                faithfulness_count -= 1
+
         score = faithfulness_count / number_of_verdicts
         return 0 if self.strict_mode and score < self.threshold else score
 
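
On the scoring change: by default any verdict other than "no" counts toward faithfulness, so an "idk" verdict is credited like a "yes". With penalize_ambiguous_claims=True, the new decrement cancels that credit, so "idk" contributes nothing to the numerator. A standalone sketch of the arithmetic (this is not the metric's API; verdict strings are assumed to be already normalized):

    def faithfulness_score(verdicts, penalize_ambiguous_claims=False):
        # mirrors the loop in the hunk above
        count = 0
        for v in verdicts:
            if v != "no":
                count += 1
            if penalize_ambiguous_claims and v == "idk":
                count -= 1
        return count / len(verdicts)

    verdicts = ["yes", "no", "idk", "yes"]
    print(faithfulness_score(verdicts))        # 0.75
    print(faithfulness_score(verdicts, True))  # 0.5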
|
deepeval/test_run/__init__.py CHANGED

@@ -11,7 +11,7 @@ from .test_run import (
 )
 
 from .hooks import on_test_run_end, invoke_test_run_end_hook
-from .api import MetricData
+from .api import MetricData, TurnApi
 from .hyperparameters import log_hyperparameters
 
 
@@ -28,5 +28,6 @@ __all__ = [
     "on_test_run_end",
     "invoke_test_run_end_hook",
     "MetricData",
+    "TurnApi",
     "log_hyperparameters",
 ]
deepeval/test_run/api.py CHANGED

@@ -99,6 +99,7 @@ class TurnApi(BaseModel):
     role: str
     content: str
     order: int
+    user_id: Optional[str] = Field(None, alias="userId")
     retrieval_context: Optional[list] = Field(None, alias="retrievalContext")
     tools_called: Optional[List[ToolCall]] = Field(None, alias="toolsCalled")
     additional_metadata: Optional[Dict] = Field(
deepeval/test_run/test_run.py CHANGED

@@ -2,9 +2,8 @@ from enum import Enum
 import os
 import json
 from pydantic import BaseModel, Field
-from typing import Any, Optional, List, Dict, Union
+from typing import Any, Optional, List, Dict, Union, Tuple
 import shutil
-import webbrowser
 import sys
 import datetime
 import portalocker
@@ -27,6 +26,9 @@ from deepeval.utils import (
     delete_file_if_exists,
     get_is_running_deepeval,
     open_browser,
+    shorten,
+    format_turn,
+    len_short,
 )
 from deepeval.test_run.cache import global_test_run_cache_manager
 from deepeval.constants import CONFIDENT_TEST_CASE_BATCH_SIZE, HIDDEN_DIR
@@ -546,7 +548,7 @@ class TestRunManager:
 
             if (
                 display == TestRunResultDisplay.PASSING
-                and test_case.success
+                and test_case.success is False
             ):
                 continue
             elif display == TestRunResultDisplay.FAILING and test_case.success:
@@ -618,7 +620,7 @@
         ):
             if (
                 display == TestRunResultDisplay.PASSING
-                and conversational_test_case.success
+                and conversational_test_case.success is False
             ):
                 continue
             elif (
@@ -631,6 +633,65 @@
             fail_count = 0
             conversational_test_case_name = conversational_test_case.name
 
+            if conversational_test_case.turns:
+                turns_table = Table(
+                    title=f"Conversation - {conversational_test_case_name}",
+                    show_header=True,
+                    header_style="bold",
+                )
+                turns_table.add_column("#", justify="right", width=3)
+                turns_table.add_column("Role", justify="left", width=10)
+
+                # subtract fixed widths + borders and padding.
+                # ~20 as a safe buffer
+                details_max_width = max(
+                    48, min(120, console.width - 3 - 10 - 20)
+                )
+                turns_table.add_column(
+                    "Details",
+                    justify="left",
+                    overflow="fold",
+                    max_width=details_max_width,
+                )
+
+                # truncate when too long
+                tools_max_width = min(60, max(24, console.width // 3))
+                turns_table.add_column(
+                    "Tools",
+                    justify="left",
+                    no_wrap=True,
+                    overflow="ellipsis",
+                    max_width=tools_max_width,
+                )
+
+                sorted_turns = sorted(
+                    conversational_test_case.turns, key=lambda t: t.order
+                )
+
+                for t in sorted_turns:
+                    tools = t.tools_called or []
+                    tool_names = ", ".join(tc.name for tc in tools)
+
+                    # omit order, role and tools since we show them in a separate columns.
+                    details = format_turn(
+                        t,
+                        include_tools_in_header=False,
+                        include_order_role_in_header=False,
+                    )
+
+                    turns_table.add_row(
+                        str(t.order),
+                        t.role,
+                        details,
+                        shorten(tool_names, len_short()),
+                    )
+
+                console.print(turns_table)
+            else:
+                console.print(
+                    f"[dim]No turns recorded for {conversational_test_case_name}.[/dim]"
+                )
+
             if conversational_test_case.metrics_data is not None:
                 for metric_data in conversational_test_case.metrics_data:
                     if metric_data.success:
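
The turns table relies on three helpers newly imported from deepeval.utils (that module grows by 149 lines in this release, but its diff is not shown in this section). The sketch below is only a guess at what they do, based on how they are called above; the real signatures and formatting in deepeval/utils.py may differ.

    # Hypothetical stand-ins for the shorten, len_short and format_turn helpers.
    def len_short() -> int:
        # assumed default truncation length for narrow table cells
        return 40

    def shorten(text: str, max_len: int) -> str:
        # cut long strings and mark the truncation
        return text if len(text) <= max_len else text[: max_len - 1] + "..."

    def format_turn(turn, include_tools_in_header=True, include_order_role_in_header=True) -> str:
        # render one conversation turn as a printable details string
        parts = []
        if include_order_role_in_header:
            parts.append(f"#{turn.order} {turn.role}")
        parts.append(turn.content)
        if include_tools_in_header and turn.tools_called:
            parts.append("tools: " + ", ".join(t.name for t in turn.tools_called))
        return "\n".join(parts)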
@@ -698,7 +759,7 @@ class TestRunManager:
             )
             print(table)
 
-    def post_test_run(self, test_run: TestRun) -> Optional[str]:
+    def post_test_run(self, test_run: TestRun) -> Optional[Tuple[str, str]]:
         if (
             len(test_run.test_cases) == 0
             and len(test_run.conversational_test_cases) == 0
@@ -752,6 +813,21 @@
                 body=body,
             )
 
+            if not isinstance(data, dict) or "id" not in data:
+                # try to show helpful details
+                detail = None
+                if isinstance(data, dict):
+                    detail = (
+                        data.get("detail")
+                        or data.get("message")
+                        or data.get("error")
+                    )
+                # fall back to repr for visibility
+                raise RuntimeError(
+                    f"Confident API response missing 'id'. "
+                    f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
+                )
+
             res = TestRunHttpResponse(
                 id=data["id"],
             )
@@ -814,7 +890,7 @@
             )
             self.save_final_test_run_link(link)
             open_browser(link)
-            return link
+            return link, res.id
 
     def save_test_run_locally(self):
         local_folder = os.getenv("DEEPEVAL_RESULTS_FOLDER")
@@ -841,7 +917,7 @@
         runDuration: float,
         display_table: bool = True,
         display: Optional[TestRunResultDisplay] = TestRunResultDisplay.ALL,
-    ) -> Optional[str]:
+    ) -> Optional[Tuple[str, str]]:
         test_run = self.get_test_run()
         if test_run is None:
             print("Test Run is empty, please try again.")
@@ -868,8 +944,8 @@
         test_run.sort_test_cases()
 
         if global_test_run_cache_manager.disable_write_cache is None:
-            global_test_run_cache_manager.disable_write_cache = (
-                get_is_running_deepeval()
+            global_test_run_cache_manager.disable_write_cache = not bool(
+                get_is_running_deepeval()
             )
 
         global_test_run_cache_manager.wrap_up_cached_test_run()
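
Because post_test_run and the wrap-up method in the -841 hunk now return Optional[Tuple[str, str]] instead of Optional[str], callers receive both the Confident results link and the test run id. A minimal sketch of consuming the new shape; test_run_manager, run_duration and the method name wrap_up_test_run are placeholder assumptions about the calling code, not part of this diff.

    result = test_run_manager.wrap_up_test_run(runDuration=run_duration, display_table=True)
    if result is not None:
        link, run_id = result
        print(f"Results: {link} (test run id: {run_id})")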
deepeval/tracing/__init__.py CHANGED

@@ -4,6 +4,7 @@ from .context import (
     update_retriever_span,
     update_llm_span,
 )
+from .trace_context import trace
 from .types import BaseSpan, Trace
 from .tracing import observe, trace_manager
 from .offline_evals import evaluate_thread, evaluate_trace, evaluate_span
@@ -16,6 +17,7 @@ __all__ = [
     "BaseSpan",
     "Trace",
     "observe",
+    "trace",
     "trace_manager",
     "evaluate_thread",
     "evaluate_trace",
deepeval/tracing/otel/test_exporter.py ADDED

@@ -0,0 +1,35 @@
+from typing import List, Dict, Any, Sequence
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter
+from opentelemetry.sdk.trace.export import SpanExportResult
+import json
+from datetime import datetime
+
+
+class TestExporter(SpanExporter):
+    """This exporter is used to test the exporter. It will store the spans in a list of dictionaries."""
+
+    span_json_list: List[Dict[str, Any]] = []
+
+    def export(
+        self, spans: Sequence[ReadableSpan], timeout_millis: int = 30000
+    ) -> SpanExportResult:
+        for span in spans:
+            _span_json = json.loads(span.to_json())
+            self.span_json_list.append(_span_json)
+
+        return SpanExportResult.SUCCESS
+
+    def get_span_json_list(self) -> List[Dict[str, Any]]:
+        return sorted(
+            self.span_json_list,
+            key=lambda x: datetime.fromisoformat(
+                x["start_time"].replace("Z", "+00:00")
+            ),
+        )
+
+    def clear_span_json_list(self):
+        self.span_json_list = []
+
+
+test_exporter = TestExporter()
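
When ConfidentInstrumentationSettings is constructed with is_test_mode=True (see the instrumentator diff above), spans are collected by this module-level test_exporter instead of being sent over OTLP. A minimal sketch of inspecting them afterwards; note that BatchSpanProcessor exports asynchronously, so the tracer provider should be flushed or shut down first, and the dictionary keys follow OpenTelemetry's span.to_json() layout.

    from deepeval.tracing.otel.test_exporter import test_exporter

    # after running instrumented code and flushing the tracer provider
    for span_json in test_exporter.get_span_json_list():
        print(span_json["name"], span_json["context"]["trace_id"])

    test_exporter.clear_span_json_list()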
deepeval/tracing/trace_context.py ADDED

@@ -0,0 +1,14 @@
+from .context import current_trace_context
+from .tracing import trace_manager
+from contextlib import contextmanager
+
+
+@contextmanager
+def trace():
+    current_trace = current_trace_context.get()
+
+    if not current_trace:
+        current_trace = trace_manager.start_new_trace()
+        current_trace_context.set(current_trace)
+
+    yield current_trace
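
trace() is re-exported from deepeval.tracing (see the __init__.py hunk above). It yields the Trace bound to the current context and creates one if none exists yet. A minimal usage sketch; the @observe-decorated function is an assumed example, not part of this diff.

    from deepeval.tracing import observe, trace

    @observe()
    def answer(question: str) -> str:
        return "42"

    with trace() as current_trace:
        answer("What should I order?")
        print(current_trace.uuid)  # uuid of the active trace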
deepeval/tracing/tracing.py CHANGED

@@ -1,3 +1,4 @@
+import os
 from typing import Any, Dict, List, Literal, Optional, Set, Union, Callable
 from time import perf_counter
 import threading
@@ -47,13 +48,12 @@ from deepeval.tracing.utils import (
     tracing_enabled,
     validate_environment,
     validate_sampling_rate,
-    dump_body_to_json_file,
-    get_deepeval_trace_mode,
 )
 from deepeval.utils import dataclass_to_dict
 from deepeval.tracing.context import current_span_context, current_trace_context
 from deepeval.tracing.types import TestCaseMetricPair
 from deepeval.tracing.api import PromptApi
+from tests.test_integrations.manager import trace_testing_manager
 
 EVAL_DUMMY_SPAN_NAME = "evals_iterator"
 
@@ -183,13 +183,14 @@
         if trace.status == TraceSpanStatus.IN_PROGRESS:
             trace.status = TraceSpanStatus.SUCCESS
 
-
-
+        if trace_testing_manager.test_name:
+            # Trace testing mode is enabled
+            # Instead posting the trace to the queue, it will be stored in this global variable
             body = self.create_trace_api(trace).model_dump(
                 by_alias=True, exclude_none=True
             )
-
-            #
+            trace_testing_manager.test_dict = make_json_serializable(body)
+        # Post the trace to the server before removing it
         elif not self.evaluating:
             self.post_trace(trace)
         else:
deepeval/tracing/utils.py CHANGED

@@ -1,13 +1,8 @@
 import os
-import inspect
-import json
-import sys
 from datetime import datetime, timezone
 from enum import Enum
 from time import perf_counter
 from collections import deque
-from typing import Any, Dict, Optional
-
 from deepeval.constants import CONFIDENT_TRACING_ENABLED
 
 
@@ -186,84 +181,5 @@ def perf_counter_to_datetime(perf_counter_value: float) -> datetime:
 def replace_self_with_class_name(obj):
     try:
         return f"<{obj.__class__.__name__}>"
-    except:
-        return "<self>"
-
-
-def get_deepeval_trace_mode() -> Optional[str]:
-    deepeval_trace_mode = None
-    try:
-        args = sys.argv
-        for idx, arg in enumerate(args):
-            if isinstance(arg, str) and arg.startswith(
-                "--deepeval-trace-mode="
-            ):
-                deepeval_trace_mode = (
-                    arg.split("=", 1)[1].strip().strip('"').strip("'").lower()
-                )
-                break
-            if arg == "--deepeval-trace-mode" and idx + 1 < len(args):
-                deepeval_trace_mode = (
-                    str(args[idx + 1]).strip().strip('"').strip("'").lower()
-                )
-                break
-    except Exception:
-        deepeval_trace_mode = None
-
-    return deepeval_trace_mode
-
-
-def dump_body_to_json_file(
-    body: Dict[str, Any], file_path: Optional[str] = None
-) -> str:
-    entry_file = None
-    try:
-        cmd0 = sys.argv[0] if sys.argv else None
-        if cmd0 and cmd0.endswith(".py"):
-            entry_file = cmd0
-        else:
-            for frame_info in reversed(inspect.stack()):
-                fp = frame_info.filename
-                if (
-                    fp
-                    and fp.endswith(".py")
-                    and "deepeval/tracing" not in fp
-                    and "site-packages" not in fp
-                ):
-                    entry_file = fp
-                    break
-    except Exception:
-        entry_file = None
-
-    if not entry_file:
-        entry_file = "unknown.py"
-
-    abs_entry = os.path.abspath(entry_file)
-    dir_path = os.path.dirname(abs_entry)
-
-    file_arg = None
-    try:
-        for idx, arg in enumerate(sys.argv):
-            if isinstance(arg, str) and arg.startswith(
-                "--deepeval-trace-file-name="
-            ):
-                file_arg = arg.split("=", 1)[1].strip().strip('"').strip("'")
-                break
-            if arg == "--deepeval-trace-file-name" and idx + 1 < len(sys.argv):
-                file_arg = str(sys.argv[idx + 1]).strip().strip('"').strip("'")
-                break
-    except Exception:
-        file_arg = None
-
-    if file_path:
-        dst_path = os.path.abspath(file_path)
-    elif file_arg:
-        dst_path = os.path.abspath(file_arg)
-    else:
-        base_name = os.path.splitext(os.path.basename(abs_entry))[0]
-        dst_path = os.path.join(dir_path, f"{base_name}.json")
-
-    actual_body = make_json_serializable(body)
-    with open(dst_path, "w", encoding="utf-8") as f:
-        json.dump(actual_body, f, ensure_ascii=False, indent=2, sort_keys=True)
-    return dst_path
+    except:
+        return f"<self>"