deepeval 3.6.8__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/anthropic/__init__.py +19 -0
- deepeval/anthropic/extractors.py +94 -0
- deepeval/anthropic/patch.py +169 -0
- deepeval/anthropic/utils.py +225 -0
- deepeval/benchmarks/drop/drop.py +40 -14
- deepeval/benchmarks/ifeval/ifeval.py +2 -2
- deepeval/confident/types.py +4 -2
- deepeval/config/settings.py +258 -47
- deepeval/config/settings_manager.py +4 -0
- deepeval/config/utils.py +5 -0
- deepeval/dataset/dataset.py +162 -30
- deepeval/dataset/utils.py +41 -13
- deepeval/evaluate/execute.py +1099 -633
- deepeval/integrations/crewai/handler.py +36 -0
- deepeval/integrations/langchain/callback.py +27 -2
- deepeval/integrations/llama_index/handler.py +58 -4
- deepeval/integrations/llama_index/utils.py +24 -0
- deepeval/metrics/__init__.py +5 -0
- deepeval/metrics/exact_match/__init__.py +0 -0
- deepeval/metrics/exact_match/exact_match.py +94 -0
- deepeval/metrics/indicator.py +21 -1
- deepeval/metrics/pattern_match/__init__.py +0 -0
- deepeval/metrics/pattern_match/pattern_match.py +103 -0
- deepeval/metrics/task_completion/task_completion.py +9 -2
- deepeval/model_integrations/__init__.py +0 -0
- deepeval/model_integrations/utils.py +116 -0
- deepeval/models/base_model.py +3 -1
- deepeval/models/llms/amazon_bedrock_model.py +20 -17
- deepeval/models/llms/openai_model.py +10 -1
- deepeval/models/retry_policy.py +103 -20
- deepeval/openai/__init__.py +3 -1
- deepeval/openai/extractors.py +2 -2
- deepeval/openai/utils.py +7 -31
- deepeval/prompt/api.py +11 -10
- deepeval/prompt/prompt.py +5 -4
- deepeval/simulator/conversation_simulator.py +25 -18
- deepeval/synthesizer/chunking/context_generator.py +9 -1
- deepeval/telemetry.py +3 -3
- deepeval/test_case/llm_test_case.py +3 -2
- deepeval/test_run/api.py +3 -2
- deepeval/test_run/cache.py +4 -3
- deepeval/test_run/test_run.py +24 -5
- deepeval/tracing/api.py +11 -10
- deepeval/tracing/otel/exporter.py +11 -0
- deepeval/tracing/patchers.py +102 -1
- deepeval/tracing/trace_context.py +13 -4
- deepeval/tracing/tracing.py +10 -1
- deepeval/tracing/types.py +8 -8
- deepeval/tracing/utils.py +9 -0
- deepeval/utils.py +44 -2
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/METADATA +2 -2
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/RECORD +57 -47
- /deepeval/{openai → model_integrations}/types.py +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/WHEEL +0 -0
- {deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/entry_points.txt +0 -0
deepeval/telemetry.py
CHANGED
|
@@ -3,12 +3,12 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import socket
|
|
5
5
|
import sys
|
|
6
|
-
from threading import Event
|
|
7
6
|
import uuid
|
|
8
7
|
import sentry_sdk
|
|
9
8
|
from enum import Enum
|
|
10
9
|
from typing import List, Dict
|
|
11
10
|
import requests
|
|
11
|
+
from deepeval.config.settings import get_settings
|
|
12
12
|
from deepeval.constants import LOGIN_PROMPT, HIDDEN_DIR, KEY_FILE
|
|
13
13
|
from posthog import Posthog
|
|
14
14
|
|
|
@@ -34,7 +34,7 @@ TELEMETRY_PATH = os.path.join(HIDDEN_DIR, TELEMETRY_DATA_FILE)
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def telemetry_opt_out():
|
|
37
|
-
return
|
|
37
|
+
return get_settings().DEEPEVAL_TELEMETRY_OPT_OUT
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def blocked_by_firewall():
|
|
@@ -131,7 +131,7 @@ if not telemetry_opt_out():
|
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
if (
|
|
134
|
-
|
|
134
|
+
get_settings().ERROR_REPORTING
|
|
135
135
|
and not blocked_by_firewall()
|
|
136
136
|
and not telemetry_opt_out()
|
|
137
137
|
):
|
|
deepeval/test_case/llm_test_case.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from pydantic import (
|
|
2
|
-
ConfigDict,
|
|
3
2
|
Field,
|
|
4
3
|
BaseModel,
|
|
5
4
|
model_validator,
|
|
@@ -11,6 +10,8 @@ from enum import Enum
|
|
|
11
10
|
import json
|
|
12
11
|
import uuid
|
|
13
12
|
|
|
13
|
+
from deepeval.utils import make_model_config
|
|
14
|
+
|
|
14
15
|
from deepeval.test_case.mcp import (
|
|
15
16
|
MCPServer,
|
|
16
17
|
MCPPromptCall,
|
|
@@ -156,7 +157,7 @@ class ToolCall(BaseModel):
|
|
|
156
157
|
|
|
157
158
|
|
|
158
159
|
class LLMTestCase(BaseModel):
|
|
159
|
-
model_config = ConfigDict(extra="ignore")
|
|
160
|
+
model_config = make_model_config(extra="ignore")
|
|
160
161
|
|
|
161
162
|
input: str
|
|
162
163
|
actual_output: Optional[str] = Field(
|
deepeval/test_run/api.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from pydantic import BaseModel, Field
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
2
|
from typing import Optional, List, Union, Dict
|
|
3
3
|
|
|
4
4
|
from deepeval.test_case import MLLMImage, ToolCall
|
|
5
5
|
from deepeval.tracing.api import TraceApi, MetricData
|
|
6
|
+
from deepeval.utils import make_model_config
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class LLMApiTestCase(BaseModel):
|
|
@@ -49,7 +50,7 @@ class LLMApiTestCase(BaseModel):
|
|
|
49
50
|
comments: Optional[str] = Field(None)
|
|
50
51
|
trace: Optional[TraceApi] = Field(None)
|
|
51
52
|
|
|
52
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
53
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
53
54
|
# metric_collection: Optional[str] = Field(None, alias="metricCollection")
|
|
54
55
|
|
|
55
56
|
def update_metric_data(self, metric_data: MetricData):
|
deepeval/test_run/cache.py
CHANGED
|
@@ -6,6 +6,8 @@ from typing import List, Optional, Union, Dict, Union
|
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
|
+
from deepeval.utils import make_model_config
|
|
10
|
+
|
|
9
11
|
from deepeval.test_case import LLMTestCaseParams, LLMTestCase, ToolCallParams
|
|
10
12
|
from deepeval.test_run.api import MetricData
|
|
11
13
|
from deepeval.utils import (
|
|
@@ -20,6 +22,8 @@ TEMP_CACHE_FILE_NAME = f"{HIDDEN_DIR}/.temp-deepeval-cache.json"
|
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class MetricConfiguration(BaseModel):
|
|
25
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
26
|
+
|
|
23
27
|
##### Required fields #####
|
|
24
28
|
threshold: float
|
|
25
29
|
evaluation_model: Optional[str] = None
|
|
@@ -36,9 +40,6 @@ class MetricConfiguration(BaseModel):
|
|
|
36
40
|
Union[List[LLMTestCaseParams], List[ToolCallParams]]
|
|
37
41
|
] = None
|
|
38
42
|
|
|
39
|
-
class Config:
|
|
40
|
-
arbitrary_types_allowed = True
|
|
41
|
-
|
|
42
43
|
|
|
43
44
|
class CachedMetricData(BaseModel):
|
|
44
45
|
metric_data: MetricData
|
deepeval/test_run/test_run.py
CHANGED
|
@@ -463,19 +463,29 @@ class TestRunManager:
|
|
|
463
463
|
mode="r",
|
|
464
464
|
flags=portalocker.LOCK_SH | portalocker.LOCK_NB,
|
|
465
465
|
) as file:
|
|
466
|
-
|
|
466
|
+
loaded = self.test_run.load(file)
|
|
467
|
+
# only overwrite if loading actually worked
|
|
468
|
+
self.test_run = loaded
|
|
467
469
|
except (
|
|
468
470
|
FileNotFoundError,
|
|
471
|
+
json.JSONDecodeError,
|
|
469
472
|
portalocker.exceptions.LockException,
|
|
470
473
|
) as e:
|
|
471
|
-
print(
|
|
472
|
-
|
|
474
|
+
print(
|
|
475
|
+
f"Warning: Could not load test run from disk: {e}",
|
|
476
|
+
file=sys.stderr,
|
|
477
|
+
)
|
|
473
478
|
|
|
474
479
|
return self.test_run
|
|
475
480
|
|
|
476
481
|
def save_test_run(self, path: str, save_under_key: Optional[str] = None):
|
|
477
482
|
if self.save_to_disk:
|
|
478
483
|
try:
|
|
484
|
+
# ensure parent directory exists
|
|
485
|
+
parent = os.path.dirname(path)
|
|
486
|
+
if parent:
|
|
487
|
+
os.makedirs(parent, exist_ok=True)
|
|
488
|
+
|
|
479
489
|
with portalocker.Lock(path, mode="w") as file:
|
|
480
490
|
if save_under_key:
|
|
481
491
|
try:
|
|
@@ -533,10 +543,19 @@ class TestRunManager:
|
|
|
533
543
|
self.test_run.save(file)
|
|
534
544
|
except (
|
|
535
545
|
FileNotFoundError,
|
|
546
|
+
json.JSONDecodeError,
|
|
536
547
|
portalocker.exceptions.LockException,
|
|
537
548
|
) as e:
|
|
538
|
-
print(
|
|
539
|
-
|
|
549
|
+
print(
|
|
550
|
+
f"Warning: Could not update test run on disk: {e}",
|
|
551
|
+
file=sys.stderr,
|
|
552
|
+
)
|
|
553
|
+
if self.test_run is None:
|
|
554
|
+
# guarantee a valid in-memory run so the update can proceed.
|
|
555
|
+
# never destroy in-memory state on I/O failure.
|
|
556
|
+
self.create_test_run()
|
|
557
|
+
self.test_run.add_test_case(api_test_case)
|
|
558
|
+
self.test_run.set_dataset_properties(test_case)
|
|
540
559
|
else:
|
|
541
560
|
if self.test_run is None:
|
|
542
561
|
self.create_test_run()
|
deepeval/tracing/api.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
from typing import Dict, List, Optional, Union, Literal, Any
|
|
3
|
-
from pydantic import BaseModel,
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
5
|
from deepeval.test_case import ToolCall
|
|
6
|
+
from deepeval.utils import make_model_config
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class SpanApiType(Enum):
|
|
@@ -27,7 +28,7 @@ class PromptApi(BaseModel):
|
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class MetricData(BaseModel):
|
|
30
|
-
model_config = ConfigDict(extra="ignore")
|
|
31
|
+
model_config = make_model_config(extra="ignore")
|
|
31
32
|
|
|
32
33
|
name: str
|
|
33
34
|
threshold: float
|
|
@@ -42,6 +43,10 @@ class MetricData(BaseModel):
|
|
|
42
43
|
|
|
43
44
|
|
|
44
45
|
class BaseApiSpan(BaseModel):
|
|
46
|
+
model_config = make_model_config(
|
|
47
|
+
use_enum_values=True, validate_assignment=True
|
|
48
|
+
)
|
|
49
|
+
|
|
45
50
|
uuid: str
|
|
46
51
|
name: str = None
|
|
47
52
|
status: TraceSpanApiStatus
|
|
@@ -96,12 +101,12 @@ class BaseApiSpan(BaseModel):
|
|
|
96
101
|
metric_collection: Optional[str] = Field(None, alias="metricCollection")
|
|
97
102
|
metrics_data: Optional[List[MetricData]] = Field(None, alias="metricsData")
|
|
98
103
|
|
|
99
|
-
class Config:
|
|
100
|
-
use_enum_values = True
|
|
101
|
-
validate_assignment = True
|
|
102
|
-
|
|
103
104
|
|
|
104
105
|
class TraceApi(BaseModel):
|
|
106
|
+
model_config = make_model_config(
|
|
107
|
+
use_enum_values=True, validate_assignment=True
|
|
108
|
+
)
|
|
109
|
+
|
|
105
110
|
uuid: str
|
|
106
111
|
base_spans: Optional[List[BaseApiSpan]] = Field(None, alias="baseSpans")
|
|
107
112
|
agent_spans: Optional[List[BaseApiSpan]] = Field(None, alias="agentSpans")
|
|
@@ -139,7 +144,3 @@ class TraceApi(BaseModel):
|
|
|
139
144
|
|
|
140
145
|
# Don't serialize these
|
|
141
146
|
confident_api_key: Optional[str] = Field(None, exclude=True)
|
|
142
|
-
|
|
143
|
-
class Config:
|
|
144
|
-
use_enum_values = True
|
|
145
|
-
validate_assignment = True
|
|
deepeval/tracing/otel/exporter.py
CHANGED
|
@@ -493,6 +493,17 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
493
493
|
output_token_count = span.attributes.get(
|
|
494
494
|
"confident.llm.output_token_count"
|
|
495
495
|
)
|
|
496
|
+
|
|
497
|
+
# fallback to gen ai attributes if not found in confident attributes
|
|
498
|
+
if not input_token_count:
|
|
499
|
+
input_token_count = span.attributes.get(
|
|
500
|
+
"gen_ai.usage.input_tokens"
|
|
501
|
+
)
|
|
502
|
+
if not output_token_count:
|
|
503
|
+
output_token_count = span.attributes.get(
|
|
504
|
+
"gen_ai.usage.output_tokens"
|
|
505
|
+
)
|
|
506
|
+
|
|
496
507
|
cost_per_input_token = span.attributes.get(
|
|
497
508
|
"confident.llm.cost_per_input_token"
|
|
498
509
|
)
|
deepeval/tracing/patchers.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from openai import OpenAI
|
|
2
1
|
import functools
|
|
3
2
|
|
|
3
|
+
from anthropic import Anthropic
|
|
4
|
+
from openai import OpenAI
|
|
5
|
+
|
|
4
6
|
from deepeval.tracing.context import update_current_span, update_llm_span
|
|
5
7
|
from deepeval.tracing.context import current_span_context
|
|
6
8
|
from deepeval.tracing.types import LlmSpan
|
|
@@ -82,3 +84,102 @@ def patch_openai_client(client: OpenAI):
|
|
|
82
84
|
return response
|
|
83
85
|
|
|
84
86
|
setattr(current_obj, method_name, wrapped_method)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def patch_anthropic_client(client: Anthropic):
|
|
90
|
+
"""
|
|
91
|
+
Patch an Anthropic client instance to add tracing capabilities.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
client: An instance of Anthropic client to patch
|
|
95
|
+
"""
|
|
96
|
+
original_methods = {}
|
|
97
|
+
|
|
98
|
+
methods_to_patch = [
|
|
99
|
+
"messages.create",
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
for method_path in methods_to_patch:
|
|
103
|
+
parts = method_path.split(".")
|
|
104
|
+
current_obj = client
|
|
105
|
+
|
|
106
|
+
for part in parts[:-1]:
|
|
107
|
+
if not hasattr(current_obj, part):
|
|
108
|
+
print(f"Warning: Cannot find {part} in the path {method_path}")
|
|
109
|
+
continue
|
|
110
|
+
current_obj = getattr(current_obj, part)
|
|
111
|
+
|
|
112
|
+
method_name = parts[-1]
|
|
113
|
+
if not hasattr(current_obj, method_name):
|
|
114
|
+
print(
|
|
115
|
+
f"Warning: Cannot find method {method_name} in the path {method_path}"
|
|
116
|
+
)
|
|
117
|
+
continue
|
|
118
|
+
|
|
119
|
+
method = getattr(current_obj, method_name)
|
|
120
|
+
|
|
121
|
+
if callable(method) and not isinstance(method, type):
|
|
122
|
+
original_methods[method_path] = method
|
|
123
|
+
|
|
124
|
+
@functools.wraps(method)
|
|
125
|
+
def wrapped_method(*args, original_method=method, **kwargs):
|
|
126
|
+
current_span = current_span_context.get()
|
|
127
|
+
response = original_method(*args, **kwargs)
|
|
128
|
+
|
|
129
|
+
if isinstance(current_span, LlmSpan):
|
|
130
|
+
model = kwargs.get("model", None)
|
|
131
|
+
if model is None:
|
|
132
|
+
raise ValueError("model not found in client")
|
|
133
|
+
|
|
134
|
+
current_span.model = model
|
|
135
|
+
|
|
136
|
+
output = None
|
|
137
|
+
try:
|
|
138
|
+
if (
|
|
139
|
+
hasattr(response, "content")
|
|
140
|
+
and response.content
|
|
141
|
+
and len(response.content) > 0
|
|
142
|
+
):
|
|
143
|
+
for block in response.content:
|
|
144
|
+
if hasattr(block, "text"):
|
|
145
|
+
output = block.text
|
|
146
|
+
break
|
|
147
|
+
except Exception:
|
|
148
|
+
pass
|
|
149
|
+
|
|
150
|
+
input_token_count = None
|
|
151
|
+
output_token_count = None
|
|
152
|
+
try:
|
|
153
|
+
if hasattr(response, "usage"):
|
|
154
|
+
usage = response.usage
|
|
155
|
+
# usage can be a dict or an object with attributes
|
|
156
|
+
if isinstance(usage, dict):
|
|
157
|
+
input_token_count = usage.get(
|
|
158
|
+
"input_tokens", None
|
|
159
|
+
)
|
|
160
|
+
output_token_count = usage.get(
|
|
161
|
+
"output_tokens", None
|
|
162
|
+
)
|
|
163
|
+
else:
|
|
164
|
+
input_token_count = getattr(
|
|
165
|
+
usage, "input_tokens", None
|
|
166
|
+
)
|
|
167
|
+
output_token_count = getattr(
|
|
168
|
+
usage, "output_tokens", None
|
|
169
|
+
)
|
|
170
|
+
except Exception:
|
|
171
|
+
pass
|
|
172
|
+
|
|
173
|
+
update_current_span(
|
|
174
|
+
input=kwargs.get("messages", "INPUT_MESSAGE_NOT_FOUND"),
|
|
175
|
+
output=output if output else "OUTPUT_MESSAGE_NOT_FOUND",
|
|
176
|
+
)
|
|
177
|
+
update_llm_span(
|
|
178
|
+
input_token_count=input_token_count,
|
|
179
|
+
output_token_count=output_token_count,
|
|
180
|
+
)
|
|
181
|
+
return response
|
|
182
|
+
|
|
183
|
+
setattr(current_obj, method_name, wrapped_method)
|
|
184
|
+
|
|
185
|
+
return original_methods
|
|
deepeval/tracing/trace_context.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
from typing import Optional, List, Dict, Any
|
|
2
1
|
from contextvars import ContextVar
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional, List, Dict, Any
|
|
5
5
|
|
|
6
|
-
from .tracing import trace_manager
|
|
7
|
-
from .context import current_trace_context, update_current_trace
|
|
8
|
-
from deepeval.prompt import Prompt
|
|
9
6
|
from deepeval.metrics import BaseMetric
|
|
7
|
+
from deepeval.prompt import Prompt
|
|
10
8
|
from deepeval.test_case.llm_test_case import ToolCall
|
|
9
|
+
from deepeval.tracing.context import current_trace_context, update_current_trace
|
|
10
|
+
from deepeval.tracing.tracing import trace_manager
|
|
11
|
+
from deepeval.tracing.types import TraceWorkerStatus
|
|
12
|
+
from deepeval.tracing.utils import is_async_context
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
@dataclass
|
|
@@ -59,6 +61,13 @@ def trace(
|
|
|
59
61
|
metrics: Optional[List[BaseMetric]] = None,
|
|
60
62
|
metric_collection: Optional[str] = None,
|
|
61
63
|
):
|
|
64
|
+
if is_async_context():
|
|
65
|
+
trace_manager._print_trace_status(
|
|
66
|
+
message="Warning: Detected use of the synchronous 'trace' context manager within an async method",
|
|
67
|
+
trace_worker_status=TraceWorkerStatus.WARNING,
|
|
68
|
+
description="Wrapping an async method with the synchronous 'trace' context manager may lead to unexpected behavior.",
|
|
69
|
+
)
|
|
70
|
+
|
|
62
71
|
current_trace = current_trace_context.get()
|
|
63
72
|
|
|
64
73
|
if not current_trace:
|
deepeval/tracing/tracing.py
CHANGED
|
@@ -19,6 +19,7 @@ import random
|
|
|
19
19
|
import atexit
|
|
20
20
|
import queue
|
|
21
21
|
import uuid
|
|
22
|
+
from anthropic import Anthropic
|
|
22
23
|
from openai import OpenAI
|
|
23
24
|
from rich.console import Console
|
|
24
25
|
from rich.progress import Progress
|
|
@@ -38,7 +39,10 @@ from deepeval.tracing.api import (
|
|
|
38
39
|
TraceSpanApiStatus,
|
|
39
40
|
)
|
|
40
41
|
from deepeval.telemetry import capture_send_trace
|
|
41
|
-
from deepeval.tracing.patchers import patch_openai_client
|
|
42
|
+
from deepeval.tracing.patchers import (
|
|
43
|
+
patch_anthropic_client,
|
|
44
|
+
patch_openai_client,
|
|
45
|
+
)
|
|
42
46
|
from deepeval.tracing.types import (
|
|
43
47
|
AgentSpan,
|
|
44
48
|
BaseSpan,
|
|
@@ -111,6 +115,7 @@ class TraceManager:
|
|
|
111
115
|
|
|
112
116
|
self.sampling_rate = settings.CONFIDENT_TRACE_SAMPLE_RATE
|
|
113
117
|
validate_sampling_rate(self.sampling_rate)
|
|
118
|
+
self.anthropic_client = None
|
|
114
119
|
self.openai_client = None
|
|
115
120
|
self.tracing_enabled = True
|
|
116
121
|
|
|
@@ -149,6 +154,7 @@ class TraceManager:
|
|
|
149
154
|
environment: Optional[str] = None,
|
|
150
155
|
sampling_rate: Optional[float] = None,
|
|
151
156
|
confident_api_key: Optional[str] = None,
|
|
157
|
+
anthropic_client: Optional[Anthropic] = None,
|
|
152
158
|
openai_client: Optional[OpenAI] = None,
|
|
153
159
|
tracing_enabled: Optional[bool] = None,
|
|
154
160
|
) -> None:
|
|
@@ -165,6 +171,9 @@ class TraceManager:
|
|
|
165
171
|
if openai_client is not None:
|
|
166
172
|
self.openai_client = openai_client
|
|
167
173
|
patch_openai_client(openai_client)
|
|
174
|
+
if anthropic_client is not None:
|
|
175
|
+
self.anthropic_client = anthropic_client
|
|
176
|
+
patch_anthropic_client(anthropic_client)
|
|
168
177
|
if tracing_enabled is not None:
|
|
169
178
|
self.tracing_enabled = tracing_enabled
|
|
170
179
|
|
deepeval/tracing/types.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
|
-
from pydantic import BaseModel, Field
|
|
3
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union, Literal
|
|
5
5
|
from rich.progress import Progress
|
|
6
6
|
|
|
7
|
+
from deepeval.utils import make_model_config
|
|
8
|
+
|
|
7
9
|
from deepeval.prompt.prompt import Prompt
|
|
8
10
|
from deepeval.test_case.llm_test_case import ToolCall
|
|
9
11
|
from deepeval.test_case import LLMTestCase
|
|
@@ -55,6 +57,8 @@ class LlmOutput(BaseModel):
|
|
|
55
57
|
|
|
56
58
|
|
|
57
59
|
class BaseSpan(BaseModel):
|
|
60
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
61
|
+
|
|
58
62
|
uuid: str
|
|
59
63
|
status: TraceSpanStatus
|
|
60
64
|
children: List["BaseSpan"] = Field(default_factory=list)
|
|
@@ -90,9 +94,6 @@ class BaseSpan(BaseModel):
|
|
|
90
94
|
None, serialization_alias="expectedTools"
|
|
91
95
|
)
|
|
92
96
|
|
|
93
|
-
class Config:
|
|
94
|
-
arbitrary_types_allowed = True
|
|
95
|
-
|
|
96
97
|
|
|
97
98
|
class AgentSpan(BaseSpan):
|
|
98
99
|
name: str
|
|
@@ -125,7 +126,7 @@ class LlmSpan(BaseSpan):
|
|
|
125
126
|
# output_metadata: Optional[Dict[str, Any]] = Field(None, serialization_alias="outputMetadata")
|
|
126
127
|
|
|
127
128
|
# for serializing `prompt`
|
|
128
|
-
model_config =
|
|
129
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
129
130
|
|
|
130
131
|
|
|
131
132
|
class RetrieverSpan(BaseSpan):
|
|
@@ -140,6 +141,8 @@ class ToolSpan(BaseSpan):
|
|
|
140
141
|
|
|
141
142
|
|
|
142
143
|
class Trace(BaseModel):
|
|
144
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
145
|
+
|
|
143
146
|
uuid: str = Field(serialization_alias="uuid")
|
|
144
147
|
status: TraceSpanStatus
|
|
145
148
|
root_spans: List[BaseSpan] = Field(serialization_alias="rootSpans")
|
|
@@ -174,9 +177,6 @@ class Trace(BaseModel):
|
|
|
174
177
|
None, serialization_alias="expectedTools"
|
|
175
178
|
)
|
|
176
179
|
|
|
177
|
-
class Config:
|
|
178
|
-
arbitrary_types_allowed = True
|
|
179
|
-
|
|
180
180
|
|
|
181
181
|
class TraceAttributes(BaseModel):
|
|
182
182
|
name: Optional[str] = None
|
deepeval/tracing/utils.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import os
|
|
2
3
|
from typing import Dict, Any
|
|
3
4
|
from datetime import datetime, timezone
|
|
@@ -191,3 +192,11 @@ def prepare_tool_call_input_parameters(output: Any) -> Dict[str, Any]:
|
|
|
191
192
|
if res and not isinstance(res, dict):
|
|
192
193
|
res = {"output": res}
|
|
193
194
|
return res
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def is_async_context() -> bool:
|
|
198
|
+
try:
|
|
199
|
+
asyncio.get_running_loop()
|
|
200
|
+
return True
|
|
201
|
+
except RuntimeError:
|
|
202
|
+
return False
|
deepeval/utils.py
CHANGED
|
@@ -21,7 +21,6 @@ from pydantic import BaseModel
|
|
|
21
21
|
from rich.progress import Progress
|
|
22
22
|
from rich.console import Console, Theme
|
|
23
23
|
|
|
24
|
-
from deepeval.confident.api import set_confident_api_key
|
|
25
24
|
from deepeval.config.settings import get_settings
|
|
26
25
|
from deepeval.config.utils import (
|
|
27
26
|
get_env_bool,
|
|
@@ -29,6 +28,48 @@ from deepeval.config.utils import (
|
|
|
29
28
|
)
|
|
30
29
|
|
|
31
30
|
|
|
31
|
+
#####################
|
|
32
|
+
# Pydantic Compat #
|
|
33
|
+
#####################
|
|
34
|
+
|
|
35
|
+
import pydantic
|
|
36
|
+
|
|
37
|
+
PYDANTIC_V2 = pydantic.VERSION.startswith("2")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def make_model_config(**kwargs):
|
|
41
|
+
"""
|
|
42
|
+
Create a model configuration that works with both Pydantic v1 and v2.
|
|
43
|
+
|
|
44
|
+
Usage in a model (Pydantic v2 style):
|
|
45
|
+
class MyModel(BaseModel):
|
|
46
|
+
model_config = make_model_config(arbitrary_types_allowed=True)
|
|
47
|
+
field: str
|
|
48
|
+
|
|
49
|
+
This will work correctly in both v1 and v2:
|
|
50
|
+
- In v2: Returns ConfigDict(**kwargs)
|
|
51
|
+
- In v1: Returns a Config class with the attributes set
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
**kwargs: Configuration options (e.g., use_enum_values=True, arbitrary_types_allowed=True)
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
ConfigDict (v2) or Config class (v1)
|
|
58
|
+
"""
|
|
59
|
+
if PYDANTIC_V2:
|
|
60
|
+
from pydantic import ConfigDict
|
|
61
|
+
|
|
62
|
+
return ConfigDict(**kwargs)
|
|
63
|
+
else:
|
|
64
|
+
# For Pydantic v1, create an inner Config class
|
|
65
|
+
class Config:
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
for key, value in kwargs.items():
|
|
69
|
+
setattr(Config, key, value)
|
|
70
|
+
return Config
|
|
71
|
+
|
|
72
|
+
|
|
32
73
|
###############
|
|
33
74
|
# Local Types #
|
|
34
75
|
###############
|
|
@@ -232,6 +273,7 @@ def login(api_key: str):
|
|
|
232
273
|
raise ValueError("Unable to login, please provide a non-empty api key.")
|
|
233
274
|
|
|
234
275
|
from rich import print
|
|
276
|
+
from deepeval.confident.api import set_confident_api_key
|
|
235
277
|
|
|
236
278
|
set_confident_api_key(api_key)
|
|
237
279
|
print(
|
|
@@ -751,7 +793,7 @@ custom_console = Console(theme=my_theme)
|
|
|
751
793
|
|
|
752
794
|
|
|
753
795
|
def format_error_text(
|
|
754
|
-
exc: BaseException, *, with_stack: bool
|
|
796
|
+
exc: BaseException, *, with_stack: Optional[bool] = None
|
|
755
797
|
) -> str:
|
|
756
798
|
if with_stack is None:
|
|
757
799
|
with_stack = logging.getLogger("deepeval").isEnabledFor(logging.DEBUG)
|
|
{deepeval-3.6.8.dist-info → deepeval-3.7.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepeval
|
|
3
|
-
Version: 3.6.8
|
|
3
|
+
Version: 3.7.0
|
|
4
4
|
Summary: The LLM Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/confident-ai/deepeval
|
|
6
6
|
License: Apache-2.0
|
|
@@ -25,7 +25,7 @@ Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
|
|
|
25
25
|
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)
|
|
26
26
|
Requires-Dist: opentelemetry-sdk (>=1.24.0,<2.0.0)
|
|
27
27
|
Requires-Dist: portalocker
|
|
28
|
-
Requires-Dist: posthog (>=
|
|
28
|
+
Requires-Dist: posthog (>=5.4.0,<6.0.0)
|
|
29
29
|
Requires-Dist: pydantic (>=2.11.7,<3.0.0)
|
|
30
30
|
Requires-Dist: pydantic-settings (>=2.10.1,<3.0.0)
|
|
31
31
|
Requires-Dist: pyfiglet
|