deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings_manager.py +1 -1
- deepeval/contextvars.py +25 -0
- deepeval/dataset/__init__.py +8 -2
- deepeval/evaluate/execute.py +15 -3
- deepeval/integrations/pydantic_ai/__init__.py +3 -3
- deepeval/integrations/pydantic_ai/agent.py +9 -327
- deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
- deepeval/integrations/pydantic_ai/otel.py +8 -2
- deepeval/openai_agents/__init__.py +4 -3
- deepeval/openai_agents/agent.py +8 -166
- deepeval/openai_agents/callback_handler.py +63 -62
- deepeval/openai_agents/extractors.py +83 -7
- deepeval/openai_agents/patch.py +255 -61
- deepeval/openai_agents/runner.py +348 -335
- deepeval/tracing/context.py +1 -0
- deepeval/tracing/otel/exporter.py +236 -174
- deepeval/tracing/otel/utils.py +95 -7
- deepeval/tracing/tracing.py +3 -0
- deepeval/utils.py +4 -3
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/METADATA +1 -1
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/RECORD +25 -25
- deepeval/integrations/pydantic_ai/patcher.py +0 -484
- deepeval/integrations/pydantic_ai/utils.py +0 -323
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/WHEEL +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.5.8"
|
|
1
|
+
__version__: str = "3.6.0"
|
|
@@ -15,7 +15,7 @@ from enum import Enum
|
|
|
15
15
|
from pydantic import SecretStr
|
|
16
16
|
from deepeval.config.settings import get_settings, _SAVE_RE
|
|
17
17
|
from deepeval.cli.dotenv_handler import DotenvHandler
|
|
18
|
-
from deepeval.utils import bool_to_env_str
|
|
18
|
+
from deepeval.config.utils import bool_to_env_str
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
StrOrEnum = Union[str, Enum]
|
deepeval/contextvars.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextvars import ContextVar
|
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from deepeval.dataset.golden import Golden
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
CURRENT_GOLDEN: ContextVar[Optional[Golden]] = ContextVar(
|
|
12
|
+
"CURRENT_GOLDEN", default=None
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def set_current_golden(golden: Optional[Golden]):
|
|
17
|
+
return CURRENT_GOLDEN.set(golden)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_current_golden() -> Optional[Golden]:
|
|
21
|
+
return CURRENT_GOLDEN.get()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def reset_current_golden(token) -> None:
|
|
25
|
+
CURRENT_GOLDEN.reset(token)
|
deepeval/dataset/__init__.py
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
|
+
from deepeval.contextvars import get_current_golden
|
|
1
2
|
from .dataset import EvaluationDataset
|
|
2
3
|
from .golden import Golden, ConversationalGolden
|
|
3
|
-
from .test_run_tracer import init_global_test_run_tracer
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"EvaluationDataset",
|
|
8
|
+
"Golden",
|
|
9
|
+
"ConversationalGolden",
|
|
10
|
+
"get_current_golden",
|
|
11
|
+
]
|
deepeval/evaluate/execute.py
CHANGED
|
@@ -42,6 +42,7 @@ from deepeval.tracing.api import (
|
|
|
42
42
|
BaseApiSpan,
|
|
43
43
|
)
|
|
44
44
|
from deepeval.dataset import Golden
|
|
45
|
+
from deepeval.contextvars import set_current_golden, reset_current_golden
|
|
45
46
|
from deepeval.errors import MissingTestCaseParamsError
|
|
46
47
|
from deepeval.metrics.utils import copy_metrics
|
|
47
48
|
from deepeval.utils import (
|
|
@@ -1480,6 +1481,7 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1480
1481
|
)
|
|
1481
1482
|
|
|
1482
1483
|
for golden in goldens:
|
|
1484
|
+
token = set_current_golden(golden)
|
|
1483
1485
|
with capture_evaluation_run("golden"):
|
|
1484
1486
|
# yield golden
|
|
1485
1487
|
count += 1
|
|
@@ -1492,8 +1494,14 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1492
1494
|
_progress=progress,
|
|
1493
1495
|
_pbar_callback_id=pbar_tags_id,
|
|
1494
1496
|
):
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
+
try:
|
|
1498
|
+
# yield golden to user code
|
|
1499
|
+
yield golden
|
|
1500
|
+
# control has returned from user code without error, capture trace now
|
|
1501
|
+
current_trace: Trace = current_trace_context.get()
|
|
1502
|
+
finally:
|
|
1503
|
+
# after user code returns control, always reset the context
|
|
1504
|
+
reset_current_golden(token)
|
|
1497
1505
|
|
|
1498
1506
|
update_pbar(progress, pbar_tags_id)
|
|
1499
1507
|
update_pbar(progress, pbar_id)
|
|
@@ -1849,6 +1857,7 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1849
1857
|
|
|
1850
1858
|
try:
|
|
1851
1859
|
for index, golden in enumerate(goldens):
|
|
1860
|
+
token = set_current_golden(golden)
|
|
1852
1861
|
current_golden_ctx.update(
|
|
1853
1862
|
{
|
|
1854
1863
|
"index": index,
|
|
@@ -1857,7 +1866,10 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1857
1866
|
}
|
|
1858
1867
|
)
|
|
1859
1868
|
prev_task_length = len(created_tasks)
|
|
1860
|
-
|
|
1869
|
+
try:
|
|
1870
|
+
yield golden
|
|
1871
|
+
finally:
|
|
1872
|
+
reset_current_golden(token)
|
|
1861
1873
|
# if this golden created no tasks, bump bars now
|
|
1862
1874
|
if len(created_tasks) == prev_task_length:
|
|
1863
1875
|
update_pbar(progress, pbar_callback_id)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .agent import DeepEvalPydanticAIAgent as Agent
|
|
2
|
-
from .
|
|
3
|
-
from .otel import instrument_pydantic_ai
|
|
2
|
+
from .instrumentator import ConfidentInstrumentationSettings
|
|
3
|
+
from .otel import instrument_pydantic_ai
|
|
4
4
|
|
|
5
|
-
__all__ = ["
|
|
5
|
+
__all__ = ["ConfidentInstrumentationSettings"]
|
|
@@ -1,339 +1,21 @@
|
|
|
1
|
-
import inspect
|
|
2
|
-
from typing import Optional, List, Generic, TypeVar
|
|
3
|
-
from contextvars import ContextVar
|
|
4
|
-
from contextlib import asynccontextmanager
|
|
5
|
-
|
|
6
|
-
from deepeval.prompt import Prompt
|
|
7
|
-
from deepeval.tracing.types import AgentSpan
|
|
8
|
-
from deepeval.tracing.tracing import Observer
|
|
9
|
-
from deepeval.metrics.base_metric import BaseMetric
|
|
10
|
-
from deepeval.tracing.context import current_span_context
|
|
11
|
-
from deepeval.integrations.pydantic_ai.utils import extract_tools_called
|
|
1
|
+
import warnings
|
|
12
2
|
|
|
13
3
|
try:
|
|
14
4
|
from pydantic_ai.agent import Agent
|
|
15
|
-
from pydantic_ai.tools import AgentDepsT
|
|
16
|
-
from pydantic_ai.output import OutputDataT
|
|
17
|
-
from deepeval.integrations.pydantic_ai.utils import (
|
|
18
|
-
create_patched_tool,
|
|
19
|
-
update_trace_context,
|
|
20
|
-
patch_llm_model,
|
|
21
|
-
)
|
|
22
5
|
|
|
23
6
|
is_pydantic_ai_installed = True
|
|
24
7
|
except:
|
|
25
8
|
is_pydantic_ai_installed = False
|
|
26
9
|
|
|
27
10
|
|
|
28
|
-
|
|
29
|
-
if not is_pydantic_ai_installed:
|
|
30
|
-
raise ImportError(
|
|
31
|
-
"Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
_IS_RUN_SYNC = ContextVar("deepeval_is_run_sync", default=False)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class DeepEvalPydanticAIAgent(
|
|
39
|
-
Agent[AgentDepsT, OutputDataT], Generic[AgentDepsT, OutputDataT]
|
|
40
|
-
):
|
|
41
|
-
|
|
42
|
-
trace_name: Optional[str] = None
|
|
43
|
-
trace_tags: Optional[List[str]] = None
|
|
44
|
-
trace_metadata: Optional[dict] = None
|
|
45
|
-
trace_thread_id: Optional[str] = None
|
|
46
|
-
trace_user_id: Optional[str] = None
|
|
47
|
-
trace_metric_collection: Optional[str] = None
|
|
48
|
-
trace_metrics: Optional[List[BaseMetric]] = None
|
|
49
|
-
|
|
50
|
-
llm_prompt: Optional[Prompt] = None
|
|
51
|
-
llm_metrics: Optional[List[BaseMetric]] = None
|
|
52
|
-
llm_metric_collection: Optional[str] = None
|
|
53
|
-
|
|
54
|
-
agent_metrics: Optional[List[BaseMetric]] = None
|
|
55
|
-
agent_metric_collection: Optional[str] = None
|
|
11
|
+
class DeepEvalPydanticAIAgent(Agent):
|
|
56
12
|
|
|
57
|
-
def __init__(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
trace_user_id: Optional[str] = None,
|
|
65
|
-
trace_metric_collection: Optional[str] = None,
|
|
66
|
-
trace_metrics: Optional[List[BaseMetric]] = None,
|
|
67
|
-
llm_metric_collection: Optional[str] = None,
|
|
68
|
-
llm_metrics: Optional[List[BaseMetric]] = None,
|
|
69
|
-
llm_prompt: Optional[Prompt] = None,
|
|
70
|
-
agent_metric_collection: Optional[str] = None,
|
|
71
|
-
agent_metrics: Optional[List[BaseMetric]] = None,
|
|
72
|
-
**kwargs
|
|
73
|
-
):
|
|
74
|
-
pydantic_ai_installed()
|
|
75
|
-
|
|
76
|
-
self.trace_name = trace_name
|
|
77
|
-
self.trace_tags = trace_tags
|
|
78
|
-
self.trace_metadata = trace_metadata
|
|
79
|
-
self.trace_thread_id = trace_thread_id
|
|
80
|
-
self.trace_user_id = trace_user_id
|
|
81
|
-
self.trace_metric_collection = trace_metric_collection
|
|
82
|
-
self.trace_metrics = trace_metrics
|
|
83
|
-
|
|
84
|
-
self.llm_metric_collection = llm_metric_collection
|
|
85
|
-
self.llm_metrics = llm_metrics
|
|
86
|
-
self.llm_prompt = llm_prompt
|
|
87
|
-
|
|
88
|
-
self.agent_metric_collection = agent_metric_collection
|
|
89
|
-
self.agent_metrics = agent_metrics
|
|
13
|
+
def __init__(self, *args, **kwargs):
|
|
14
|
+
warnings.warn(
|
|
15
|
+
"instrument_pydantic_ai is deprecated and will be removed in a future version. "
|
|
16
|
+
"Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
|
|
17
|
+
DeprecationWarning,
|
|
18
|
+
stacklevel=2,
|
|
19
|
+
)
|
|
90
20
|
|
|
91
21
|
super().__init__(*args, **kwargs)
|
|
92
|
-
|
|
93
|
-
patch_llm_model(
|
|
94
|
-
self._model, llm_metric_collection, llm_metrics, llm_prompt
|
|
95
|
-
) # TODO: Add dual patch guards
|
|
96
|
-
|
|
97
|
-
async def run(
|
|
98
|
-
self,
|
|
99
|
-
*args,
|
|
100
|
-
name: Optional[str] = None,
|
|
101
|
-
tags: Optional[List[str]] = None,
|
|
102
|
-
user_id: Optional[str] = None,
|
|
103
|
-
metadata: Optional[dict] = None,
|
|
104
|
-
thread_id: Optional[str] = None,
|
|
105
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
106
|
-
metric_collection: Optional[str] = None,
|
|
107
|
-
**kwargs
|
|
108
|
-
):
|
|
109
|
-
sig = inspect.signature(super().run)
|
|
110
|
-
bound = sig.bind_partial(*args, **kwargs)
|
|
111
|
-
bound.apply_defaults()
|
|
112
|
-
input = bound.arguments.get("user_prompt", None)
|
|
113
|
-
|
|
114
|
-
agent_name = super().name if super().name is not None else "Agent"
|
|
115
|
-
|
|
116
|
-
with Observer(
|
|
117
|
-
span_type="agent" if not _IS_RUN_SYNC.get() else "custom",
|
|
118
|
-
func_name=agent_name if not _IS_RUN_SYNC.get() else "run",
|
|
119
|
-
function_kwargs={"input": input},
|
|
120
|
-
metrics=self.agent_metrics if not _IS_RUN_SYNC.get() else None,
|
|
121
|
-
metric_collection=(
|
|
122
|
-
self.agent_metric_collection if not _IS_RUN_SYNC.get() else None
|
|
123
|
-
),
|
|
124
|
-
) as observer:
|
|
125
|
-
result = await super().run(*args, **kwargs)
|
|
126
|
-
observer.result = result.output
|
|
127
|
-
update_trace_context(
|
|
128
|
-
trace_name=name if name is not None else self.trace_name,
|
|
129
|
-
trace_tags=tags if tags is not None else self.trace_tags,
|
|
130
|
-
trace_metadata=(
|
|
131
|
-
metadata if metadata is not None else self.trace_metadata
|
|
132
|
-
),
|
|
133
|
-
trace_thread_id=(
|
|
134
|
-
thread_id if thread_id is not None else self.trace_thread_id
|
|
135
|
-
),
|
|
136
|
-
trace_user_id=(
|
|
137
|
-
user_id if user_id is not None else self.trace_user_id
|
|
138
|
-
),
|
|
139
|
-
trace_metric_collection=(
|
|
140
|
-
metric_collection
|
|
141
|
-
if metric_collection is not None
|
|
142
|
-
else self.trace_metric_collection
|
|
143
|
-
),
|
|
144
|
-
trace_metrics=(
|
|
145
|
-
metrics if metrics is not None else self.trace_metrics
|
|
146
|
-
),
|
|
147
|
-
trace_input=input,
|
|
148
|
-
trace_output=result.output,
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
agent_span: AgentSpan = current_span_context.get()
|
|
152
|
-
try:
|
|
153
|
-
agent_span.tools_called = extract_tools_called(result)
|
|
154
|
-
except:
|
|
155
|
-
pass
|
|
156
|
-
# TODO: available tools
|
|
157
|
-
# TODO: agent handoffs
|
|
158
|
-
|
|
159
|
-
return result
|
|
160
|
-
|
|
161
|
-
def run_sync(
|
|
162
|
-
self,
|
|
163
|
-
*args,
|
|
164
|
-
name: Optional[str] = None,
|
|
165
|
-
tags: Optional[List[str]] = None,
|
|
166
|
-
metadata: Optional[dict] = None,
|
|
167
|
-
thread_id: Optional[str] = None,
|
|
168
|
-
user_id: Optional[str] = None,
|
|
169
|
-
metric_collection: Optional[str] = None,
|
|
170
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
171
|
-
**kwargs
|
|
172
|
-
):
|
|
173
|
-
sig = inspect.signature(super().run_sync)
|
|
174
|
-
bound = sig.bind_partial(*args, **kwargs)
|
|
175
|
-
bound.apply_defaults()
|
|
176
|
-
input = bound.arguments.get("user_prompt", None)
|
|
177
|
-
|
|
178
|
-
token = _IS_RUN_SYNC.set(True)
|
|
179
|
-
|
|
180
|
-
agent_name = super().name if super().name is not None else "Agent"
|
|
181
|
-
|
|
182
|
-
with Observer(
|
|
183
|
-
span_type="agent",
|
|
184
|
-
func_name=agent_name,
|
|
185
|
-
function_kwargs={"input": input},
|
|
186
|
-
metrics=self.agent_metrics,
|
|
187
|
-
metric_collection=self.agent_metric_collection,
|
|
188
|
-
) as observer:
|
|
189
|
-
try:
|
|
190
|
-
result = super().run_sync(*args, **kwargs)
|
|
191
|
-
finally:
|
|
192
|
-
_IS_RUN_SYNC.reset(token)
|
|
193
|
-
|
|
194
|
-
observer.result = result.output
|
|
195
|
-
update_trace_context(
|
|
196
|
-
trace_name=name if name is not None else self.trace_name,
|
|
197
|
-
trace_tags=tags if tags is not None else self.trace_tags,
|
|
198
|
-
trace_metadata=(
|
|
199
|
-
metadata if metadata is not None else self.trace_metadata
|
|
200
|
-
),
|
|
201
|
-
trace_thread_id=(
|
|
202
|
-
thread_id if thread_id is not None else self.trace_thread_id
|
|
203
|
-
),
|
|
204
|
-
trace_user_id=(
|
|
205
|
-
user_id if user_id is not None else self.trace_user_id
|
|
206
|
-
),
|
|
207
|
-
trace_metric_collection=(
|
|
208
|
-
metric_collection
|
|
209
|
-
if metric_collection is not None
|
|
210
|
-
else self.trace_metric_collection
|
|
211
|
-
),
|
|
212
|
-
trace_metrics=(
|
|
213
|
-
metrics if metrics is not None else self.trace_metrics
|
|
214
|
-
),
|
|
215
|
-
trace_input=input,
|
|
216
|
-
trace_output=result.output,
|
|
217
|
-
)
|
|
218
|
-
|
|
219
|
-
agent_span: AgentSpan = current_span_context.get()
|
|
220
|
-
try:
|
|
221
|
-
agent_span.tools_called = extract_tools_called(result)
|
|
222
|
-
except:
|
|
223
|
-
pass
|
|
224
|
-
|
|
225
|
-
# TODO: available tools
|
|
226
|
-
# TODO: agent handoffs
|
|
227
|
-
|
|
228
|
-
return result
|
|
229
|
-
|
|
230
|
-
@asynccontextmanager
|
|
231
|
-
async def run_stream(
|
|
232
|
-
self,
|
|
233
|
-
*args,
|
|
234
|
-
name: Optional[str] = None,
|
|
235
|
-
tags: Optional[List[str]] = None,
|
|
236
|
-
metadata: Optional[dict] = None,
|
|
237
|
-
thread_id: Optional[str] = None,
|
|
238
|
-
user_id: Optional[str] = None,
|
|
239
|
-
metric_collection: Optional[str] = None,
|
|
240
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
241
|
-
**kwargs
|
|
242
|
-
):
|
|
243
|
-
sig = inspect.signature(super().run_stream)
|
|
244
|
-
super_params = sig.parameters
|
|
245
|
-
super_kwargs = {k: v for k, v in kwargs.items() if k in super_params}
|
|
246
|
-
bound = sig.bind_partial(*args, **super_kwargs)
|
|
247
|
-
bound.apply_defaults()
|
|
248
|
-
input = bound.arguments.get("user_prompt", None)
|
|
249
|
-
|
|
250
|
-
agent_name = super().name if super().name is not None else "Agent"
|
|
251
|
-
|
|
252
|
-
with Observer(
|
|
253
|
-
span_type="agent",
|
|
254
|
-
func_name=agent_name,
|
|
255
|
-
function_kwargs={"input": input},
|
|
256
|
-
metrics=self.agent_metrics,
|
|
257
|
-
metric_collection=self.agent_metric_collection,
|
|
258
|
-
) as observer:
|
|
259
|
-
final_result = None
|
|
260
|
-
async with super().run_stream(*args, **super_kwargs) as result:
|
|
261
|
-
try:
|
|
262
|
-
yield result
|
|
263
|
-
finally:
|
|
264
|
-
try:
|
|
265
|
-
final_result = await result.get_output()
|
|
266
|
-
observer.result = final_result
|
|
267
|
-
except Exception:
|
|
268
|
-
pass
|
|
269
|
-
|
|
270
|
-
update_trace_context(
|
|
271
|
-
trace_name=(
|
|
272
|
-
name if name is not None else self.trace_name
|
|
273
|
-
),
|
|
274
|
-
trace_tags=(
|
|
275
|
-
tags if tags is not None else self.trace_tags
|
|
276
|
-
),
|
|
277
|
-
trace_metadata=(
|
|
278
|
-
metadata
|
|
279
|
-
if metadata is not None
|
|
280
|
-
else self.trace_metadata
|
|
281
|
-
),
|
|
282
|
-
trace_thread_id=(
|
|
283
|
-
thread_id
|
|
284
|
-
if thread_id is not None
|
|
285
|
-
else self.trace_thread_id
|
|
286
|
-
),
|
|
287
|
-
trace_user_id=(
|
|
288
|
-
user_id
|
|
289
|
-
if user_id is not None
|
|
290
|
-
else self.trace_user_id
|
|
291
|
-
),
|
|
292
|
-
trace_metric_collection=(
|
|
293
|
-
metric_collection
|
|
294
|
-
if metric_collection is not None
|
|
295
|
-
else self.trace_metric_collection
|
|
296
|
-
),
|
|
297
|
-
trace_metrics=(
|
|
298
|
-
metrics
|
|
299
|
-
if metrics is not None
|
|
300
|
-
else self.trace_metrics
|
|
301
|
-
),
|
|
302
|
-
trace_input=input,
|
|
303
|
-
trace_output=(
|
|
304
|
-
final_result if final_result is not None else None
|
|
305
|
-
),
|
|
306
|
-
)
|
|
307
|
-
agent_span: AgentSpan = current_span_context.get()
|
|
308
|
-
try:
|
|
309
|
-
if final_result is not None:
|
|
310
|
-
agent_span.tools_called = extract_tools_called(
|
|
311
|
-
final_result
|
|
312
|
-
)
|
|
313
|
-
except:
|
|
314
|
-
pass
|
|
315
|
-
|
|
316
|
-
def tool(
|
|
317
|
-
self,
|
|
318
|
-
*args,
|
|
319
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
320
|
-
metric_collection: Optional[str] = None,
|
|
321
|
-
**kwargs
|
|
322
|
-
):
|
|
323
|
-
# Direct decoration: @agent.tool
|
|
324
|
-
if args and callable(args[0]):
|
|
325
|
-
patched_func = create_patched_tool(
|
|
326
|
-
args[0], metrics, metric_collection
|
|
327
|
-
)
|
|
328
|
-
new_args = (patched_func,) + args[1:]
|
|
329
|
-
return super(DeepEvalPydanticAIAgent, self).tool(
|
|
330
|
-
*new_args, **kwargs
|
|
331
|
-
)
|
|
332
|
-
# Decoration with args: @agent.tool(...)
|
|
333
|
-
super_tool = super(DeepEvalPydanticAIAgent, self).tool
|
|
334
|
-
|
|
335
|
-
def decorator(func):
|
|
336
|
-
patched_func = create_patched_tool(func, metrics, metric_collection)
|
|
337
|
-
return super_tool(*args, **kwargs)(patched_func)
|
|
338
|
-
|
|
339
|
-
return decorator
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import Literal, Optional, List
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from pydantic_ai.models.instrumented import InstrumentationSettings
|
|
7
|
+
from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
|
|
8
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
9
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
|
10
|
+
OTLPSpanExporter,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
dependency_installed = True
|
|
14
|
+
except:
|
|
15
|
+
dependency_installed = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_dependency_installed():
|
|
19
|
+
if not dependency_installed:
|
|
20
|
+
raise ImportError(
|
|
21
|
+
"Dependencies are not installed. Please install it with `pip install pydantic-ai opentelemetry-sdk opentelemetry-exporter-otlp-proto-http`."
|
|
22
|
+
)
|
|
23
|
+
return True
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
from deepeval.confident.api import get_confident_api_key
|
|
27
|
+
from deepeval.prompt import Prompt
|
|
28
|
+
|
|
29
|
+
# OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
|
|
30
|
+
OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SpanInterceptor(SpanProcessor):
|
|
34
|
+
def __init__(self, settings_instance):
|
|
35
|
+
# Keep a reference to the settings instance instead of copying values
|
|
36
|
+
self.settings: ConfidentInstrumentationSettings = settings_instance
|
|
37
|
+
|
|
38
|
+
def on_start(self, span, parent_context):
|
|
39
|
+
|
|
40
|
+
# set trace attributes
|
|
41
|
+
if self.settings.thread_id:
|
|
42
|
+
span.set_attribute(
|
|
43
|
+
"confident.trace.thread_id", self.settings.thread_id
|
|
44
|
+
)
|
|
45
|
+
if self.settings.user_id:
|
|
46
|
+
span.set_attribute("confident.trace.user_id", self.settings.user_id)
|
|
47
|
+
if self.settings.metadata:
|
|
48
|
+
span.set_attribute(
|
|
49
|
+
"confident.trace.metadata", json.dumps(self.settings.metadata)
|
|
50
|
+
)
|
|
51
|
+
if self.settings.tags:
|
|
52
|
+
span.set_attribute("confident.trace.tags", self.settings.tags)
|
|
53
|
+
if self.settings.metric_collection:
|
|
54
|
+
span.set_attribute(
|
|
55
|
+
"confident.trace.metric_collection",
|
|
56
|
+
self.settings.metric_collection,
|
|
57
|
+
)
|
|
58
|
+
if self.settings.environment:
|
|
59
|
+
span.set_attribute(
|
|
60
|
+
"confident.trace.environment", self.settings.environment
|
|
61
|
+
)
|
|
62
|
+
if self.settings.name:
|
|
63
|
+
span.set_attribute("confident.trace.name", self.settings.name)
|
|
64
|
+
if self.settings.confident_prompt:
|
|
65
|
+
span.set_attribute(
|
|
66
|
+
"confident.span.prompt",
|
|
67
|
+
json.dumps(
|
|
68
|
+
{
|
|
69
|
+
"alias": self.settings.confident_prompt.alias,
|
|
70
|
+
"version": self.settings.confident_prompt.version,
|
|
71
|
+
}
|
|
72
|
+
),
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# set trace metric collection
|
|
76
|
+
if self.settings.trace_metric_collection:
|
|
77
|
+
span.set_attribute(
|
|
78
|
+
"confident.trace.metric_collection",
|
|
79
|
+
self.settings.trace_metric_collection,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# set agent name and metric collection
|
|
83
|
+
if span.attributes.get("agent_name"):
|
|
84
|
+
span.set_attribute("confident.span.type", "agent")
|
|
85
|
+
span.set_attribute(
|
|
86
|
+
"confident.span.name", span.attributes.get("agent_name")
|
|
87
|
+
)
|
|
88
|
+
if self.settings.agent_metric_collection:
|
|
89
|
+
span.set_attribute(
|
|
90
|
+
"confident.span.metric_collection",
|
|
91
|
+
self.settings.agent_metric_collection,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# set llm metric collection
|
|
95
|
+
if span.attributes.get("gen_ai.operation.name") in [
|
|
96
|
+
"chat",
|
|
97
|
+
"generate_content",
|
|
98
|
+
"text_completion",
|
|
99
|
+
]:
|
|
100
|
+
if self.settings.llm_metric_collection:
|
|
101
|
+
span.set_attribute(
|
|
102
|
+
"confident.span.metric_collection",
|
|
103
|
+
self.settings.llm_metric_collection,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
# set tool metric collection
|
|
107
|
+
tool_name = span.attributes.get("gen_ai.tool.name")
|
|
108
|
+
if tool_name:
|
|
109
|
+
tool_metric_collection = (
|
|
110
|
+
self.settings.tool_metric_collection_map.get(tool_name)
|
|
111
|
+
)
|
|
112
|
+
if tool_metric_collection:
|
|
113
|
+
span.set_attribute(
|
|
114
|
+
"confident.span.metric_collection",
|
|
115
|
+
str(tool_metric_collection),
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
def on_end(self, span):
|
|
119
|
+
pass
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class ConfidentInstrumentationSettings(InstrumentationSettings):
|
|
123
|
+
|
|
124
|
+
name: Optional[str] = None
|
|
125
|
+
thread_id: Optional[str] = None
|
|
126
|
+
user_id: Optional[str] = None
|
|
127
|
+
metadata: Optional[dict] = None
|
|
128
|
+
tags: Optional[List[str]] = None
|
|
129
|
+
environment: Literal["production", "staging", "development", "testing"] = (
|
|
130
|
+
None
|
|
131
|
+
)
|
|
132
|
+
metric_collection: Optional[str] = None
|
|
133
|
+
confident_prompt: Optional[Prompt] = None
|
|
134
|
+
llm_metric_collection: Optional[str] = None
|
|
135
|
+
agent_metric_collection: Optional[str] = None
|
|
136
|
+
tool_metric_collection_map: dict = {}
|
|
137
|
+
trace_metric_collection: Optional[str] = None
|
|
138
|
+
|
|
139
|
+
def __init__(
|
|
140
|
+
self,
|
|
141
|
+
api_key: Optional[str] = None,
|
|
142
|
+
name: Optional[str] = None,
|
|
143
|
+
thread_id: Optional[str] = None,
|
|
144
|
+
user_id: Optional[str] = None,
|
|
145
|
+
metadata: Optional[dict] = None,
|
|
146
|
+
tags: Optional[List[str]] = None,
|
|
147
|
+
metric_collection: Optional[str] = None,
|
|
148
|
+
confident_prompt: Optional[Prompt] = None,
|
|
149
|
+
llm_metric_collection: Optional[str] = None,
|
|
150
|
+
agent_metric_collection: Optional[str] = None,
|
|
151
|
+
tool_metric_collection_map: dict = {},
|
|
152
|
+
trace_metric_collection: Optional[str] = None,
|
|
153
|
+
):
|
|
154
|
+
is_dependency_installed()
|
|
155
|
+
|
|
156
|
+
_environment = os.getenv("CONFIDENT_TRACE_ENVIRONMENT", "development")
|
|
157
|
+
if _environment and _environment in [
|
|
158
|
+
"production",
|
|
159
|
+
"staging",
|
|
160
|
+
"development",
|
|
161
|
+
"testing",
|
|
162
|
+
]:
|
|
163
|
+
self.environment = _environment
|
|
164
|
+
|
|
165
|
+
self.tool_metric_collection_map = tool_metric_collection_map
|
|
166
|
+
self.name = name
|
|
167
|
+
self.thread_id = thread_id
|
|
168
|
+
self.user_id = user_id
|
|
169
|
+
self.metadata = metadata
|
|
170
|
+
self.tags = tags
|
|
171
|
+
self.metric_collection = metric_collection
|
|
172
|
+
self.confident_prompt = confident_prompt
|
|
173
|
+
self.llm_metric_collection = llm_metric_collection
|
|
174
|
+
self.agent_metric_collection = agent_metric_collection
|
|
175
|
+
self.trace_metric_collection = trace_metric_collection
|
|
176
|
+
|
|
177
|
+
if not api_key:
|
|
178
|
+
api_key = get_confident_api_key()
|
|
179
|
+
if not api_key:
|
|
180
|
+
raise ValueError("CONFIDENT_API_KEY is not set")
|
|
181
|
+
|
|
182
|
+
trace_provider = TracerProvider()
|
|
183
|
+
|
|
184
|
+
# Pass the entire settings instance instead of individual values
|
|
185
|
+
span_interceptor = SpanInterceptor(self)
|
|
186
|
+
trace_provider.add_span_processor(span_interceptor)
|
|
187
|
+
|
|
188
|
+
trace_provider.add_span_processor(
|
|
189
|
+
BatchSpanProcessor(
|
|
190
|
+
OTLPSpanExporter(
|
|
191
|
+
endpoint=OTLP_ENDPOINT,
|
|
192
|
+
headers={"x-confident-api-key": api_key},
|
|
193
|
+
)
|
|
194
|
+
)
|
|
195
|
+
)
|
|
196
|
+
super().__init__(tracer_provider=trace_provider)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from typing import Optional
|
|
2
|
-
import deepeval
|
|
3
3
|
from deepeval.telemetry import capture_tracing_integration
|
|
4
|
-
from deepeval.confident.api import get_confident_api_key
|
|
5
4
|
|
|
6
5
|
try:
|
|
7
6
|
from opentelemetry import trace
|
|
@@ -28,6 +27,13 @@ OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
|
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
def instrument_pydantic_ai(api_key: Optional[str] = None):
|
|
30
|
+
warnings.warn(
|
|
31
|
+
"instrument_pydantic_ai is deprecated and will be removed in a future version. "
|
|
32
|
+
"Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
|
|
33
|
+
DeprecationWarning,
|
|
34
|
+
stacklevel=2,
|
|
35
|
+
)
|
|
36
|
+
|
|
31
37
|
with capture_tracing_integration("pydantic_ai"):
|
|
32
38
|
is_opentelemetry_available()
|
|
33
39
|
|