deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/config/settings_manager.py +1 -1
- deepeval/contextvars.py +25 -0
- deepeval/dataset/__init__.py +8 -2
- deepeval/evaluate/execute.py +15 -3
- deepeval/integrations/pydantic_ai/__init__.py +3 -3
- deepeval/integrations/pydantic_ai/agent.py +9 -327
- deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
- deepeval/integrations/pydantic_ai/otel.py +8 -2
- deepeval/openai_agents/__init__.py +4 -3
- deepeval/openai_agents/agent.py +8 -166
- deepeval/openai_agents/callback_handler.py +63 -62
- deepeval/openai_agents/extractors.py +83 -7
- deepeval/openai_agents/patch.py +255 -61
- deepeval/openai_agents/runner.py +348 -335
- deepeval/tracing/context.py +1 -0
- deepeval/tracing/otel/exporter.py +236 -174
- deepeval/tracing/otel/utils.py +95 -7
- deepeval/tracing/tracing.py +3 -0
- deepeval/utils.py +4 -3
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/METADATA +1 -1
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/RECORD +25 -25
- deepeval/integrations/pydantic_ai/patcher.py +0 -484
- deepeval/integrations/pydantic_ai/utils.py +0 -323
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/WHEEL +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/entry_points.txt +0 -0
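The headline change is the pydantic-ai integration: the monkey-patching modules patcher.py (-484 lines) and utils.py (-323 lines) are deleted in favor of a new instrumentator.py, and the OpenAI Agents integration shifts in the same direction (agent.py shrinks, patch.py grows). As a rough orientation, wiring up the new-style integration presumably looks something like the sketch below; the `instrument_pydantic_ai` entry point matches deepeval's pydantic-ai docs but is not confirmed by this diff, so treat the exact names as assumptions:

```python
# Hypothetical usage sketch for the 3.6.0-style integration. The
# instrument_pydantic_ai entry point is an assumption based on deepeval's
# pydantic-ai documentation; this diff itself does not show the API.
from pydantic_ai import Agent

from deepeval.integrations.pydantic_ai import instrument_pydantic_ai

instrument_pydantic_ai()  # installs the instrumentator-based tracing

# Illustrative only: requires a configured OpenAI key to actually run.
agent = Agent("openai:gpt-4o-mini", system_prompt="Be concise.")
result = agent.run_sync("What are the three laws of robotics?")
print(result.output)  # .data on older pydantic-ai releases
```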
The only hunk expanded in this section is the deleted module deepeval/integrations/pydantic_ai/utils.py (323 lines removed, matching the -323 in the summary above; the other deleted module, patcher.py, is not expanded here). First, the imports and the extractor that pulls tool calls out of a single model response (removed lines 1-53):

```diff
@@ -1,323 +0,0 @@
-from time import perf_counter
-from contextlib import asynccontextmanager
-import inspect
-import functools
-from typing import Any, Callable, List, Optional
-
-from pydantic_ai.models import Model
-from pydantic_ai.agent import AgentRunResult
-from pydantic_ai._run_context import RunContext
-from pydantic_ai.messages import (
-    ModelRequest,
-    ModelResponse,
-    ModelResponsePart,
-    SystemPromptPart,
-    TextPart,
-    ToolCallPart,
-    ToolReturnPart,
-    UserPromptPart,
-)
-
-from deepeval.prompt import Prompt
-from deepeval.tracing.tracing import Observer
-from deepeval.metrics.base_metric import BaseMetric
-from deepeval.test_case.llm_test_case import ToolCall
-from deepeval.tracing.context import current_trace_context, current_span_context
-from deepeval.tracing.types import AgentSpan, LlmOutput, LlmSpan, LlmToolCall
-
-
-# llm tools called
-def extract_tools_called_from_llm_response(
-    result: List[ModelResponsePart],
-) -> List[ToolCall]:
-    tool_calls = []
-
-    # Loop through each ModelResponsePart
-    for part in result:
-        # Look for parts with part_kind="tool-call"
-        if hasattr(part, "part_kind") and part.part_kind == "tool-call":
-            # Extract tool name and args from the ToolCallPart
-            tool_name = part.tool_name
-            input_parameters = (
-                part.args_as_dict() if hasattr(part, "args_as_dict") else None
-            )
-
-            # Create and append ToolCall object
-            tool_call = ToolCall(
-                name=tool_name, input_parameters=input_parameters
-            )
-            tool_calls.append(tool_call)
-
-    return tool_calls
-
-
```
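Note the extractor matches parts by duck typing (`part_kind`, `args_as_dict`) rather than by isinstance against pydantic-ai's part classes. A dependency-free sketch of the same matching logic; `FakeToolCallPart` is a hypothetical stand-in for pydantic-ai's ToolCallPart, not a real type:

```python
# Minimal sketch of the duck-typed matching above; FakeToolCallPart is a
# stand-in for pydantic_ai.messages.ToolCallPart so this runs anywhere.
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class FakeToolCallPart:
    tool_name: str
    args: Dict[str, Any] = field(default_factory=dict)
    part_kind: str = "tool-call"

    def args_as_dict(self) -> Dict[str, Any]:
        return self.args


def tools_from_parts(parts: List[Any]) -> List[Dict[str, Any]]:
    # Same filter as extract_tools_called_from_llm_response, minus
    # deepeval's ToolCall wrapper: keep anything that quacks like a
    # tool-call part.
    return [
        {
            "name": p.tool_name,
            "input_parameters": (
                p.args_as_dict() if hasattr(p, "args_as_dict") else None
            ),
        }
        for p in parts
        if getattr(p, "part_kind", None) == "tool-call"
    ]


print(tools_from_parts([FakeToolCallPart("get_weather", {"city": "Paris"})]))
# -> [{'name': 'get_weather', 'input_parameters': {'city': 'Paris'}}]
```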
Removed utils.py, continued (lines 54-89): the agent-level extractor that scans the full message history.

```diff
-# TODO: llm tools called (reposne is present next message)
-def extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
-    tool_calls = []
-
-    # Access the message history from the _state
-    message_history = result._state.message_history
-
-    # Scan through all messages in the history
-    for message in message_history:
-        # Check if this is a ModelResponse (kind="response")
-        if hasattr(message, "kind") and message.kind == "response":
-            # For ModelResponse messages, check each part
-            if hasattr(message, "parts"):
-                for part in message.parts:
-                    # Look for parts with part_kind="tool-call"
-                    if (
-                        hasattr(part, "part_kind")
-                        and part.part_kind == "tool-call"
-                    ):
-                        # Extract tool name and args from the ToolCallPart
-                        tool_name = part.tool_name
-                        input_parameters = (
-                            part.args_as_dict()
-                            if hasattr(part, "args_as_dict")
-                            else None
-                        )
-
-                        # Create and append ToolCall object
-                        tool_call = ToolCall(
-                            name=tool_name, input_parameters=input_parameters
-                        )
-                        tool_calls.append(tool_call)
-
-    return tool_calls
-
-
```
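The removed code reads the private `result._state.message_history`. A hedged sketch of the same scan through pydantic-ai's public accessor; `all_messages()` exists on recent AgentRunResult versions, but treat its availability in your version as an assumption:

```python
# Sketch only: the same tool-call scan via pydantic-ai's public API
# instead of the private _state attribute. Assumes a pydantic-ai version
# whose run results expose all_messages(); requires pydantic-ai installed.
from typing import Any, Dict, List

from pydantic_ai.messages import ModelResponse, ToolCallPart


def tool_calls_from_result(result: Any) -> List[Dict[str, Any]]:
    calls: List[Dict[str, Any]] = []
    for message in result.all_messages():  # full request/response history
        if isinstance(message, ModelResponse):
            for part in message.parts:
                if isinstance(part, ToolCallPart):
                    calls.append(
                        {"name": part.tool_name, "args": part.args_as_dict()}
                    )
    return calls
```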
Removed utils.py, continued (lines 90-109): the RunContext scrubber.

```diff
-def sanitize_run_context(value):
-    """
-    Recursively replace pydantic-ai RunContext instances with '<RunContext>'.
-
-    This avoids leaking internal context details into recorded function kwargs,
-    while keeping the original arguments intact for the actual function call.
-    """
-    if isinstance(value, RunContext):
-        return "<RunContext>"
-    if isinstance(value, dict):
-        return {k: sanitize_run_context(v) for k, v in value.items()}
-    if isinstance(value, (list, tuple)):
-        sanitized = [sanitize_run_context(v) for v in value]
-        return tuple(sanitized) if isinstance(value, tuple) else sanitized
-    if isinstance(value, set):
-        return {sanitize_run_context(v) for v in value}
-
-    return value
-
-
```
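The recursion walks dicts, lists, tuples, and sets, replaces any RunContext with a placeholder string, and leaves everything else untouched. A runnable behavior sketch with a local stand-in class in place of pydantic-ai's RunContext:

```python
# Behavior sketch of sanitize_run_context; FakeRunContext is a local
# stand-in so this runs without pydantic-ai installed.
from typing import Any


class FakeRunContext:
    pass


def sanitize(value: Any) -> Any:
    if isinstance(value, FakeRunContext):
        return "<RunContext>"
    if isinstance(value, dict):
        return {k: sanitize(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        out = [sanitize(v) for v in value]
        return tuple(out) if isinstance(value, tuple) else out
    if isinstance(value, set):
        return {sanitize(v) for v in value}
    return value


print(sanitize({"deps": FakeRunContext(), "args": (1, [FakeRunContext()])}))
# -> {'deps': '<RunContext>', 'args': (1, ['<RunContext>'])}
```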
Removed utils.py, continued (lines 110-188): instance-level patching of Model.request and Model.request_stream.

```diff
-def patch_llm_model(
-    model: Model,
-    llm_metric_collection: Optional[str] = None,
-    llm_metrics: Optional[List[BaseMetric]] = None,
-    llm_prompt: Optional[Prompt] = None,
-):
-    original_func = model.request
-    sig = inspect.signature(original_func)
-
-    try:
-        model_name = model.model_name
-    except Exception:
-        model_name = "unknown"
-
-    @functools.wraps(original_func)
-    async def wrapper(*args, **kwargs):
-        bound = sig.bind_partial(*args, **kwargs)
-        bound.apply_defaults()
-        request = bound.arguments.get("messages", [])
-
-        with Observer(
-            span_type="llm",
-            func_name="LLM",
-            observe_kwargs={"model": model_name},
-            metrics=llm_metrics,
-            metric_collection=llm_metric_collection,
-        ) as observer:
-            result = await original_func(*args, **kwargs)
-            observer.update_span_properties = (
-                lambda llm_span: set_llm_span_attributes(
-                    llm_span, request, result, llm_prompt
-                )
-            )
-            observer.result = result
-            return result
-
-    model.request = wrapper
-
-    stream_original_func = model.request_stream
-    stream_sig = inspect.signature(stream_original_func)
-
-    @asynccontextmanager
-    async def stream_wrapper(*args, **kwargs):
-        bound = stream_sig.bind_partial(*args, **kwargs)
-        bound.apply_defaults()
-        request = bound.arguments.get("messages", [])
-
-        with Observer(
-            span_type="llm",
-            func_name="LLM",
-            observe_kwargs={"model": model_name},
-            metrics=llm_metrics,
-            metric_collection=llm_metric_collection,
-        ) as observer:
-            llm_span: LlmSpan = current_span_context.get()
-            async with stream_original_func(
-                *args, **kwargs
-            ) as streamed_response:
-                try:
-                    yield streamed_response
-                    if not llm_span.token_intervals:
-                        llm_span.token_intervals = {perf_counter(): "NA"}
-                    else:
-                        llm_span.token_intervals[perf_counter()] = "NA"
-                finally:
-                    try:
-                        result = streamed_response.get()
-                        observer.update_span_properties = (
-                            lambda llm_span: set_llm_span_attributes(
-                                llm_span, request, result, llm_prompt
-                            )
-                        )
-                        observer.result = result
-                    except Exception:
-                        pass
-
-    model.request_stream = stream_wrapper
-
-
```
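The pattern here is instance-level monkey patching: the bound methods are replaced on the model object itself, so the class stays untouched and signature introspection still works for keyword extraction. A stripped-down, runnable sketch of that core pattern; FakeModel is a hypothetical stand-in for pydantic-ai's Model, and a print stands in for deepeval's Observer span:

```python
# Core pattern behind patch_llm_model, minus the Observer/span plumbing.
# FakeModel is a hypothetical stand-in, not pydantic-ai's Model.
import asyncio
import functools
import inspect


class FakeModel:
    async def request(self, messages, model_settings=None):
        return f"response to {messages!r}"


def patch(model: FakeModel) -> None:
    original = model.request              # bound method
    sig = inspect.signature(original)     # bound methods omit `self`

    @functools.wraps(original)
    async def wrapper(*args, **kwargs):
        bound = sig.bind_partial(*args, **kwargs)
        bound.apply_defaults()
        messages = bound.arguments.get("messages", [])
        print(f"span open: model request, messages={messages!r}")
        return await original(*args, **kwargs)

    model.request = wrapper  # instance attribute shadows the class method


model = FakeModel()
patch(model)
print(asyncio.run(model.request(["hi"])))
```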
Removed utils.py, continued (lines 189-239): the tool wrapper, with separate async and sync paths.

```diff
-def create_patched_tool(
-    func: Callable,
-    metrics: Optional[List[BaseMetric]] = None,
-    metric_collection: Optional[str] = None,
-):
-    import asyncio
-
-    original_func = func
-
-    is_async = asyncio.iscoroutinefunction(original_func)
-
-    if is_async:
-
-        @functools.wraps(original_func)
-        async def async_wrapper(*args, **kwargs):
-            sanitized_args = sanitize_run_context(args)
-            sanitized_kwargs = sanitize_run_context(kwargs)
-            with Observer(
-                span_type="tool",
-                func_name=original_func.__name__,
-                metrics=metrics,
-                metric_collection=metric_collection,
-                function_kwargs={"args": sanitized_args, **sanitized_kwargs},
-            ) as observer:
-                result = await original_func(*args, **kwargs)
-                observer.result = result
-
-            return result
-
-        return async_wrapper
-    else:
-
-        @functools.wraps(original_func)
-        def sync_wrapper(*args, **kwargs):
-            sanitized_args = sanitize_run_context(args)
-            sanitized_kwargs = sanitize_run_context(kwargs)
-            with Observer(
-                span_type="tool",
-                func_name=original_func.__name__,
-                metrics=metrics,
-                metric_collection=metric_collection,
-                function_kwargs={"args": sanitized_args, **sanitized_kwargs},
-            ) as observer:
-                result = original_func(*args, **kwargs)
-                observer.result = result
-
-            return result
-
-        return sync_wrapper
-
-
```
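create_patched_tool branches on asyncio.iscoroutinefunction so awaitable tools keep their await semantics. The same dispatch, reduced to a runnable decorator; `traced` is my name for the sketch, and a print stands in for the Observer span:

```python
# The sync/async dispatch trick used by create_patched_tool, as a plain
# decorator. A print replaces deepeval's Observer span for the demo.
import asyncio
import functools


def traced(func):
    if asyncio.iscoroutinefunction(func):

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            print(f"tool span: {func.__name__}")
            return await func(*args, **kwargs)

        return async_wrapper

    @functools.wraps(func)
    def sync_wrapper(*args, **kwargs):
        print(f"tool span: {func.__name__}")
        return func(*args, **kwargs)

    return sync_wrapper


@traced
def add(a: int, b: int) -> int:
    return a + b


@traced
async def fetch(url: str) -> str:
    return f"GET {url}"


print(add(1, 2))                                   # tool span: add / 3
print(asyncio.run(fetch("https://example.com")))   # tool span: fetch
```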
Removed utils.py, continued (lines 240-273): field-by-field updates of the active trace.

```diff
-def update_trace_context(
-    trace_name: Optional[str] = None,
-    trace_tags: Optional[List[str]] = None,
-    trace_metadata: Optional[dict] = None,
-    trace_thread_id: Optional[str] = None,
-    trace_user_id: Optional[str] = None,
-    trace_metric_collection: Optional[str] = None,
-    trace_metrics: Optional[List[BaseMetric]] = None,
-    trace_input: Optional[Any] = None,
-    trace_output: Optional[Any] = None,
-):
-
-    current_trace = current_trace_context.get()
-
-    if trace_name:
-        current_trace.name = trace_name
-    if trace_tags:
-        current_trace.tags = trace_tags
-    if trace_metadata:
-        current_trace.metadata = trace_metadata
-    if trace_thread_id:
-        current_trace.thread_id = trace_thread_id
-    if trace_user_id:
-        current_trace.user_id = trace_user_id
-    if trace_metric_collection:
-        current_trace.metric_collection = trace_metric_collection
-    if trace_metrics:
-        current_trace.metrics = trace_metrics
-    if trace_input:
-        current_trace.input = trace_input
-    if trace_output:
-        current_trace.output = trace_output
-
-
```
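Each field is guarded by a truthiness check, so falsy values ("", [], {}, 0) are silently ignored rather than used to clear a field, which is worth knowing if you relied on this helper. The underlying pattern is a ContextVar holding the active trace; a minimal sketch with a hypothetical Trace stand-in:

```python
# ContextVar pattern behind update_trace_context; Trace here is a
# hypothetical stand-in for deepeval's trace object.
from contextvars import ContextVar
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Trace:
    name: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    thread_id: Optional[str] = None


# A single shared default is fine for this one-context demo.
current_trace: ContextVar[Trace] = ContextVar("current_trace", default=Trace())


def update_trace(name=None, tags=None, thread_id=None):
    trace = current_trace.get()
    # Truthiness guards, as in the removed helper: falsy values are ignored.
    if name:
        trace.name = name
    if tags:
        trace.tags = tags
    if thread_id:
        trace.thread_id = thread_id


update_trace(name="checkout-flow", thread_id="thread-42")
print(current_trace.get())
# -> Trace(name='checkout-flow', tags=[], thread_id='thread-42')
```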
Removed utils.py, continued (lines 274-321): flattening the request history and the model response into LLM span attributes.

```diff
-def set_llm_span_attributes(
-    llm_span: LlmSpan,
-    requests: List[ModelRequest],
-    result: ModelResponse,
-    llm_prompt: Optional[Prompt] = None,
-):
-    llm_span.prompt = llm_prompt
-
-    input = []
-    for request in requests:
-        for part in request.parts:
-            if isinstance(part, SystemPromptPart):
-                input.append({"role": "System", "content": part.content})
-            elif isinstance(part, UserPromptPart):
-                input.append({"role": "User", "content": part.content})
-            elif isinstance(part, ToolCallPart):
-                input.append(
-                    {
-                        "role": "Tool Call",
-                        "name": part.tool_name,
-                        "content": part.args_as_json_str(),
-                    }
-                )
-            elif isinstance(part, ToolReturnPart):
-                input.append(
-                    {
-                        "role": "Tool Return",
-                        "name": part.tool_name,
-                        "content": part.model_response_str(),
-                    }
-                )
-    llm_span.input = input
-
-    content = ""
-    tool_calls = []
-    for part in result.parts:
-        if isinstance(part, TextPart):
-            content += part.content + "\n"
-        elif isinstance(part, ToolCallPart):
-            tool_calls.append(
-                LlmToolCall(name=part.tool_name, args=part.args_as_dict())
-            )
-    llm_span.output = LlmOutput(
-        role="Assistant", content=content, tool_calls=tool_calls
-    )
-    llm_span.tools_called = extract_tools_called_from_llm_response(result.parts)
-
-
```
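The mapping from part types to role-tagged dicts is the whole trick. A dependency-free mirror of the request-flattening half, with hypothetical dataclasses standing in for pydantic-ai's SystemPromptPart, UserPromptPart, and ToolReturnPart:

```python
# Dependency-free mirror of the request-flattening logic above: each
# part type maps to a {"role": ..., "content": ...} dict. The dataclasses
# are stand-ins for the pydantic-ai part classes.
import json
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class SystemPart:
    content: str


@dataclass
class UserPart:
    content: str


@dataclass
class ToolReturn:
    tool_name: str
    content: Dict[str, Any]


def flatten(parts: List[Any]) -> List[Dict[str, Any]]:
    out: List[Dict[str, Any]] = []
    for p in parts:
        if isinstance(p, SystemPart):
            out.append({"role": "System", "content": p.content})
        elif isinstance(p, UserPart):
            out.append({"role": "User", "content": p.content})
        elif isinstance(p, ToolReturn):
            out.append(
                {
                    "role": "Tool Return",
                    "name": p.tool_name,
                    "content": json.dumps(p.content),
                }
            )
    return out


print(
    flatten(
        [
            SystemPart("You are helpful."),
            UserPart("Weather in Paris?"),
            ToolReturn("get_weather", {"temp_c": 18}),
        ]
    )
)
```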
Removed utils.py, end (lines 322-323): the agent-span counterpart, which delegates to the history scanner shown earlier.

```diff
-def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
-    agent_span.tools_called = extract_tools_called(result)
```