deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,323 +0,0 @@
- from time import perf_counter
- from contextlib import asynccontextmanager
- import inspect
- import functools
- from typing import Any, Callable, List, Optional
-
- from pydantic_ai.models import Model
- from pydantic_ai.agent import AgentRunResult
- from pydantic_ai._run_context import RunContext
- from pydantic_ai.messages import (
-     ModelRequest,
-     ModelResponse,
-     ModelResponsePart,
-     SystemPromptPart,
-     TextPart,
-     ToolCallPart,
-     ToolReturnPart,
-     UserPromptPart,
- )
-
- from deepeval.prompt import Prompt
- from deepeval.tracing.tracing import Observer
- from deepeval.metrics.base_metric import BaseMetric
- from deepeval.test_case.llm_test_case import ToolCall
- from deepeval.tracing.context import current_trace_context, current_span_context
- from deepeval.tracing.types import AgentSpan, LlmOutput, LlmSpan, LlmToolCall
-
-
- # Extract ToolCall objects from the tool-call parts of an LLM response
- def extract_tools_called_from_llm_response(
-     result: List[ModelResponsePart],
- ) -> List[ToolCall]:
-     tool_calls = []
-
-     # Loop through each ModelResponsePart
-     for part in result:
-         # Look for parts with part_kind="tool-call"
-         if hasattr(part, "part_kind") and part.part_kind == "tool-call":
-             # Extract tool name and args from the ToolCallPart
-             tool_name = part.tool_name
-             input_parameters = (
-                 part.args_as_dict() if hasattr(part, "args_as_dict") else None
-             )
-
-             # Create and append ToolCall object
-             tool_call = ToolCall(
-                 name=tool_name, input_parameters=input_parameters
-             )
-             tool_calls.append(tool_call)
-
-     return tool_calls
-
-
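For reference, a minimal sketch of what this helper returns for a response containing a single tool call; the tool name and arguments below are made up:

    from pydantic_ai.messages import ToolCallPart

    parts = [ToolCallPart(tool_name="get_weather", args={"city": "Paris"})]
    extract_tools_called_from_llm_response(parts)
    # -> [ToolCall(name="get_weather", input_parameters={"city": "Paris"})]
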
- # TODO: LLM tools called (the tool response arrives in the next message)
- def extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
-     tool_calls = []
-
-     # Access the message history from the _state
-     message_history = result._state.message_history
-
-     # Scan through all messages in the history
-     for message in message_history:
-         # Check if this is a ModelResponse (kind="response")
-         if hasattr(message, "kind") and message.kind == "response":
-             # For ModelResponse messages, check each part
-             if hasattr(message, "parts"):
-                 for part in message.parts:
-                     # Look for parts with part_kind="tool-call"
-                     if (
-                         hasattr(part, "part_kind")
-                         and part.part_kind == "tool-call"
-                     ):
-                         # Extract tool name and args from the ToolCallPart
-                         tool_name = part.tool_name
-                         input_parameters = (
-                             part.args_as_dict()
-                             if hasattr(part, "args_as_dict")
-                             else None
-                         )
-
-                         # Create and append ToolCall object
-                         tool_call = ToolCall(
-                             name=tool_name, input_parameters=input_parameters
-                         )
-                         tool_calls.append(tool_call)
-
-     return tool_calls
-
-
- def sanitize_run_context(value):
-     """
-     Recursively replace pydantic-ai RunContext instances with '<RunContext>'.
-
-     This avoids leaking internal context details into recorded function kwargs,
-     while keeping the original arguments intact for the actual function call.
-     """
-     if isinstance(value, RunContext):
-         return "<RunContext>"
-     if isinstance(value, dict):
-         return {k: sanitize_run_context(v) for k, v in value.items()}
-     if isinstance(value, (list, tuple)):
-         sanitized = [sanitize_run_context(v) for v in value]
-         return tuple(sanitized) if isinstance(value, tuple) else sanitized
-     if isinstance(value, set):
-         return {sanitize_run_context(v) for v in value}
-
-     return value
-
-
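As a rough illustration of the recursion (the payload below is made up): containers are rebuilt with the same shape, and a RunContext nested at any depth comes back as the placeholder string:

    payload = {"args": ({"query": "weather"}, ["Paris", "Lyon"])}
    sanitize_run_context(payload)
    # -> {"args": ({"query": "weather"}, ["Paris", "Lyon"])}
    # Any RunContext inside would instead appear as "<RunContext>".
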
- def patch_llm_model(
-     model: Model,
-     llm_metric_collection: Optional[str] = None,
-     llm_metrics: Optional[List[BaseMetric]] = None,
-     llm_prompt: Optional[Prompt] = None,
- ):
-     original_func = model.request
-     sig = inspect.signature(original_func)
-
-     try:
-         model_name = model.model_name
-     except Exception:
-         model_name = "unknown"
-
-     @functools.wraps(original_func)
-     async def wrapper(*args, **kwargs):
-         bound = sig.bind_partial(*args, **kwargs)
-         bound.apply_defaults()
-         request = bound.arguments.get("messages", [])
-
-         with Observer(
-             span_type="llm",
-             func_name="LLM",
-             observe_kwargs={"model": model_name},
-             metrics=llm_metrics,
-             metric_collection=llm_metric_collection,
-         ) as observer:
-             result = await original_func(*args, **kwargs)
-             observer.update_span_properties = (
-                 lambda llm_span: set_llm_span_attributes(
-                     llm_span, request, result, llm_prompt
-                 )
-             )
-             observer.result = result
-         return result
-
-     model.request = wrapper
-
-     stream_original_func = model.request_stream
-     stream_sig = inspect.signature(stream_original_func)
-
-     @asynccontextmanager
-     async def stream_wrapper(*args, **kwargs):
-         bound = stream_sig.bind_partial(*args, **kwargs)
-         bound.apply_defaults()
-         request = bound.arguments.get("messages", [])
-
-         with Observer(
-             span_type="llm",
-             func_name="LLM",
-             observe_kwargs={"model": model_name},
-             metrics=llm_metrics,
-             metric_collection=llm_metric_collection,
-         ) as observer:
-             llm_span: LlmSpan = current_span_context.get()
-             async with stream_original_func(
-                 *args, **kwargs
-             ) as streamed_response:
-                 try:
-                     yield streamed_response
-                     # Record a final token timestamp so streaming latency
-                     # can still be derived after the stream is consumed
-                     if not llm_span.token_intervals:
-                         llm_span.token_intervals = {perf_counter(): "NA"}
-                     else:
-                         llm_span.token_intervals[perf_counter()] = "NA"
-                 finally:
-                     try:
-                         result = streamed_response.get()
-                         observer.update_span_properties = (
-                             lambda llm_span: set_llm_span_attributes(
-                                 llm_span, request, result, llm_prompt
-                             )
-                         )
-                         observer.result = result
-                     except Exception:
-                         pass
-
-     model.request_stream = stream_wrapper
-
-
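A minimal sketch of applying the patch, assuming pydantic-ai's OpenAIModel and a hypothetical metric collection name; the model must be patched before the agent runs so the wrapped request/request_stream methods are the ones invoked:

    from pydantic_ai import Agent
    from pydantic_ai.models.openai import OpenAIModel

    model = OpenAIModel("gpt-4o")
    patch_llm_model(model, llm_metric_collection="llm-metrics")  # hypothetical collection
    agent = Agent(model)
    agent.run_sync("What is the capital of France?")  # request is traced as an "llm" span
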
- def create_patched_tool(
-     func: Callable,
-     metrics: Optional[List[BaseMetric]] = None,
-     metric_collection: Optional[str] = None,
- ):
-     import asyncio
-
-     original_func = func
-
-     is_async = asyncio.iscoroutinefunction(original_func)
-
-     if is_async:
-
-         @functools.wraps(original_func)
-         async def async_wrapper(*args, **kwargs):
-             sanitized_args = sanitize_run_context(args)
-             sanitized_kwargs = sanitize_run_context(kwargs)
-             with Observer(
-                 span_type="tool",
-                 func_name=original_func.__name__,
-                 metrics=metrics,
-                 metric_collection=metric_collection,
-                 function_kwargs={"args": sanitized_args, **sanitized_kwargs},
-             ) as observer:
-                 result = await original_func(*args, **kwargs)
-                 observer.result = result
-
-             return result
-
-         return async_wrapper
-     else:
-
-         @functools.wraps(original_func)
-         def sync_wrapper(*args, **kwargs):
-             sanitized_args = sanitize_run_context(args)
-             sanitized_kwargs = sanitize_run_context(kwargs)
-             with Observer(
-                 span_type="tool",
-                 func_name=original_func.__name__,
-                 metrics=metrics,
-                 metric_collection=metric_collection,
-                 function_kwargs={"args": sanitized_args, **sanitized_kwargs},
-             ) as observer:
-                 result = original_func(*args, **kwargs)
-                 observer.result = result
-
-             return result
-
-         return sync_wrapper
-
-
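A sketch of wrapping a tool before registering it with an agent; get_weather and the collection name are hypothetical. Because the wrapper uses functools.wraps, pydantic-ai's schema inference still sees the original signature:

    def get_weather(city: str) -> str:
        """Return a canned forecast for the given city."""
        return f"Sunny in {city}"

    traced_get_weather = create_patched_tool(
        get_weather, metric_collection="tool-metrics"  # hypothetical collection
    )
    agent = Agent(model, tools=[traced_get_weather])  # each call emits a "tool" span
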
- def update_trace_context(
-     trace_name: Optional[str] = None,
-     trace_tags: Optional[List[str]] = None,
-     trace_metadata: Optional[dict] = None,
-     trace_thread_id: Optional[str] = None,
-     trace_user_id: Optional[str] = None,
-     trace_metric_collection: Optional[str] = None,
-     trace_metrics: Optional[List[BaseMetric]] = None,
-     trace_input: Optional[Any] = None,
-     trace_output: Optional[Any] = None,
- ):
-     current_trace = current_trace_context.get()
-
-     if trace_name:
-         current_trace.name = trace_name
-     if trace_tags:
-         current_trace.tags = trace_tags
-     if trace_metadata:
-         current_trace.metadata = trace_metadata
-     if trace_thread_id:
-         current_trace.thread_id = trace_thread_id
-     if trace_user_id:
-         current_trace.user_id = trace_user_id
-     if trace_metric_collection:
-         current_trace.metric_collection = trace_metric_collection
-     if trace_metrics:
-         current_trace.metrics = trace_metrics
-     if trace_input:
-         current_trace.input = trace_input
-     if trace_output:
-         current_trace.output = trace_output
-
-
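Because this reads current_trace_context, it only has an effect when called inside an active trace (for example, from within an observed function). The identifiers below are made up:

    update_trace_context(
        trace_name="support-agent-run",
        trace_thread_id="thread-123",  # hypothetical thread/user IDs
        trace_user_id="user-456",
        trace_tags=["pydantic-ai"],
    )

Note the truthiness guards: passing an empty list or empty string leaves the existing value untouched.
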
- def set_llm_span_attributes(
-     llm_span: LlmSpan,
-     requests: List[ModelRequest],
-     result: ModelResponse,
-     llm_prompt: Optional[Prompt] = None,
- ):
-     llm_span.prompt = llm_prompt
-
-     # Flatten every request part into role/content entries for the span input
-     input = []
-     for request in requests:
-         for part in request.parts:
-             if isinstance(part, SystemPromptPart):
-                 input.append({"role": "System", "content": part.content})
-             elif isinstance(part, UserPromptPart):
-                 input.append({"role": "User", "content": part.content})
-             elif isinstance(part, ToolCallPart):
-                 input.append(
-                     {
-                         "role": "Tool Call",
-                         "name": part.tool_name,
-                         "content": part.args_as_json_str(),
-                     }
-                 )
-             elif isinstance(part, ToolReturnPart):
-                 input.append(
-                     {
-                         "role": "Tool Return",
-                         "name": part.tool_name,
-                         "content": part.model_response_str(),
-                     }
-                 )
-     llm_span.input = input
-
-     # Collect assistant text and tool calls from the response parts
-     content = ""
-     tool_calls = []
-     for part in result.parts:
-         if isinstance(part, TextPart):
-             content += part.content + "\n"
-         elif isinstance(part, ToolCallPart):
-             tool_calls.append(
-                 LlmToolCall(name=part.tool_name, args=part.args_as_dict())
-             )
-     llm_span.output = LlmOutput(
-         role="Assistant", content=content, tool_calls=tool_calls
-     )
-     llm_span.tools_called = extract_tools_called_from_llm_response(result.parts)
-
-
- def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
-     agent_span.tools_called = extract_tools_called(result)
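
For a single-turn exchange, the flattening above would produce something like the following sketch, assuming llm_span was obtained from an active span context (message contents are made up):

    request = ModelRequest(parts=[UserPromptPart(content="What is 2 + 2?")])
    response = ModelResponse(parts=[TextPart(content="4")])
    set_llm_span_attributes(llm_span, [request], response)
    # llm_span.input  -> [{"role": "User", "content": "What is 2 + 2?"}]
    # llm_span.output -> LlmOutput(role="Assistant", content="4\n", tool_calls=[])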