deepeval 3.5.2__py3-none-any.whl → 3.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +94 -2
- deepeval/config/utils.py +54 -1
- deepeval/constants.py +27 -0
- deepeval/integrations/pydantic_ai/__init__.py +3 -1
- deepeval/integrations/pydantic_ai/agent.py +339 -0
- deepeval/integrations/pydantic_ai/patcher.py +479 -406
- deepeval/integrations/pydantic_ai/utils.py +239 -2
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/non_advice/non_advice.py +2 -2
- deepeval/metrics/pii_leakage/pii_leakage.py +2 -2
- deepeval/models/embedding_models/azure_embedding_model.py +40 -9
- deepeval/models/embedding_models/local_embedding_model.py +52 -9
- deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
- deepeval/models/embedding_models/openai_embedding_model.py +47 -5
- deepeval/models/llms/amazon_bedrock_model.py +31 -4
- deepeval/models/llms/anthropic_model.py +39 -13
- deepeval/models/llms/azure_model.py +37 -38
- deepeval/models/llms/deepseek_model.py +36 -7
- deepeval/models/llms/gemini_model.py +10 -0
- deepeval/models/llms/grok_model.py +50 -3
- deepeval/models/llms/kimi_model.py +37 -7
- deepeval/models/llms/local_model.py +38 -12
- deepeval/models/llms/ollama_model.py +15 -3
- deepeval/models/llms/openai_model.py +37 -44
- deepeval/models/mlllms/gemini_model.py +21 -3
- deepeval/models/mlllms/ollama_model.py +38 -13
- deepeval/models/mlllms/openai_model.py +18 -42
- deepeval/models/retry_policy.py +548 -64
- deepeval/tracing/tracing.py +87 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/METADATA +1 -1
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/RECORD +35 -34
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/WHEEL +0 -0
- {deepeval-3.5.2.dist-info → deepeval-3.5.4.dist-info}/entry_points.txt +0 -0
|
@@ -1,411 +1,484 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
from
|
|
5
|
-
from deepeval.tracing.
|
|
6
|
-
from
|
|
7
|
-
from deepeval.tracing.
|
|
8
|
-
from
|
|
9
|
-
from deepeval.
|
|
10
|
-
from deepeval.
|
|
11
|
-
from deepeval.
|
|
12
|
-
from deepeval.
|
|
13
|
-
from deepeval.
|
|
14
|
-
from deepeval.
|
|
15
|
-
import
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
1
|
+
# import inspect
|
|
2
|
+
# import functools
|
|
3
|
+
# import warnings
|
|
4
|
+
# from typing import List, Callable, Optional, Any
|
|
5
|
+
# from deepeval.tracing.types import LlmOutput, LlmToolCall
|
|
6
|
+
# from pydantic_ai.agent import AgentRunResult
|
|
7
|
+
# from deepeval.tracing.context import current_trace_context
|
|
8
|
+
# from deepeval.tracing.types import AgentSpan, LlmSpan
|
|
9
|
+
# from deepeval.tracing.tracing import Observer
|
|
10
|
+
# from deepeval.test_case.llm_test_case import ToolCall
|
|
11
|
+
# from deepeval.metrics.base_metric import BaseMetric
|
|
12
|
+
# from deepeval.confident.api import get_confident_api_key
|
|
13
|
+
# from deepeval.integrations.pydantic_ai.otel import instrument_pydantic_ai
|
|
14
|
+
# from deepeval.telemetry import capture_tracing_integration
|
|
15
|
+
# from deepeval.prompt import Prompt
|
|
16
|
+
# import deepeval
|
|
17
|
+
# # from contextvars import ContextVar
|
|
18
|
+
|
|
19
|
+
# try:
|
|
20
|
+
# from pydantic_ai.agent import Agent
|
|
21
|
+
# from pydantic_ai.models import Model
|
|
22
|
+
# from pydantic_ai.messages import (
|
|
23
|
+
# ModelResponse,
|
|
24
|
+
# ModelRequest,
|
|
25
|
+
# ModelResponsePart,
|
|
26
|
+
# TextPart,
|
|
27
|
+
# ToolCallPart,
|
|
28
|
+
# SystemPromptPart,
|
|
29
|
+
# ToolReturnPart,
|
|
30
|
+
# UserPromptPart,
|
|
31
|
+
# )
|
|
32
|
+
# from pydantic_ai._run_context import RunContext
|
|
33
|
+
# from deepeval.integrations.pydantic_ai.utils import (
|
|
34
|
+
# extract_tools_called_from_llm_response,
|
|
35
|
+
# extract_tools_called,
|
|
36
|
+
# sanitize_run_context,
|
|
37
|
+
# )
|
|
38
|
+
|
|
39
|
+
# pydantic_ai_installed = True
|
|
40
|
+
# except:
|
|
41
|
+
# pydantic_ai_installed = True
|
|
42
|
+
|
|
43
|
+
# # _IN_RUN_SYNC = ContextVar("deepeval_in_run_sync", default=False)
|
|
44
|
+
# # _INSTRUMENTED = False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
import warnings
|
|
48
|
+
from typing import Optional
|
|
44
49
|
|
|
45
50
|
|
|
46
51
|
def instrument(otel: Optional[bool] = False, api_key: Optional[str] = None):
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if otel:
|
|
57
|
-
instrument_pydantic_ai(api_key)
|
|
58
|
-
else:
|
|
59
|
-
with capture_tracing_integration("pydantic_ai"):
|
|
60
|
-
if _INSTRUMENTED:
|
|
61
|
-
return
|
|
62
|
-
_patch_agent_init()
|
|
63
|
-
_patch_agent_tool_decorator()
|
|
64
|
-
_INSTRUMENTED = True
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
################### Init Patches ###################
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def _patch_agent_init():
|
|
71
|
-
original_init = Agent.__init__
|
|
72
|
-
|
|
73
|
-
@functools.wraps(original_init)
|
|
74
|
-
def wrapper(
|
|
75
|
-
*args,
|
|
76
|
-
llm_metric_collection: Optional[str] = None,
|
|
77
|
-
llm_metrics: Optional[List[BaseMetric]] = None,
|
|
78
|
-
llm_prompt: Optional[Prompt] = None,
|
|
79
|
-
agent_metric_collection: Optional[str] = None,
|
|
80
|
-
agent_metrics: Optional[List[BaseMetric]] = None,
|
|
81
|
-
**kwargs
|
|
82
|
-
):
|
|
83
|
-
result = original_init(*args, **kwargs)
|
|
84
|
-
_patch_llm_model(
|
|
85
|
-
args[0]._model, llm_metric_collection, llm_metrics, llm_prompt
|
|
86
|
-
) # runtime patch of the model
|
|
87
|
-
_patch_agent_run(args[0], agent_metric_collection, agent_metrics)
|
|
88
|
-
_patch_agent_run_sync(args[0], agent_metric_collection, agent_metrics)
|
|
89
|
-
return result
|
|
90
|
-
|
|
91
|
-
Agent.__init__ = wrapper
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def _patch_agent_tool_decorator():
|
|
95
|
-
original_tool = Agent.tool
|
|
96
|
-
|
|
97
|
-
@functools.wraps(original_tool)
|
|
98
|
-
def wrapper(
|
|
99
|
-
*args,
|
|
100
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
101
|
-
metric_collection: Optional[str] = None,
|
|
102
|
-
**kwargs
|
|
103
|
-
):
|
|
104
|
-
# Case 1: Direct decoration - @agent.tool
|
|
105
|
-
if args and callable(args[0]):
|
|
106
|
-
patched_func = _create_patched_tool(
|
|
107
|
-
args[0], metrics, metric_collection
|
|
108
|
-
)
|
|
109
|
-
new_args = (patched_func,) + args[1:]
|
|
110
|
-
return original_tool(*new_args, **kwargs)
|
|
111
|
-
|
|
112
|
-
# Case 2: Decoration with arguments - @agent.tool(metrics=..., metric_collection=...)
|
|
113
|
-
else:
|
|
114
|
-
# Return a decorator function that will receive the actual function
|
|
115
|
-
def decorator(func):
|
|
116
|
-
patched_func = _create_patched_tool(
|
|
117
|
-
func, metrics, metric_collection
|
|
118
|
-
)
|
|
119
|
-
return original_tool(*args, **kwargs)(patched_func)
|
|
120
|
-
|
|
121
|
-
return decorator
|
|
122
|
-
|
|
123
|
-
Agent.tool = wrapper
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
################### Runtime Patches ###################
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def _patch_agent_run_sync(
|
|
130
|
-
agent: Agent,
|
|
131
|
-
agent_metric_collection: Optional[str] = None,
|
|
132
|
-
agent_metrics: Optional[List[BaseMetric]] = None,
|
|
133
|
-
):
|
|
134
|
-
original_run_sync = agent.run_sync
|
|
135
|
-
|
|
136
|
-
@functools.wraps(original_run_sync)
|
|
137
|
-
def wrapper(
|
|
138
|
-
*args,
|
|
139
|
-
metric_collection: Optional[str] = None,
|
|
140
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
141
|
-
name: Optional[str] = None,
|
|
142
|
-
tags: Optional[List[str]] = None,
|
|
143
|
-
metadata: Optional[dict] = None,
|
|
144
|
-
thread_id: Optional[str] = None,
|
|
145
|
-
user_id: Optional[str] = None,
|
|
146
|
-
**kwargs
|
|
147
|
-
):
|
|
148
|
-
|
|
149
|
-
sig = inspect.signature(original_run_sync)
|
|
150
|
-
bound = sig.bind_partial(*args, **kwargs)
|
|
151
|
-
bound.apply_defaults()
|
|
152
|
-
input = bound.arguments.get("user_prompt", None)
|
|
153
|
-
|
|
154
|
-
with Observer(
|
|
155
|
-
span_type="agent",
|
|
156
|
-
func_name="Agent",
|
|
157
|
-
function_kwargs={"input": input},
|
|
158
|
-
metrics=agent_metrics,
|
|
159
|
-
metric_collection=agent_metric_collection,
|
|
160
|
-
) as observer:
|
|
161
|
-
|
|
162
|
-
token = _IN_RUN_SYNC.set(True)
|
|
163
|
-
try:
|
|
164
|
-
result = original_run_sync(*args, **kwargs)
|
|
165
|
-
finally:
|
|
166
|
-
_IN_RUN_SYNC.reset(token)
|
|
167
|
-
|
|
168
|
-
observer.update_span_properties = (
|
|
169
|
-
lambda agent_span: set_agent_span_attributes(agent_span, result)
|
|
170
|
-
)
|
|
171
|
-
observer.result = result.output
|
|
172
|
-
|
|
173
|
-
_update_trace_context(
|
|
174
|
-
trace_name=name,
|
|
175
|
-
trace_tags=tags,
|
|
176
|
-
trace_metadata=metadata,
|
|
177
|
-
trace_thread_id=thread_id,
|
|
178
|
-
trace_user_id=user_id,
|
|
179
|
-
trace_metric_collection=metric_collection,
|
|
180
|
-
trace_metrics=metrics,
|
|
181
|
-
trace_input=input,
|
|
182
|
-
trace_output=result.output,
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
return result
|
|
186
|
-
|
|
187
|
-
agent.run_sync = wrapper
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def _patch_agent_run(
|
|
191
|
-
agent: Agent,
|
|
192
|
-
agent_metric_collection: Optional[str] = None,
|
|
193
|
-
agent_metrics: Optional[List[BaseMetric]] = None,
|
|
194
|
-
):
|
|
195
|
-
original_run = agent.run
|
|
196
|
-
|
|
197
|
-
@functools.wraps(original_run)
|
|
198
|
-
async def wrapper(
|
|
199
|
-
*args,
|
|
200
|
-
metric_collection: Optional[str] = None,
|
|
201
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
202
|
-
name: Optional[str] = None,
|
|
203
|
-
tags: Optional[List[str]] = None,
|
|
204
|
-
metadata: Optional[dict] = None,
|
|
205
|
-
thread_id: Optional[str] = None,
|
|
206
|
-
user_id: Optional[str] = None,
|
|
207
|
-
**kwargs
|
|
208
|
-
):
|
|
209
|
-
sig = inspect.signature(original_run)
|
|
210
|
-
bound = sig.bind_partial(*args, **kwargs)
|
|
211
|
-
bound.apply_defaults()
|
|
212
|
-
input = bound.arguments.get("user_prompt", None)
|
|
213
|
-
|
|
214
|
-
in_sync = _IN_RUN_SYNC.get()
|
|
215
|
-
with Observer(
|
|
216
|
-
span_type="agent" if not in_sync else "custom",
|
|
217
|
-
func_name="Agent" if not in_sync else "run",
|
|
218
|
-
function_kwargs={"input": input},
|
|
219
|
-
metrics=agent_metrics if not in_sync else None,
|
|
220
|
-
metric_collection=agent_metric_collection if not in_sync else None,
|
|
221
|
-
) as observer:
|
|
222
|
-
result = await original_run(*args, **kwargs)
|
|
223
|
-
observer.update_span_properties = (
|
|
224
|
-
lambda agent_span: set_agent_span_attributes(agent_span, result)
|
|
225
|
-
)
|
|
226
|
-
observer.result = result.output
|
|
227
|
-
|
|
228
|
-
_update_trace_context(
|
|
229
|
-
trace_name=name,
|
|
230
|
-
trace_tags=tags,
|
|
231
|
-
trace_metadata=metadata,
|
|
232
|
-
trace_thread_id=thread_id,
|
|
233
|
-
trace_user_id=user_id,
|
|
234
|
-
trace_metric_collection=metric_collection,
|
|
235
|
-
trace_metrics=metrics,
|
|
236
|
-
trace_input=input,
|
|
237
|
-
trace_output=result.output,
|
|
238
|
-
)
|
|
239
|
-
|
|
240
|
-
return result
|
|
241
|
-
|
|
242
|
-
agent.run = wrapper
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def _patch_llm_model(
|
|
246
|
-
model: Model,
|
|
247
|
-
llm_metric_collection: Optional[str] = None,
|
|
248
|
-
llm_metrics: Optional[List[BaseMetric]] = None,
|
|
249
|
-
llm_prompt: Optional[Prompt] = None,
|
|
250
|
-
):
|
|
251
|
-
original_func = model.request
|
|
252
|
-
sig = inspect.signature(original_func)
|
|
253
|
-
|
|
254
|
-
try:
|
|
255
|
-
model_name = model.model_name
|
|
256
|
-
except Exception:
|
|
257
|
-
model_name = "unknown"
|
|
258
|
-
|
|
259
|
-
@functools.wraps(original_func)
|
|
260
|
-
async def wrapper(*args, **kwargs):
|
|
261
|
-
bound = sig.bind_partial(*args, **kwargs)
|
|
262
|
-
bound.apply_defaults()
|
|
263
|
-
request = bound.arguments.get("messages", [])
|
|
264
|
-
|
|
265
|
-
with Observer(
|
|
266
|
-
span_type="llm",
|
|
267
|
-
func_name="LLM",
|
|
268
|
-
observe_kwargs={"model": model_name},
|
|
269
|
-
metrics=llm_metrics,
|
|
270
|
-
metric_collection=llm_metric_collection,
|
|
271
|
-
) as observer:
|
|
272
|
-
result = await original_func(*args, **kwargs)
|
|
273
|
-
observer.update_span_properties = (
|
|
274
|
-
lambda llm_span: set_llm_span_attributes(
|
|
275
|
-
llm_span, request, result, llm_prompt
|
|
276
|
-
)
|
|
277
|
-
)
|
|
278
|
-
observer.result = result
|
|
279
|
-
return result
|
|
280
|
-
|
|
281
|
-
model.request = wrapper
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
################### Helper Functions ###################
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
def _create_patched_tool(
|
|
288
|
-
func: Callable,
|
|
289
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
290
|
-
metric_collection: Optional[str] = None,
|
|
291
|
-
):
|
|
292
|
-
import asyncio
|
|
293
|
-
|
|
294
|
-
original_func = func
|
|
295
|
-
|
|
296
|
-
is_async = asyncio.iscoroutinefunction(original_func)
|
|
297
|
-
|
|
298
|
-
if is_async:
|
|
299
|
-
|
|
300
|
-
@functools.wraps(original_func)
|
|
301
|
-
async def async_wrapper(*args, **kwargs):
|
|
302
|
-
sanitized_args = sanitize_run_context(args)
|
|
303
|
-
sanitized_kwargs = sanitize_run_context(kwargs)
|
|
304
|
-
with Observer(
|
|
305
|
-
span_type="tool",
|
|
306
|
-
func_name=original_func.__name__,
|
|
307
|
-
metrics=metrics,
|
|
308
|
-
metric_collection=metric_collection,
|
|
309
|
-
function_kwargs={"args": sanitized_args, **sanitized_kwargs},
|
|
310
|
-
) as observer:
|
|
311
|
-
result = await original_func(*args, **kwargs)
|
|
312
|
-
observer.result = result
|
|
313
|
-
|
|
314
|
-
return result
|
|
315
|
-
|
|
316
|
-
return async_wrapper
|
|
317
|
-
else:
|
|
318
|
-
|
|
319
|
-
@functools.wraps(original_func)
|
|
320
|
-
def sync_wrapper(*args, **kwargs):
|
|
321
|
-
sanitized_args = sanitize_run_context(args)
|
|
322
|
-
sanitized_kwargs = sanitize_run_context(kwargs)
|
|
323
|
-
with Observer(
|
|
324
|
-
span_type="tool",
|
|
325
|
-
func_name=original_func.__name__,
|
|
326
|
-
metrics=metrics,
|
|
327
|
-
metric_collection=metric_collection,
|
|
328
|
-
function_kwargs={"args": sanitized_args, **sanitized_kwargs},
|
|
329
|
-
) as observer:
|
|
330
|
-
result = original_func(*args, **kwargs)
|
|
331
|
-
observer.result = result
|
|
332
|
-
|
|
333
|
-
return result
|
|
334
|
-
|
|
335
|
-
return sync_wrapper
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
def _update_trace_context(
|
|
339
|
-
trace_name: Optional[str] = None,
|
|
340
|
-
trace_tags: Optional[List[str]] = None,
|
|
341
|
-
trace_metadata: Optional[dict] = None,
|
|
342
|
-
trace_thread_id: Optional[str] = None,
|
|
343
|
-
trace_user_id: Optional[str] = None,
|
|
344
|
-
trace_metric_collection: Optional[str] = None,
|
|
345
|
-
trace_metrics: Optional[List[BaseMetric]] = None,
|
|
346
|
-
trace_input: Optional[Any] = None,
|
|
347
|
-
trace_output: Optional[Any] = None,
|
|
348
|
-
):
|
|
349
|
-
|
|
350
|
-
current_trace = current_trace_context.get()
|
|
351
|
-
current_trace.name = trace_name
|
|
352
|
-
current_trace.tags = trace_tags
|
|
353
|
-
current_trace.metadata = trace_metadata
|
|
354
|
-
current_trace.thread_id = trace_thread_id
|
|
355
|
-
current_trace.user_id = trace_user_id
|
|
356
|
-
current_trace.metric_collection = trace_metric_collection
|
|
357
|
-
current_trace.metrics = trace_metrics
|
|
358
|
-
current_trace.input = trace_input
|
|
359
|
-
current_trace.output = trace_output
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
def set_llm_span_attributes(
|
|
363
|
-
llm_span: LlmSpan,
|
|
364
|
-
requests: List[ModelRequest],
|
|
365
|
-
result: ModelResponse,
|
|
366
|
-
llm_prompt: Optional[Prompt] = None,
|
|
367
|
-
):
|
|
368
|
-
llm_span.prompt = llm_prompt
|
|
369
|
-
|
|
370
|
-
input = []
|
|
371
|
-
for request in requests:
|
|
372
|
-
for part in request.parts:
|
|
373
|
-
if isinstance(part, SystemPromptPart):
|
|
374
|
-
input.append({"role": "System", "content": part.content})
|
|
375
|
-
elif isinstance(part, UserPromptPart):
|
|
376
|
-
input.append({"role": "User", "content": part.content})
|
|
377
|
-
elif isinstance(part, ToolCallPart):
|
|
378
|
-
input.append(
|
|
379
|
-
{
|
|
380
|
-
"role": "Tool Call",
|
|
381
|
-
"name": part.tool_name,
|
|
382
|
-
"content": part.args_as_json_str(),
|
|
383
|
-
}
|
|
384
|
-
)
|
|
385
|
-
elif isinstance(part, ToolReturnPart):
|
|
386
|
-
input.append(
|
|
387
|
-
{
|
|
388
|
-
"role": "Tool Return",
|
|
389
|
-
"name": part.tool_name,
|
|
390
|
-
"content": part.model_response_str(),
|
|
391
|
-
}
|
|
392
|
-
)
|
|
393
|
-
llm_span.input = input
|
|
394
|
-
|
|
395
|
-
content = ""
|
|
396
|
-
tool_calls = []
|
|
397
|
-
for part in result.parts:
|
|
398
|
-
if isinstance(part, TextPart):
|
|
399
|
-
content += part.content + "\n"
|
|
400
|
-
elif isinstance(part, ToolCallPart):
|
|
401
|
-
tool_calls.append(
|
|
402
|
-
LlmToolCall(name=part.tool_name, args=part.args_as_dict())
|
|
403
|
-
)
|
|
404
|
-
llm_span.output = LlmOutput(
|
|
405
|
-
role="Assistant", content=content, tool_calls=tool_calls
|
|
52
|
+
"""
|
|
53
|
+
DEPRECATED: This function is deprecated and will be removed in a future version.
|
|
54
|
+
Please use deepeval.integrations.pydantic_ai.Agent to instrument instead.
|
|
55
|
+
"""
|
|
56
|
+
warnings.warn(
|
|
57
|
+
"The 'instrument_pydantic_ai()' function is deprecated and will be removed in a future version. "
|
|
58
|
+
"Please use deepeval.integrations.pydantic_ai.Agent to instrument instead. Refer to the documentation [link]", # TODO: add the link,
|
|
59
|
+
UserWarning,
|
|
60
|
+
stacklevel=2,
|
|
406
61
|
)
|
|
407
|
-
llm_span.tools_called = extract_tools_called_from_llm_response(result.parts)
|
|
408
|
-
|
|
409
62
|
|
|
410
|
-
|
|
411
|
-
|
|
63
|
+
# Don't execute the original functionality
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
# Original code below (commented out to prevent execution)
|
|
67
|
+
# global _INSTRUMENTED
|
|
68
|
+
# if api_key:
|
|
69
|
+
# deepeval.login(api_key)
|
|
70
|
+
#
|
|
71
|
+
# api_key = get_confident_api_key()
|
|
72
|
+
#
|
|
73
|
+
# if not api_key:
|
|
74
|
+
# raise ValueError("No api key provided.")
|
|
75
|
+
#
|
|
76
|
+
# if otel:
|
|
77
|
+
# instrument_pydantic_ai(api_key)
|
|
78
|
+
# else:
|
|
79
|
+
# with capture_tracing_integration("pydantic_ai"):
|
|
80
|
+
# if _INSTRUMENTED:
|
|
81
|
+
# return
|
|
82
|
+
# _patch_agent_init()
|
|
83
|
+
# _patch_agent_tool_decorator()
|
|
84
|
+
# _INSTRUMENTED = True
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ################### Init Patches ###################
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# # def _patch_agent_init():
|
|
91
|
+
# # original_init = Agent.__init__
|
|
92
|
+
|
|
93
|
+
# # @functools.wraps(original_init)
|
|
94
|
+
# # def wrapper(
|
|
95
|
+
# # *args,
|
|
96
|
+
# # llm_metric_collection: Optional[str] = None,
|
|
97
|
+
# # llm_metrics: Optional[List[BaseMetric]] = None,
|
|
98
|
+
# # llm_prompt: Optional[Prompt] = None,
|
|
99
|
+
# # agent_metric_collection: Optional[str] = None,
|
|
100
|
+
# # agent_metrics: Optional[List[BaseMetric]] = None,
|
|
101
|
+
# # name: Optional[str] = None,
|
|
102
|
+
# # tags: Optional[List[str]] = None,
|
|
103
|
+
# # metadata: Optional[dict] = None,
|
|
104
|
+
# # thread_id: Optional[str] = None,
|
|
105
|
+
# # user_id: Optional[str] = None,
|
|
106
|
+
# # metric_collection: Optional[str] = None,
|
|
107
|
+
# # metrics: Optional[List[BaseMetric]] = None,
|
|
108
|
+
# # **kwargs
|
|
109
|
+
# # ):
|
|
110
|
+
# # result = original_init(*args, **kwargs)
|
|
111
|
+
# # _patch_llm_model(args[0]._model, llm_metric_collection, llm_metrics, llm_prompt) # runtime patch of the model
|
|
112
|
+
# # _patch_agent_run(
|
|
113
|
+
# # agent=args[0],
|
|
114
|
+
# # agent_metric_collection=agent_metric_collection,
|
|
115
|
+
# # agent_metrics=agent_metrics,
|
|
116
|
+
# # init_trace_name=name,
|
|
117
|
+
# # init_trace_tags=tags,
|
|
118
|
+
# # init_trace_metadata=metadata,
|
|
119
|
+
# # init_trace_thread_id=thread_id,
|
|
120
|
+
# # init_trace_user_id=user_id,
|
|
121
|
+
# # init_trace_metric_collection=metric_collection,
|
|
122
|
+
# # init_trace_metrics=metrics,
|
|
123
|
+
# # )
|
|
124
|
+
# # _patch_agent_run_sync(
|
|
125
|
+
# # agent=args[0],
|
|
126
|
+
# # agent_metric_collection=agent_metric_collection,
|
|
127
|
+
# # agent_metrics=agent_metrics,
|
|
128
|
+
# # init_trace_name=name,
|
|
129
|
+
# # init_trace_tags=tags,
|
|
130
|
+
# # init_trace_metadata=metadata,
|
|
131
|
+
# # init_trace_thread_id=thread_id,
|
|
132
|
+
# # init_trace_user_id=user_id,
|
|
133
|
+
# # init_trace_metric_collection=metric_collection,
|
|
134
|
+
# # init_trace_metrics=metrics,
|
|
135
|
+
# # )
|
|
136
|
+
# # return result
|
|
137
|
+
|
|
138
|
+
# # Agent.__init__ = wrapper
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# # def _patch_agent_tool_decorator():
|
|
142
|
+
# # original_tool = Agent.tool
|
|
143
|
+
|
|
144
|
+
# # @functools.wraps(original_tool)
|
|
145
|
+
# # def wrapper(
|
|
146
|
+
# # *args,
|
|
147
|
+
# # metrics: Optional[List[BaseMetric]] = None,
|
|
148
|
+
# # metric_collection: Optional[str] = None,
|
|
149
|
+
# # **kwargs
|
|
150
|
+
# # ):
|
|
151
|
+
# # # Case 1: Direct decoration - @agent.tool
|
|
152
|
+
# # if args and callable(args[0]):
|
|
153
|
+
# # patched_func = _create_patched_tool(
|
|
154
|
+
# # args[0], metrics, metric_collection
|
|
155
|
+
# # )
|
|
156
|
+
# # new_args = (patched_func,) + args[1:]
|
|
157
|
+
# # return original_tool(*new_args, **kwargs)
|
|
158
|
+
|
|
159
|
+
# # # Case 2: Decoration with arguments - @agent.tool(metrics=..., metric_collection=...)
|
|
160
|
+
# # else:
|
|
161
|
+
# # # Return a decorator function that will receive the actual function
|
|
162
|
+
# # def decorator(func):
|
|
163
|
+
# # patched_func = _create_patched_tool(
|
|
164
|
+
# # func, metrics, metric_collection
|
|
165
|
+
# # )
|
|
166
|
+
# # return original_tool(*args, **kwargs)(patched_func)
|
|
167
|
+
|
|
168
|
+
# # return decorator
|
|
169
|
+
|
|
170
|
+
# # Agent.tool = wrapper
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ################### Runtime Patches ###################
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# # def _patch_agent_run_sync(
|
|
177
|
+
# # agent: Agent,
|
|
178
|
+
# # agent_metric_collection: Optional[str] = None,
|
|
179
|
+
# # agent_metrics: Optional[List[BaseMetric]] = None,
|
|
180
|
+
# # init_trace_name: Optional[str] = None,
|
|
181
|
+
# # init_trace_tags: Optional[List[str]] = None,
|
|
182
|
+
# # init_trace_metadata: Optional[dict] = None,
|
|
183
|
+
# # init_trace_thread_id: Optional[str] = None,
|
|
184
|
+
# # init_trace_user_id: Optional[str] = None,
|
|
185
|
+
# # init_trace_metric_collection: Optional[str] = None,
|
|
186
|
+
# # init_trace_metrics: Optional[List[BaseMetric]] = None,
|
|
187
|
+
# # ):
|
|
188
|
+
# # original_run_sync = agent.run_sync
|
|
189
|
+
|
|
190
|
+
# # @functools.wraps(original_run_sync)
|
|
191
|
+
# # def wrapper(
|
|
192
|
+
# # *args,
|
|
193
|
+
# # metric_collection: Optional[str] = None,
|
|
194
|
+
# # metrics: Optional[List[BaseMetric]] = None,
|
|
195
|
+
# # name: Optional[str] = None,
|
|
196
|
+
# # tags: Optional[List[str]] = None,
|
|
197
|
+
# # metadata: Optional[dict] = None,
|
|
198
|
+
# # thread_id: Optional[str] = None,
|
|
199
|
+
# # user_id: Optional[str] = None,
|
|
200
|
+
# # **kwargs
|
|
201
|
+
# # ):
|
|
202
|
+
|
|
203
|
+
# # sig = inspect.signature(original_run_sync)
|
|
204
|
+
# # bound = sig.bind_partial(*args, **kwargs)
|
|
205
|
+
# # bound.apply_defaults()
|
|
206
|
+
# # input = bound.arguments.get("user_prompt", None)
|
|
207
|
+
|
|
208
|
+
# # with Observer(
|
|
209
|
+
# # span_type="agent",
|
|
210
|
+
# # func_name="Agent",
|
|
211
|
+
# # function_kwargs={"input": input},
|
|
212
|
+
# # metrics=agent_metrics,
|
|
213
|
+
# # metric_collection=agent_metric_collection,
|
|
214
|
+
# # ) as observer:
|
|
215
|
+
|
|
216
|
+
# # token = _IN_RUN_SYNC.set(True)
|
|
217
|
+
# # try:
|
|
218
|
+
# # result = original_run_sync(*args, **kwargs)
|
|
219
|
+
# # finally:
|
|
220
|
+
# # _IN_RUN_SYNC.reset(token)
|
|
221
|
+
|
|
222
|
+
# # observer.update_span_properties = (
|
|
223
|
+
# # lambda agent_span: set_agent_span_attributes(agent_span, result)
|
|
224
|
+
# # )
|
|
225
|
+
# # observer.result = result.output
|
|
226
|
+
|
|
227
|
+
# # _update_trace_context(
|
|
228
|
+
# # trace_name=init_trace_name if init_trace_name else name,
|
|
229
|
+
# # trace_tags=init_trace_tags if init_trace_tags else tags,
|
|
230
|
+
# # trace_metadata=init_trace_metadata if init_trace_metadata else metadata,
|
|
231
|
+
# # trace_thread_id=init_trace_thread_id if init_trace_thread_id else thread_id,
|
|
232
|
+
# # trace_user_id=init_trace_user_id if init_trace_user_id else user_id,
|
|
233
|
+
# # trace_metric_collection=init_trace_metric_collection if init_trace_metric_collection else metric_collection,
|
|
234
|
+
# # trace_metrics=init_trace_metrics if init_trace_metrics else metrics,
|
|
235
|
+
# # trace_input=input,
|
|
236
|
+
# # trace_output=result.output,
|
|
237
|
+
# # )
|
|
238
|
+
|
|
239
|
+
# # return result
|
|
240
|
+
|
|
241
|
+
# # agent.run_sync = wrapper
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# # def _patch_agent_run(
|
|
245
|
+
# # agent: Agent,
|
|
246
|
+
# # agent_metric_collection: Optional[str] = None,
|
|
247
|
+
# # agent_metrics: Optional[List[BaseMetric]] = None,
|
|
248
|
+
# # init_trace_name: Optional[str] = None,
|
|
249
|
+
# # init_trace_tags: Optional[List[str]] = None,
|
|
250
|
+
# # init_trace_metadata: Optional[dict] = None,
|
|
251
|
+
# # init_trace_thread_id: Optional[str] = None,
|
|
252
|
+
# # init_trace_user_id: Optional[str] = None,
|
|
253
|
+
# # init_trace_metric_collection: Optional[str] = None,
|
|
254
|
+
# # init_trace_metrics: Optional[List[BaseMetric]] = None,
|
|
255
|
+
# # ):
|
|
256
|
+
# # original_run = agent.run
|
|
257
|
+
|
|
258
|
+
# # @functools.wraps(original_run)
|
|
259
|
+
# # async def wrapper(
|
|
260
|
+
# # *args,
|
|
261
|
+
# # metric_collection: Optional[str] = None,
|
|
262
|
+
# # metrics: Optional[List[BaseMetric]] = None,
|
|
263
|
+
# # name: Optional[str] = None,
|
|
264
|
+
# # tags: Optional[List[str]] = None,
|
|
265
|
+
# # metadata: Optional[dict] = None,
|
|
266
|
+
# # thread_id: Optional[str] = None,
|
|
267
|
+
# # user_id: Optional[str] = None,
|
|
268
|
+
# # **kwargs
|
|
269
|
+
# # ):
|
|
270
|
+
# # sig = inspect.signature(original_run)
|
|
271
|
+
# # bound = sig.bind_partial(*args, **kwargs)
|
|
272
|
+
# # bound.apply_defaults()
|
|
273
|
+
# # input = bound.arguments.get("user_prompt", None)
|
|
274
|
+
|
|
275
|
+
# # in_sync = _IN_RUN_SYNC.get()
|
|
276
|
+
# # with Observer(
|
|
277
|
+
# # span_type="agent" if not in_sync else "custom",
|
|
278
|
+
# # func_name="Agent" if not in_sync else "run",
|
|
279
|
+
# # function_kwargs={"input": input},
|
|
280
|
+
# # metrics=agent_metrics if not in_sync else None,
|
|
281
|
+
# # metric_collection=agent_metric_collection if not in_sync else None,
|
|
282
|
+
# # ) as observer:
|
|
283
|
+
# # print(args)
|
|
284
|
+
# # print(kwargs)
|
|
285
|
+
# # result = await original_run(*args, **kwargs)
|
|
286
|
+
# # observer.update_span_properties = (
|
|
287
|
+
# # lambda agent_span: set_agent_span_attributes(agent_span, result)
|
|
288
|
+
# # )
|
|
289
|
+
# # observer.result = result.output
|
|
290
|
+
|
|
291
|
+
# # _update_trace_context(
|
|
292
|
+
# # trace_name=init_trace_name if init_trace_name else name,
|
|
293
|
+
# # trace_tags=init_trace_tags if init_trace_tags else tags,
|
|
294
|
+
# # trace_metadata=init_trace_metadata if init_trace_metadata else metadata,
|
|
295
|
+
# # trace_thread_id=init_trace_thread_id if init_trace_thread_id else thread_id,
|
|
296
|
+
# # trace_user_id=init_trace_user_id if init_trace_user_id else user_id,
|
|
297
|
+
# # trace_metric_collection=init_trace_metric_collection if init_trace_metric_collection else metric_collection,
|
|
298
|
+
# # trace_metrics=init_trace_metrics if init_trace_metrics else metrics,
|
|
299
|
+
# # trace_input=input,
|
|
300
|
+
# # trace_output=result.output,
|
|
301
|
+
# # )
|
|
302
|
+
|
|
303
|
+
# # return result
|
|
304
|
+
|
|
305
|
+
# # agent.run = wrapper
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# def patch_llm_model(
|
|
309
|
+
# model: Model,
|
|
310
|
+
# llm_metric_collection: Optional[str] = None,
|
|
311
|
+
# llm_metrics: Optional[List[BaseMetric]] = None,
|
|
312
|
+
# llm_prompt: Optional[Prompt] = None,
|
|
313
|
+
# ):
|
|
314
|
+
# original_func = model.request
|
|
315
|
+
# sig = inspect.signature(original_func)
|
|
316
|
+
|
|
317
|
+
# try:
|
|
318
|
+
# model_name = model.model_name
|
|
319
|
+
# except Exception:
|
|
320
|
+
# model_name = "unknown"
|
|
321
|
+
|
|
322
|
+
# @functools.wraps(original_func)
|
|
323
|
+
# async def wrapper(*args, **kwargs):
|
|
324
|
+
# bound = sig.bind_partial(*args, **kwargs)
|
|
325
|
+
# bound.apply_defaults()
|
|
326
|
+
# request = bound.arguments.get("messages", [])
|
|
327
|
+
|
|
328
|
+
# with Observer(
|
|
329
|
+
# span_type="llm",
|
|
330
|
+
# func_name="LLM",
|
|
331
|
+
# observe_kwargs={"model": model_name},
|
|
332
|
+
# metrics=llm_metrics,
|
|
333
|
+
# metric_collection=llm_metric_collection,
|
|
334
|
+
# ) as observer:
|
|
335
|
+
# result = await original_func(*args, **kwargs)
|
|
336
|
+
# observer.update_span_properties = (
|
|
337
|
+
# lambda llm_span: set_llm_span_attributes(
|
|
338
|
+
# llm_span, request, result, llm_prompt
|
|
339
|
+
# )
|
|
340
|
+
# )
|
|
341
|
+
# observer.result = result
|
|
342
|
+
# return result
|
|
343
|
+
|
|
344
|
+
# model.request = wrapper
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# ################### Helper Functions ###################
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# def create_patched_tool(
|
|
351
|
+
# func: Callable,
|
|
352
|
+
# metrics: Optional[List[BaseMetric]] = None,
|
|
353
|
+
# metric_collection: Optional[str] = None,
|
|
354
|
+
# ):
|
|
355
|
+
# import asyncio
|
|
356
|
+
|
|
357
|
+
# original_func = func
|
|
358
|
+
|
|
359
|
+
# is_async = asyncio.iscoroutinefunction(original_func)
|
|
360
|
+
|
|
361
|
+
# if is_async:
|
|
362
|
+
|
|
363
|
+
# @functools.wraps(original_func)
|
|
364
|
+
# async def async_wrapper(*args, **kwargs):
|
|
365
|
+
# sanitized_args = sanitize_run_context(args)
|
|
366
|
+
# sanitized_kwargs = sanitize_run_context(kwargs)
|
|
367
|
+
# with Observer(
|
|
368
|
+
# span_type="tool",
|
|
369
|
+
# func_name=original_func.__name__,
|
|
370
|
+
# metrics=metrics,
|
|
371
|
+
# metric_collection=metric_collection,
|
|
372
|
+
# function_kwargs={"args": sanitized_args, **sanitized_kwargs},
|
|
373
|
+
# ) as observer:
|
|
374
|
+
# result = await original_func(*args, **kwargs)
|
|
375
|
+
# observer.result = result
|
|
376
|
+
|
|
377
|
+
# return result
|
|
378
|
+
|
|
379
|
+
# return async_wrapper
|
|
380
|
+
# else:
|
|
381
|
+
|
|
382
|
+
# @functools.wraps(original_func)
|
|
383
|
+
# def sync_wrapper(*args, **kwargs):
|
|
384
|
+
# sanitized_args = sanitize_run_context(args)
|
|
385
|
+
# sanitized_kwargs = sanitize_run_context(kwargs)
|
|
386
|
+
# with Observer(
|
|
387
|
+
# span_type="tool",
|
|
388
|
+
# func_name=original_func.__name__,
|
|
389
|
+
# metrics=metrics,
|
|
390
|
+
# metric_collection=metric_collection,
|
|
391
|
+
# function_kwargs={"args": sanitized_args, **sanitized_kwargs},
|
|
392
|
+
# ) as observer:
|
|
393
|
+
# result = original_func(*args, **kwargs)
|
|
394
|
+
# observer.result = result
|
|
395
|
+
|
|
396
|
+
# return result
|
|
397
|
+
|
|
398
|
+
# return sync_wrapper
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# def update_trace_context(
|
|
402
|
+
# trace_name: Optional[str] = None,
|
|
403
|
+
# trace_tags: Optional[List[str]] = None,
|
|
404
|
+
# trace_metadata: Optional[dict] = None,
|
|
405
|
+
# trace_thread_id: Optional[str] = None,
|
|
406
|
+
# trace_user_id: Optional[str] = None,
|
|
407
|
+
# trace_metric_collection: Optional[str] = None,
|
|
408
|
+
# trace_metrics: Optional[List[BaseMetric]] = None,
|
|
409
|
+
# trace_input: Optional[Any] = None,
|
|
410
|
+
# trace_output: Optional[Any] = None,
|
|
411
|
+
# ):
|
|
412
|
+
|
|
413
|
+
# current_trace = current_trace_context.get()
|
|
414
|
+
|
|
415
|
+
# if trace_name:
|
|
416
|
+
# current_trace.name = trace_name
|
|
417
|
+
# if trace_tags:
|
|
418
|
+
# current_trace.tags = trace_tags
|
|
419
|
+
# if trace_metadata:
|
|
420
|
+
# current_trace.metadata = trace_metadata
|
|
421
|
+
# if trace_thread_id:
|
|
422
|
+
# current_trace.thread_id = trace_thread_id
|
|
423
|
+
# if trace_user_id:
|
|
424
|
+
# current_trace.user_id = trace_user_id
|
|
425
|
+
# if trace_metric_collection:
|
|
426
|
+
# current_trace.metric_collection = trace_metric_collection
|
|
427
|
+
# if trace_metrics:
|
|
428
|
+
# current_trace.metrics = trace_metrics
|
|
429
|
+
# if trace_input:
|
|
430
|
+
# current_trace.input = trace_input
|
|
431
|
+
# if trace_output:
|
|
432
|
+
# current_trace.output = trace_output
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
# def set_llm_span_attributes(
|
|
436
|
+
# llm_span: LlmSpan,
|
|
437
|
+
# requests: List[ModelRequest],
|
|
438
|
+
# result: ModelResponse,
|
|
439
|
+
# llm_prompt: Optional[Prompt] = None,
|
|
440
|
+
# ):
|
|
441
|
+
# llm_span.prompt = llm_prompt
|
|
442
|
+
|
|
443
|
+
# input = []
|
|
444
|
+
# for request in requests:
|
|
445
|
+
# for part in request.parts:
|
|
446
|
+
# if isinstance(part, SystemPromptPart):
|
|
447
|
+
# input.append({"role": "System", "content": part.content})
|
|
448
|
+
# elif isinstance(part, UserPromptPart):
|
|
449
|
+
# input.append({"role": "User", "content": part.content})
|
|
450
|
+
# elif isinstance(part, ToolCallPart):
|
|
451
|
+
# input.append(
|
|
452
|
+
# {
|
|
453
|
+
# "role": "Tool Call",
|
|
454
|
+
# "name": part.tool_name,
|
|
455
|
+
# "content": part.args_as_json_str(),
|
|
456
|
+
# }
|
|
457
|
+
# )
|
|
458
|
+
# elif isinstance(part, ToolReturnPart):
|
|
459
|
+
# input.append(
|
|
460
|
+
# {
|
|
461
|
+
# "role": "Tool Return",
|
|
462
|
+
# "name": part.tool_name,
|
|
463
|
+
# "content": part.model_response_str(),
|
|
464
|
+
# }
|
|
465
|
+
# )
|
|
466
|
+
# llm_span.input = input
|
|
467
|
+
|
|
468
|
+
# content = ""
|
|
469
|
+
# tool_calls = []
|
|
470
|
+
# for part in result.parts:
|
|
471
|
+
# if isinstance(part, TextPart):
|
|
472
|
+
# content += part.content + "\n"
|
|
473
|
+
# elif isinstance(part, ToolCallPart):
|
|
474
|
+
# tool_calls.append(
|
|
475
|
+
# LlmToolCall(name=part.tool_name, args=part.args_as_dict())
|
|
476
|
+
# )
|
|
477
|
+
# llm_span.output = LlmOutput(
|
|
478
|
+
# role="Assistant", content=content, tool_calls=tool_calls
|
|
479
|
+
# )
|
|
480
|
+
# llm_span.tools_called = extract_tools_called_from_llm_response(result.parts)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
# def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
|
|
484
|
+
# agent_span.tools_called = extract_tools_called(result)
|