deepeval 3.5.9__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings_manager.py +1 -1
- deepeval/contextvars.py +25 -0
- deepeval/dataset/__init__.py +8 -2
- deepeval/evaluate/execute.py +15 -3
- deepeval/openai_agents/__init__.py +4 -3
- deepeval/openai_agents/agent.py +8 -166
- deepeval/openai_agents/callback_handler.py +63 -62
- deepeval/openai_agents/extractors.py +83 -7
- deepeval/openai_agents/patch.py +255 -61
- deepeval/openai_agents/runner.py +348 -335
- deepeval/tracing/context.py +1 -0
- deepeval/tracing/tracing.py +3 -0
- deepeval/utils.py +4 -3
- {deepeval-3.5.9.dist-info → deepeval-3.6.0.dist-info}/METADATA +1 -1
- {deepeval-3.5.9.dist-info → deepeval-3.6.0.dist-info}/RECORD +19 -18
- {deepeval-3.5.9.dist-info → deepeval-3.6.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.9.dist-info → deepeval-3.6.0.dist-info}/WHEEL +0 -0
- {deepeval-3.5.9.dist-info → deepeval-3.6.0.dist-info}/entry_points.txt +0 -0
deepeval/openai_agents/patch.py
CHANGED
|
@@ -1,58 +1,57 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from typing import Any, Callable, Optional
|
|
5
|
-
|
|
4
|
+
from typing import Any, Callable, Optional, List
|
|
5
|
+
from deepeval.tracing.context import current_span_context
|
|
6
|
+
from deepeval.tracing.types import AgentSpan, ToolSpan
|
|
7
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
6
8
|
from deepeval.tracing import observe
|
|
9
|
+
from deepeval.tracing.tracing import Observer
|
|
10
|
+
from deepeval.metrics import BaseMetric
|
|
11
|
+
from deepeval.prompt import Prompt
|
|
12
|
+
from deepeval.tracing.types import LlmSpan
|
|
13
|
+
from functools import wraps
|
|
7
14
|
|
|
8
15
|
try:
|
|
9
16
|
from agents import function_tool as _agents_function_tool # type: ignore
|
|
10
|
-
from
|
|
17
|
+
from deepeval.openai_agents.extractors import parse_response_output
|
|
18
|
+
from agents.run import AgentRunner
|
|
19
|
+
from agents.run import SingleStepResult
|
|
20
|
+
from agents.models.interface import Model
|
|
21
|
+
from agents import Agent
|
|
11
22
|
except Exception:
|
|
12
|
-
|
|
13
|
-
_agents_function_schema = None # type: ignore
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _compute_description(
|
|
17
|
-
the_func: Callable[..., Any],
|
|
18
|
-
*,
|
|
19
|
-
name_override: Optional[str],
|
|
20
|
-
description_override: Optional[str],
|
|
21
|
-
docstring_style: Optional[str],
|
|
22
|
-
use_docstring_info: Optional[bool],
|
|
23
|
-
strict_mode: Optional[bool],
|
|
24
|
-
) -> Optional[str]:
|
|
25
|
-
if _agents_function_schema is None:
|
|
26
|
-
return None
|
|
27
|
-
schema = _agents_function_schema(
|
|
28
|
-
func=the_func,
|
|
29
|
-
name_override=name_override,
|
|
30
|
-
description_override=description_override,
|
|
31
|
-
docstring_style=docstring_style,
|
|
32
|
-
use_docstring_info=(
|
|
33
|
-
use_docstring_info if use_docstring_info is not None else True
|
|
34
|
-
),
|
|
35
|
-
strict_json_schema=strict_mode if strict_mode is not None else True,
|
|
36
|
-
)
|
|
37
|
-
return schema.description
|
|
23
|
+
pass
|
|
38
24
|
|
|
39
25
|
|
|
40
26
|
def _wrap_with_observe(
|
|
41
27
|
func: Callable[..., Any],
|
|
42
28
|
metrics: Optional[str] = None,
|
|
43
29
|
metric_collection: Optional[str] = None,
|
|
44
|
-
description: Optional[str] = None,
|
|
45
30
|
) -> Callable[..., Any]:
|
|
46
31
|
if getattr(func, "_is_deepeval_observed", False):
|
|
47
32
|
return func
|
|
48
33
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
34
|
+
if inspect.iscoroutinefunction(func):
|
|
35
|
+
|
|
36
|
+
@wraps(func)
|
|
37
|
+
async def observed(*args: Any, **kwargs: Any) -> Any:
|
|
38
|
+
current_span = current_span_context.get()
|
|
39
|
+
if isinstance(current_span, ToolSpan):
|
|
40
|
+
current_span.metrics = metrics
|
|
41
|
+
current_span.metric_collection = metric_collection
|
|
42
|
+
return await func(*args, **kwargs)
|
|
43
|
+
|
|
44
|
+
else:
|
|
45
|
+
|
|
46
|
+
@wraps(func)
|
|
47
|
+
def observed(*args: Any, **kwargs: Any) -> Any:
|
|
48
|
+
current_span = current_span_context.get()
|
|
49
|
+
if isinstance(current_span, ToolSpan):
|
|
50
|
+
current_span.metrics = metrics
|
|
51
|
+
current_span.metric_collection = metric_collection
|
|
52
|
+
return func(*args, **kwargs)
|
|
55
53
|
|
|
54
|
+
setattr(observed, "_is_deepeval_observed", True)
|
|
56
55
|
try:
|
|
57
56
|
observed.__signature__ = inspect.signature(func) # type: ignore[attr-defined]
|
|
58
57
|
except Exception:
|
|
@@ -71,45 +70,240 @@ def function_tool(
|
|
|
71
70
|
"agents.function_tool is not available. Please install agents via your package manager"
|
|
72
71
|
)
|
|
73
72
|
|
|
74
|
-
# Peek decorator options to mirror description logic
|
|
75
|
-
name_override = kwargs.get("name_override")
|
|
76
|
-
description_override = kwargs.get("description_override")
|
|
77
|
-
docstring_style = kwargs.get("docstring_style")
|
|
78
|
-
use_docstring_info = kwargs.get("use_docstring_info")
|
|
79
|
-
strict_mode = kwargs.get("strict_mode")
|
|
80
|
-
|
|
81
73
|
if callable(func):
|
|
82
|
-
|
|
83
|
-
func,
|
|
84
|
-
name_override=name_override,
|
|
85
|
-
description_override=description_override,
|
|
86
|
-
docstring_style=docstring_style,
|
|
87
|
-
use_docstring_info=use_docstring_info,
|
|
88
|
-
strict_mode=strict_mode,
|
|
89
|
-
)
|
|
74
|
+
|
|
90
75
|
wrapped = _wrap_with_observe(
|
|
91
76
|
func,
|
|
92
77
|
metrics=metrics,
|
|
93
78
|
metric_collection=metric_collection,
|
|
94
|
-
description=description,
|
|
95
79
|
)
|
|
96
80
|
return _agents_function_tool(wrapped, *args, **kwargs)
|
|
97
81
|
|
|
98
82
|
def decorator(real_func: Callable[..., Any]) -> Any:
|
|
99
|
-
|
|
100
|
-
real_func,
|
|
101
|
-
name_override=name_override,
|
|
102
|
-
description_override=description_override,
|
|
103
|
-
docstring_style=docstring_style,
|
|
104
|
-
use_docstring_info=use_docstring_info,
|
|
105
|
-
strict_mode=strict_mode,
|
|
106
|
-
)
|
|
83
|
+
|
|
107
84
|
wrapped = _wrap_with_observe(
|
|
108
85
|
real_func,
|
|
109
86
|
metrics=metrics,
|
|
110
87
|
metric_collection=metric_collection,
|
|
111
|
-
description=description,
|
|
112
88
|
)
|
|
113
89
|
return _agents_function_tool(wrapped, *args, **kwargs)
|
|
114
90
|
|
|
115
91
|
return decorator
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
_PATCHED_DEFAULT_RUN_SINGLE_TURN = False
|
|
95
|
+
_PATCHED_DEFAULT_RUN_SINGLE_TURN_STREAMED = False
|
|
96
|
+
_PATCHED_DEFAULT_GET_MODEL = False
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class _ObservedModel(Model):
|
|
100
|
+
def __init__(
|
|
101
|
+
self,
|
|
102
|
+
inner: Model,
|
|
103
|
+
llm_metric_collection: str = None,
|
|
104
|
+
llm_metrics: List[BaseMetric] = None,
|
|
105
|
+
confident_prompt: Prompt = None,
|
|
106
|
+
) -> None:
|
|
107
|
+
self._inner = inner
|
|
108
|
+
self._llm_metric_collection = llm_metric_collection
|
|
109
|
+
self._llm_metrics = llm_metrics
|
|
110
|
+
self._confident_prompt = confident_prompt
|
|
111
|
+
|
|
112
|
+
def __getattr__(self, name: str) -> Any:
|
|
113
|
+
return getattr(self._inner, name)
|
|
114
|
+
|
|
115
|
+
async def get_response(
|
|
116
|
+
self,
|
|
117
|
+
*args,
|
|
118
|
+
**kwargs,
|
|
119
|
+
):
|
|
120
|
+
with Observer(
|
|
121
|
+
span_type="llm",
|
|
122
|
+
func_name="LLM",
|
|
123
|
+
observe_kwargs={"model": "temp_model"},
|
|
124
|
+
metrics=self._llm_metrics,
|
|
125
|
+
metric_collection=self._llm_metric_collection,
|
|
126
|
+
):
|
|
127
|
+
result = await self._inner.get_response(
|
|
128
|
+
*args,
|
|
129
|
+
**kwargs,
|
|
130
|
+
)
|
|
131
|
+
llm_span: LlmSpan = current_span_context.get()
|
|
132
|
+
llm_span.prompt = self._confident_prompt
|
|
133
|
+
|
|
134
|
+
return result
|
|
135
|
+
|
|
136
|
+
def stream_response(
|
|
137
|
+
self,
|
|
138
|
+
*args,
|
|
139
|
+
**kwargs,
|
|
140
|
+
):
|
|
141
|
+
|
|
142
|
+
async def _gen():
|
|
143
|
+
observer = Observer(
|
|
144
|
+
span_type="llm",
|
|
145
|
+
func_name="LLM",
|
|
146
|
+
observe_kwargs={"model": "temp_model"},
|
|
147
|
+
metrics=self._llm_metrics,
|
|
148
|
+
metric_collection=self._llm_metric_collection,
|
|
149
|
+
)
|
|
150
|
+
observer.__enter__()
|
|
151
|
+
|
|
152
|
+
llm_span: LlmSpan = current_span_context.get()
|
|
153
|
+
llm_span.prompt = self._confident_prompt
|
|
154
|
+
|
|
155
|
+
try:
|
|
156
|
+
async for event in self._inner.stream_response(
|
|
157
|
+
*args,
|
|
158
|
+
**kwargs,
|
|
159
|
+
):
|
|
160
|
+
yield event
|
|
161
|
+
except Exception as e:
|
|
162
|
+
observer.__exit__(type(e), e, e.__traceback__)
|
|
163
|
+
raise
|
|
164
|
+
finally:
|
|
165
|
+
observer.__exit__(None, None, None)
|
|
166
|
+
|
|
167
|
+
return _gen()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def patch_default_agent_run_single_turn():
|
|
171
|
+
global _PATCHED_DEFAULT_RUN_SINGLE_TURN
|
|
172
|
+
if _PATCHED_DEFAULT_RUN_SINGLE_TURN:
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
original_run_single_turn = AgentRunner._run_single_turn
|
|
176
|
+
|
|
177
|
+
@classmethod
|
|
178
|
+
async def patched_run_single_turn(cls, *args, **kwargs):
|
|
179
|
+
res: SingleStepResult = await original_run_single_turn.__func__(
|
|
180
|
+
cls, *args, **kwargs
|
|
181
|
+
)
|
|
182
|
+
try:
|
|
183
|
+
if isinstance(res, SingleStepResult):
|
|
184
|
+
agent_span = current_span_context.get()
|
|
185
|
+
if isinstance(agent_span, AgentSpan):
|
|
186
|
+
_set_agent_metrics(kwargs.get("agent", None), agent_span)
|
|
187
|
+
if agent_span.input is None:
|
|
188
|
+
_pre_step_items_raw_list = [
|
|
189
|
+
item.raw_item for item in res.pre_step_items
|
|
190
|
+
]
|
|
191
|
+
agent_span.input = (
|
|
192
|
+
make_json_serializable(_pre_step_items_raw_list)
|
|
193
|
+
if _pre_step_items_raw_list
|
|
194
|
+
else make_json_serializable(res.original_input)
|
|
195
|
+
)
|
|
196
|
+
agent_span.output = parse_response_output(
|
|
197
|
+
res.model_response.output
|
|
198
|
+
)
|
|
199
|
+
except Exception:
|
|
200
|
+
pass
|
|
201
|
+
return res
|
|
202
|
+
|
|
203
|
+
AgentRunner._run_single_turn = patched_run_single_turn
|
|
204
|
+
_PATCHED_DEFAULT_RUN_SINGLE_TURN = True # type: ignore
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def patch_default_agent_run_single_turn_streamed():
|
|
208
|
+
global _PATCHED_DEFAULT_RUN_SINGLE_TURN_STREAMED
|
|
209
|
+
if _PATCHED_DEFAULT_RUN_SINGLE_TURN_STREAMED:
|
|
210
|
+
return
|
|
211
|
+
|
|
212
|
+
original_run_single_turn_streamed = AgentRunner._run_single_turn_streamed
|
|
213
|
+
|
|
214
|
+
@classmethod
|
|
215
|
+
async def patched_run_single_turn_streamed(cls, *args, **kwargs):
|
|
216
|
+
|
|
217
|
+
res: SingleStepResult = (
|
|
218
|
+
await original_run_single_turn_streamed.__func__(
|
|
219
|
+
cls, *args, **kwargs
|
|
220
|
+
)
|
|
221
|
+
)
|
|
222
|
+
try:
|
|
223
|
+
if isinstance(res, SingleStepResult):
|
|
224
|
+
agent_span = current_span_context.get()
|
|
225
|
+
if isinstance(agent_span, AgentSpan):
|
|
226
|
+
_set_agent_metrics(
|
|
227
|
+
kwargs.get("agent", None), agent_span
|
|
228
|
+
) # TODO: getting no agent
|
|
229
|
+
if agent_span.input is None:
|
|
230
|
+
_pre_step_items_raw_list = [
|
|
231
|
+
item.raw_item for item in res.pre_step_items
|
|
232
|
+
]
|
|
233
|
+
agent_span.input = (
|
|
234
|
+
make_json_serializable(_pre_step_items_raw_list)
|
|
235
|
+
if _pre_step_items_raw_list
|
|
236
|
+
else make_json_serializable(res.original_input)
|
|
237
|
+
)
|
|
238
|
+
agent_span.output = parse_response_output(
|
|
239
|
+
res.model_response.output
|
|
240
|
+
)
|
|
241
|
+
except Exception:
|
|
242
|
+
pass
|
|
243
|
+
return res
|
|
244
|
+
|
|
245
|
+
AgentRunner._run_single_turn_streamed = patched_run_single_turn_streamed
|
|
246
|
+
_PATCHED_DEFAULT_RUN_SINGLE_TURN_STREAMED = True # type: ignore
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def patch_default_agent_runner_get_model():
|
|
250
|
+
global _PATCHED_DEFAULT_GET_MODEL
|
|
251
|
+
if _PATCHED_DEFAULT_GET_MODEL:
|
|
252
|
+
return
|
|
253
|
+
|
|
254
|
+
original_get_model_cm = AgentRunner._get_model
|
|
255
|
+
try:
|
|
256
|
+
original_get_model = original_get_model_cm.__func__
|
|
257
|
+
except AttributeError:
|
|
258
|
+
original_get_model = (
|
|
259
|
+
original_get_model_cm # fallback (non-classmethod edge case)
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
def patched_get_model(cls, *args, **kwargs) -> Model:
|
|
263
|
+
model = original_get_model(cls, *args, **kwargs)
|
|
264
|
+
|
|
265
|
+
agent = (
|
|
266
|
+
kwargs.get("agent")
|
|
267
|
+
if "agent" in kwargs
|
|
268
|
+
else (args[0] if args else None)
|
|
269
|
+
)
|
|
270
|
+
if agent is None:
|
|
271
|
+
return model
|
|
272
|
+
|
|
273
|
+
if isinstance(model, _ObservedModel):
|
|
274
|
+
return model
|
|
275
|
+
|
|
276
|
+
llm_metrics = getattr(agent, "llm_metrics", None)
|
|
277
|
+
llm_metric_collection = getattr(agent, "llm_metric_collection", None)
|
|
278
|
+
confident_prompt = getattr(agent, "confident_prompt", None)
|
|
279
|
+
return _ObservedModel(
|
|
280
|
+
inner=model,
|
|
281
|
+
llm_metric_collection=llm_metric_collection,
|
|
282
|
+
llm_metrics=llm_metrics,
|
|
283
|
+
confident_prompt=confident_prompt,
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
# Preserve basic metadata and mark as patched
|
|
287
|
+
patched_get_model.__name__ = original_get_model.__name__
|
|
288
|
+
patched_get_model.__doc__ = original_get_model.__doc__
|
|
289
|
+
|
|
290
|
+
AgentRunner._get_model = classmethod(patched_get_model)
|
|
291
|
+
_PATCHED_DEFAULT_GET_MODEL = True
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _set_agent_metrics(agent: Agent, agent_span: AgentSpan) -> None:
|
|
295
|
+
try:
|
|
296
|
+
if agent is None or agent_span is None:
|
|
297
|
+
return
|
|
298
|
+
agent_metrics = getattr(agent, "agent_metrics", None)
|
|
299
|
+
agent_metric_collection = getattr(
|
|
300
|
+
agent, "agent_metric_collection", None
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
if agent_metrics is not None:
|
|
304
|
+
agent_span.metrics = agent_metrics
|
|
305
|
+
if agent_metric_collection is not None:
|
|
306
|
+
agent_span.metric_collection = agent_metric_collection
|
|
307
|
+
except Exception:
|
|
308
|
+
# Be conservative: never break the run on metrics propagation
|
|
309
|
+
pass
|