deepeval 3.8.2__py3-none-any.whl → 3.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/integrations/crewai/__init__.py +9 -2
- deepeval/integrations/crewai/handler.py +261 -66
- deepeval/integrations/crewai/subs.py +23 -10
- deepeval/integrations/crewai/tool.py +20 -3
- deepeval/integrations/crewai/wrapper.py +69 -15
- deepeval/integrations/langchain/callback.py +4 -0
- deepeval/integrations/llama_index/handler.py +69 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +7 -3
- deepeval/tracing/api.py +1 -0
- deepeval/tracing/context.py +3 -0
- deepeval/tracing/trace_context.py +5 -0
- deepeval/tracing/tracing.py +1 -0
- deepeval/tracing/types.py +1 -0
- {deepeval-3.8.2.dist-info → deepeval-3.8.3.dist-info}/METADATA +1 -1
- {deepeval-3.8.2.dist-info → deepeval-3.8.3.dist-info}/RECORD +19 -19
- {deepeval-3.8.2.dist-info → deepeval-3.8.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.8.2.dist-info → deepeval-3.8.3.dist-info}/WHEEL +0 -0
- {deepeval-3.8.2.dist-info → deepeval-3.8.3.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.8.2"
|
|
1
|
+
__version__: str = "3.8.3"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .handler import instrument_crewai
|
|
1
|
+
from .handler import instrument_crewai, reset_crewai_instrumentation
|
|
2
2
|
from .subs import (
|
|
3
3
|
DeepEvalCrew as Crew,
|
|
4
4
|
DeepEvalAgent as Agent,
|
|
@@ -6,4 +6,11 @@ from .subs import (
|
|
|
6
6
|
)
|
|
7
7
|
from .tool import tool
|
|
8
8
|
|
|
9
|
-
__all__ = [
|
|
9
|
+
__all__ = [
|
|
10
|
+
"instrument_crewai",
|
|
11
|
+
"Crew",
|
|
12
|
+
"Agent",
|
|
13
|
+
"LLM",
|
|
14
|
+
"tool",
|
|
15
|
+
"reset_crewai_instrumentation",
|
|
16
|
+
]
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import deepeval
|
|
3
|
-
|
|
4
|
-
from
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from time import perf_counter
|
|
5
|
+
from typing import Optional, Tuple, Any, List, Union
|
|
5
6
|
from deepeval.telemetry import capture_tracing_integration
|
|
6
7
|
from deepeval.tracing.context import current_span_context, current_trace_context
|
|
7
|
-
from deepeval.tracing.tracing import Observer
|
|
8
|
-
from deepeval.tracing.types import
|
|
8
|
+
from deepeval.tracing.tracing import Observer, trace_manager
|
|
9
|
+
from deepeval.tracing.types import ToolSpan, SpanType, TraceSpanStatus
|
|
9
10
|
from deepeval.config.settings import get_settings
|
|
10
11
|
|
|
11
12
|
|
|
@@ -45,7 +46,9 @@ except ImportError as e:
|
|
|
45
46
|
|
|
46
47
|
crewai_installed = False
|
|
47
48
|
|
|
49
|
+
# GLOBAL STATE to prevent duplicate listeners
|
|
48
50
|
IS_WRAPPED_ALL = False
|
|
51
|
+
_listener_instance = None
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
def is_crewai_installed():
|
|
@@ -55,21 +58,49 @@ def is_crewai_installed():
|
|
|
55
58
|
)
|
|
56
59
|
|
|
57
60
|
|
|
61
|
+
def _get_metrics_data(obj: Any) -> Tuple[Optional[str], Optional[Any]]:
|
|
62
|
+
"""Helper to safely extract metrics attached to CrewAI objects."""
|
|
63
|
+
|
|
64
|
+
if not obj:
|
|
65
|
+
return None, None
|
|
66
|
+
metric_collection = getattr(obj, "_metric_collection", None)
|
|
67
|
+
metrics = getattr(obj, "_metrics", None)
|
|
68
|
+
|
|
69
|
+
if metric_collection is not None or metrics is not None:
|
|
70
|
+
return metric_collection, metrics
|
|
71
|
+
|
|
72
|
+
func = getattr(obj, "func", None)
|
|
73
|
+
if func:
|
|
74
|
+
metric_collection = getattr(func, "_metric_collection", None)
|
|
75
|
+
metrics = getattr(func, "_metrics", None)
|
|
76
|
+
|
|
77
|
+
return metric_collection, metrics
|
|
78
|
+
|
|
79
|
+
|
|
58
80
|
class CrewAIEventsListener(BaseEventListener):
|
|
59
81
|
def __init__(self):
|
|
60
82
|
is_crewai_installed()
|
|
61
83
|
super().__init__()
|
|
62
84
|
self.span_observers: dict[str, Observer] = {}
|
|
85
|
+
self.tool_observers_stack: dict[str, List[Union[Observer, None]]] = (
|
|
86
|
+
defaultdict(list)
|
|
87
|
+
)
|
|
63
88
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
agent_id = getattr(event, "agent_id", "unknown")
|
|
69
|
-
tool_name = getattr(event, "tool_name", "unknown")
|
|
70
|
-
execution_id = f"tool_{source_id}_{task_id}_{agent_id}_{tool_name}"
|
|
89
|
+
def reset_state(self):
|
|
90
|
+
"""Clears all internal state to prevent pollution between tests."""
|
|
91
|
+
self.span_observers.clear()
|
|
92
|
+
self.tool_observers_stack.clear()
|
|
71
93
|
|
|
72
|
-
|
|
94
|
+
@staticmethod
|
|
95
|
+
def get_tool_stack_key(source, tool_name) -> str:
|
|
96
|
+
"""
|
|
97
|
+
Generates a unique key for the tool stack.
|
|
98
|
+
FIX: Uses role/name instead of id() to be robust against object copying by CrewAI.
|
|
99
|
+
"""
|
|
100
|
+
identifier = getattr(
|
|
101
|
+
source, "role", getattr(source, "name", str(id(source)))
|
|
102
|
+
)
|
|
103
|
+
return f"{tool_name}_{identifier}"
|
|
73
104
|
|
|
74
105
|
@staticmethod
|
|
75
106
|
def get_knowledge_execution_id(source, event) -> str:
|
|
@@ -79,98 +110,234 @@ class CrewAIEventsListener(BaseEventListener):
|
|
|
79
110
|
|
|
80
111
|
return execution_id
|
|
81
112
|
|
|
113
|
+
@staticmethod
|
|
114
|
+
def get_llm_execution_id(source, event) -> str:
|
|
115
|
+
source_id = id(source)
|
|
116
|
+
return f"llm_{source_id}"
|
|
117
|
+
|
|
118
|
+
def _flatten_tool_span(self, span):
|
|
119
|
+
"""
|
|
120
|
+
Callback to move any child ToolSpans up to the parent.
|
|
121
|
+
"""
|
|
122
|
+
if not span.parent_uuid or not span.children:
|
|
123
|
+
return
|
|
124
|
+
|
|
125
|
+
parent_span = trace_manager.get_span_by_uuid(span.parent_uuid)
|
|
126
|
+
if not parent_span:
|
|
127
|
+
return
|
|
128
|
+
|
|
129
|
+
# Identify child tool spans (ghost nesting)
|
|
130
|
+
tools_to_move = [
|
|
131
|
+
child for child in span.children if isinstance(child, ToolSpan)
|
|
132
|
+
]
|
|
133
|
+
|
|
134
|
+
if tools_to_move:
|
|
135
|
+
if parent_span.children is None:
|
|
136
|
+
parent_span.children = []
|
|
137
|
+
|
|
138
|
+
for child in tools_to_move:
|
|
139
|
+
child.parent_uuid = parent_span.uuid
|
|
140
|
+
parent_span.children.append(child)
|
|
141
|
+
|
|
142
|
+
span.children = [
|
|
143
|
+
child
|
|
144
|
+
for child in span.children
|
|
145
|
+
if not isinstance(child, ToolSpan)
|
|
146
|
+
]
|
|
147
|
+
|
|
82
148
|
def setup_listeners(self, crewai_event_bus):
|
|
83
149
|
@crewai_event_bus.on(CrewKickoffStartedEvent)
|
|
84
150
|
def on_crew_started(source, event: CrewKickoffStartedEvent):
|
|
85
|
-
# Assuming that this event is called in the crew.kickoff method
|
|
86
151
|
current_span = current_span_context.get()
|
|
87
|
-
|
|
88
|
-
# set the input
|
|
89
152
|
if current_span:
|
|
90
153
|
current_span.input = event.inputs
|
|
91
|
-
|
|
92
|
-
# set trace input
|
|
93
154
|
current_trace = current_trace_context.get()
|
|
94
155
|
if current_trace:
|
|
95
156
|
current_trace.input = event.inputs
|
|
96
157
|
|
|
97
158
|
@crewai_event_bus.on(CrewKickoffCompletedEvent)
|
|
98
159
|
def on_crew_completed(source, event: CrewKickoffCompletedEvent):
|
|
99
|
-
# Assuming that this event is called in the crew.kickoff method
|
|
100
160
|
current_span = current_span_context.get()
|
|
101
|
-
|
|
102
|
-
|
|
161
|
+
output = getattr(
|
|
162
|
+
event, "output", getattr(event, "result", str(event))
|
|
163
|
+
)
|
|
103
164
|
if current_span:
|
|
104
|
-
current_span.output = str(
|
|
105
|
-
|
|
106
|
-
# set trace output
|
|
165
|
+
current_span.output = str(output)
|
|
107
166
|
current_trace = current_trace_context.get()
|
|
108
167
|
if current_trace:
|
|
109
|
-
current_trace.output = str(
|
|
168
|
+
current_trace.output = str(output)
|
|
110
169
|
|
|
111
170
|
@crewai_event_bus.on(LLMCallStartedEvent)
|
|
112
171
|
def on_llm_started(source, event: LLMCallStartedEvent):
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
172
|
+
metric_collection, metrics = _get_metrics_data(source)
|
|
173
|
+
observer = Observer(
|
|
174
|
+
span_type="llm",
|
|
175
|
+
func_name="call",
|
|
176
|
+
observe_kwargs={"model": getattr(event, "model", "unknown")},
|
|
177
|
+
metric_collection=metric_collection,
|
|
178
|
+
metrics=metrics,
|
|
179
|
+
)
|
|
180
|
+
self.span_observers[self.get_llm_execution_id(source, event)] = (
|
|
181
|
+
observer
|
|
182
|
+
)
|
|
183
|
+
observer.__enter__()
|
|
119
184
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
185
|
+
if observer.trace_uuid:
|
|
186
|
+
span = trace_manager.get_span_by_uuid(observer.uuid)
|
|
187
|
+
if span:
|
|
188
|
+
msgs = getattr(event, "messages")
|
|
189
|
+
span.input = msgs
|
|
123
190
|
|
|
124
191
|
@crewai_event_bus.on(LLMCallCompletedEvent)
|
|
125
192
|
def on_llm_completed(source, event: LLMCallCompletedEvent):
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
193
|
+
key = self.get_llm_execution_id(source, event)
|
|
194
|
+
if key in self.span_observers:
|
|
195
|
+
observer = self.span_observers.pop(key)
|
|
196
|
+
if observer:
|
|
197
|
+
current_span = current_span_context.get()
|
|
198
|
+
token = None
|
|
199
|
+
span_to_close = trace_manager.get_span_by_uuid(
|
|
200
|
+
observer.uuid
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
if span_to_close:
|
|
204
|
+
output = getattr(
|
|
205
|
+
event, "response", getattr(event, "output", "")
|
|
206
|
+
)
|
|
207
|
+
span_to_close.output = output
|
|
208
|
+
if (
|
|
209
|
+
not current_span
|
|
210
|
+
or current_span.uuid != observer.uuid
|
|
211
|
+
):
|
|
212
|
+
token = current_span_context.set(span_to_close)
|
|
213
|
+
|
|
214
|
+
observer.__exit__(None, None, None)
|
|
215
|
+
if token:
|
|
216
|
+
current_span_context.reset(token)
|
|
132
217
|
|
|
133
218
|
@crewai_event_bus.on(AgentExecutionStartedEvent)
|
|
134
219
|
def on_agent_started(source, event: AgentExecutionStartedEvent):
|
|
135
|
-
# Assuming that this event is called in the agent.execute_task method
|
|
136
220
|
current_span = current_span_context.get()
|
|
137
|
-
|
|
138
|
-
# set the input
|
|
139
221
|
if current_span:
|
|
140
222
|
current_span.input = event.task_prompt
|
|
141
223
|
|
|
142
224
|
@crewai_event_bus.on(AgentExecutionCompletedEvent)
|
|
143
225
|
def on_agent_completed(source, event: AgentExecutionCompletedEvent):
|
|
144
|
-
# Assuming that this event is called in the agent.execute_task method
|
|
145
226
|
current_span = current_span_context.get()
|
|
146
|
-
|
|
147
|
-
# set the output
|
|
148
227
|
if current_span:
|
|
149
|
-
current_span.output =
|
|
228
|
+
current_span.output = getattr(
|
|
229
|
+
event, "output", getattr(event, "result", "")
|
|
230
|
+
)
|
|
150
231
|
|
|
151
232
|
@crewai_event_bus.on(ToolUsageStartedEvent)
|
|
152
233
|
def on_tool_started(source, event: ToolUsageStartedEvent):
|
|
234
|
+
key = self.get_tool_stack_key(source, event.tool_name)
|
|
235
|
+
|
|
236
|
+
# 1. Internal Stack Check
|
|
237
|
+
if self.tool_observers_stack[key]:
|
|
238
|
+
self.tool_observers_stack[key].append(None)
|
|
239
|
+
return
|
|
240
|
+
|
|
241
|
+
# 2. SMART DEDUPING
|
|
242
|
+
current_span = current_span_context.get()
|
|
243
|
+
span_type = getattr(current_span, "type", None)
|
|
244
|
+
is_tool_span = span_type == "tool" or span_type == SpanType.TOOL
|
|
245
|
+
if (
|
|
246
|
+
is_tool_span
|
|
247
|
+
and getattr(current_span, "name", "") == event.tool_name
|
|
248
|
+
):
|
|
249
|
+
self.tool_observers_stack[key].append(None)
|
|
250
|
+
return
|
|
251
|
+
|
|
252
|
+
metric_collection = None
|
|
253
|
+
metrics = None
|
|
254
|
+
|
|
255
|
+
if hasattr(source, "tools"):
|
|
256
|
+
for tools in source.tools:
|
|
257
|
+
if getattr(tools, "name", None) == event.tool_name:
|
|
258
|
+
metric_collection, metrics = _get_metrics_data(tools)
|
|
259
|
+
break
|
|
260
|
+
|
|
261
|
+
if not metric_collection:
|
|
262
|
+
agent = getattr(source, "agent", source)
|
|
263
|
+
metric_collection, metrics = _get_metrics_data(agent)
|
|
264
|
+
|
|
153
265
|
observer = Observer(
|
|
154
266
|
span_type="tool",
|
|
155
267
|
func_name=event.tool_name,
|
|
156
268
|
function_kwargs=event.tool_args,
|
|
269
|
+
metric_collection=metric_collection,
|
|
270
|
+
metrics=metrics,
|
|
157
271
|
)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
)
|
|
272
|
+
|
|
273
|
+
self.tool_observers_stack[key].append(observer)
|
|
161
274
|
observer.__enter__()
|
|
162
275
|
|
|
163
276
|
@crewai_event_bus.on(ToolUsageFinishedEvent)
|
|
164
277
|
def on_tool_completed(source, event: ToolUsageFinishedEvent):
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
278
|
+
key = self.get_tool_stack_key(source, event.tool_name)
|
|
279
|
+
observer = None
|
|
280
|
+
|
|
281
|
+
if (
|
|
282
|
+
key in self.tool_observers_stack
|
|
283
|
+
and self.tool_observers_stack[key]
|
|
284
|
+
):
|
|
285
|
+
item = self.tool_observers_stack[key].pop()
|
|
286
|
+
if item is None:
|
|
287
|
+
return
|
|
288
|
+
observer = item
|
|
289
|
+
|
|
290
|
+
if not observer:
|
|
291
|
+
current_span = current_span_context.get()
|
|
292
|
+
if (
|
|
293
|
+
current_span
|
|
294
|
+
and getattr(current_span, "type", None)
|
|
295
|
+
in ["tool", SpanType.TOOL]
|
|
296
|
+
and getattr(current_span, "name", "") == event.tool_name
|
|
297
|
+
):
|
|
298
|
+
current_span.output = getattr(
|
|
299
|
+
event, "output", getattr(event, "result", None)
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
if current_span.end_time is None:
|
|
303
|
+
current_span.end_time = perf_counter()
|
|
304
|
+
|
|
305
|
+
current_span.status = TraceSpanStatus.SUCCESS
|
|
306
|
+
|
|
307
|
+
self._flatten_tool_span(current_span)
|
|
308
|
+
trace_manager.remove_span(current_span.uuid)
|
|
309
|
+
|
|
310
|
+
if current_span.parent_uuid:
|
|
311
|
+
parent = trace_manager.get_span_by_uuid(
|
|
312
|
+
current_span.parent_uuid
|
|
313
|
+
)
|
|
314
|
+
current_span_context.set(parent if parent else None)
|
|
315
|
+
else:
|
|
316
|
+
current_span_context.set(None)
|
|
317
|
+
return
|
|
318
|
+
|
|
168
319
|
if observer:
|
|
169
320
|
current_span = current_span_context.get()
|
|
170
|
-
|
|
171
|
-
|
|
321
|
+
token = None
|
|
322
|
+
span_to_close = trace_manager.get_span_by_uuid(observer.uuid)
|
|
323
|
+
|
|
324
|
+
if span_to_close:
|
|
325
|
+
span_to_close.output = getattr(
|
|
326
|
+
event, "output", getattr(event, "result", None)
|
|
327
|
+
)
|
|
328
|
+
if not current_span or current_span.uuid != observer.uuid:
|
|
329
|
+
token = current_span_context.set(span_to_close)
|
|
330
|
+
|
|
331
|
+
observer.update_span_properties = self._flatten_tool_span
|
|
172
332
|
observer.__exit__(None, None, None)
|
|
173
333
|
|
|
334
|
+
if span_to_close and span_to_close.end_time is None:
|
|
335
|
+
span_to_close.end_time = perf_counter()
|
|
336
|
+
span_to_close.status = TraceSpanStatus.SUCCESS
|
|
337
|
+
|
|
338
|
+
if token:
|
|
339
|
+
current_span_context.reset(token)
|
|
340
|
+
|
|
174
341
|
@crewai_event_bus.on(KnowledgeRetrievalStartedEvent)
|
|
175
342
|
def on_knowledge_started(source, event: KnowledgeRetrievalStartedEvent):
|
|
176
343
|
observer = Observer(
|
|
@@ -187,18 +354,35 @@ class CrewAIEventsListener(BaseEventListener):
|
|
|
187
354
|
def on_knowledge_completed(
|
|
188
355
|
source, event: KnowledgeRetrievalCompletedEvent
|
|
189
356
|
):
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
357
|
+
key = self.get_knowledge_execution_id(source, event)
|
|
358
|
+
if key in self.span_observers:
|
|
359
|
+
observer = self.span_observers.pop(key)
|
|
360
|
+
if observer:
|
|
361
|
+
current_span = current_span_context.get()
|
|
362
|
+
token = None
|
|
363
|
+
span_to_close = trace_manager.get_span_by_uuid(
|
|
364
|
+
observer.uuid
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
if span_to_close:
|
|
368
|
+
span_to_close.input = event.query
|
|
369
|
+
span_to_close.output = event.retrieved_knowledge
|
|
370
|
+
|
|
371
|
+
if (
|
|
372
|
+
not current_span
|
|
373
|
+
or current_span.uuid != observer.uuid
|
|
374
|
+
):
|
|
375
|
+
token = current_span_context.set(span_to_close)
|
|
376
|
+
|
|
377
|
+
observer.__exit__(None, None, None)
|
|
378
|
+
|
|
379
|
+
if token:
|
|
380
|
+
current_span_context.reset(token)
|
|
199
381
|
|
|
200
382
|
|
|
201
383
|
def instrument_crewai(api_key: Optional[str] = None):
|
|
384
|
+
global _listener_instance
|
|
385
|
+
|
|
202
386
|
is_crewai_installed()
|
|
203
387
|
with capture_tracing_integration("crewai"):
|
|
204
388
|
if api_key:
|
|
@@ -206,7 +390,14 @@ def instrument_crewai(api_key: Optional[str] = None):
|
|
|
206
390
|
|
|
207
391
|
wrap_all()
|
|
208
392
|
|
|
209
|
-
|
|
393
|
+
if _listener_instance is None:
|
|
394
|
+
_listener_instance = CrewAIEventsListener()
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def reset_crewai_instrumentation():
|
|
398
|
+
global _listener_instance
|
|
399
|
+
if _listener_instance:
|
|
400
|
+
_listener_instance.reset_state()
|
|
210
401
|
|
|
211
402
|
|
|
212
403
|
def wrap_all():
|
|
@@ -218,15 +409,19 @@ def wrap_all():
|
|
|
218
409
|
wrap_crew_kickoff_for_each,
|
|
219
410
|
wrap_crew_kickoff_async,
|
|
220
411
|
wrap_crew_kickoff_for_each_async,
|
|
221
|
-
|
|
412
|
+
wrap_crew_akickoff,
|
|
413
|
+
wrap_crew_akickoff_for_each,
|
|
222
414
|
wrap_agent_execute_task,
|
|
415
|
+
wrap_agent_aexecute_task,
|
|
223
416
|
)
|
|
224
417
|
|
|
225
418
|
wrap_crew_kickoff()
|
|
226
419
|
wrap_crew_kickoff_for_each()
|
|
227
420
|
wrap_crew_kickoff_async()
|
|
228
421
|
wrap_crew_kickoff_for_each_async()
|
|
229
|
-
|
|
422
|
+
wrap_crew_akickoff()
|
|
423
|
+
wrap_crew_akickoff_for_each()
|
|
230
424
|
wrap_agent_execute_task()
|
|
425
|
+
wrap_agent_aexecute_task()
|
|
231
426
|
|
|
232
427
|
IS_WRAPPED_ALL = True
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional, Type, TypeVar
|
|
1
|
+
from typing import List, Optional, Type, TypeVar, Callable
|
|
2
2
|
from pydantic import PrivateAttr
|
|
3
3
|
|
|
4
4
|
from deepeval.metrics.base_metric import BaseMetric
|
|
@@ -28,14 +28,10 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
|
|
|
28
28
|
_metric_collection: Optional[str] = PrivateAttr(default=None)
|
|
29
29
|
_metrics: Optional[List[BaseMetric]] = PrivateAttr(default=None)
|
|
30
30
|
|
|
31
|
-
def __init__(
|
|
32
|
-
self,
|
|
33
|
-
*args,
|
|
34
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
35
|
-
metric_collection: Optional[str] = None,
|
|
36
|
-
**kwargs
|
|
37
|
-
):
|
|
31
|
+
def __init__(self, *args, **kwargs):
|
|
38
32
|
is_crewai_installed()
|
|
33
|
+
metric_collection = kwargs.pop("metric_collection", None)
|
|
34
|
+
metrics = kwargs.pop("metrics", None)
|
|
39
35
|
super().__init__(*args, **kwargs)
|
|
40
36
|
self._metric_collection = metric_collection
|
|
41
37
|
self._metrics = metrics
|
|
@@ -45,7 +41,24 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
|
|
|
45
41
|
return DeepEvalClass
|
|
46
42
|
|
|
47
43
|
|
|
48
|
-
|
|
44
|
+
def create_deepeval_llm(base_factory: Callable) -> Callable:
|
|
45
|
+
"""Wrapper for factory functions/classes (LLM)."""
|
|
46
|
+
|
|
47
|
+
def factory_wrapper(*args, **kwargs):
|
|
48
|
+
is_crewai_installed()
|
|
49
|
+
metric_collection = kwargs.pop("metric_collection", None)
|
|
50
|
+
metrics = kwargs.pop("metrics", None)
|
|
51
|
+
instance = base_factory(*args, **kwargs)
|
|
52
|
+
try:
|
|
53
|
+
instance._metric_collection = metric_collection
|
|
54
|
+
instance._metrics = metrics
|
|
55
|
+
except Exception:
|
|
56
|
+
pass
|
|
57
|
+
return instance
|
|
58
|
+
|
|
59
|
+
return factory_wrapper
|
|
60
|
+
|
|
61
|
+
|
|
49
62
|
DeepEvalCrew = create_deepeval_class(Crew, "DeepEvalCrew")
|
|
50
63
|
DeepEvalAgent = create_deepeval_class(Agent, "DeepEvalAgent")
|
|
51
|
-
DeepEvalLLM =
|
|
64
|
+
DeepEvalLLM = create_deepeval_llm(LLM)
|
|
@@ -15,6 +15,20 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
|
|
|
15
15
|
"""
|
|
16
16
|
crewai_kwargs = kwargs
|
|
17
17
|
|
|
18
|
+
def _attach_metadata(tool_instance):
|
|
19
|
+
try:
|
|
20
|
+
object.__setattr__(
|
|
21
|
+
tool_instance, "metric_collection", metric_collection
|
|
22
|
+
)
|
|
23
|
+
object.__setattr__(tool_instance, "metrics", metric)
|
|
24
|
+
except Exception:
|
|
25
|
+
try:
|
|
26
|
+
tool_instance._metric_collection = metric_collection
|
|
27
|
+
tool_instance._metrics = metric
|
|
28
|
+
except Exception:
|
|
29
|
+
pass
|
|
30
|
+
return tool_instance
|
|
31
|
+
|
|
18
32
|
# Case 1: @tool (function passed directly)
|
|
19
33
|
if len(args) == 1 and callable(args[0]):
|
|
20
34
|
f = args[0]
|
|
@@ -29,7 +43,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
|
|
|
29
43
|
result = f(*f_args, **f_kwargs)
|
|
30
44
|
return result
|
|
31
45
|
|
|
32
|
-
|
|
46
|
+
tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
|
|
47
|
+
return _attach_metadata(tool_instance)
|
|
33
48
|
|
|
34
49
|
# Case 2: @tool("name")
|
|
35
50
|
if len(args) == 1 and isinstance(args[0], str):
|
|
@@ -45,7 +60,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
|
|
|
45
60
|
result = f(*f_args, **f_kwargs)
|
|
46
61
|
return result
|
|
47
62
|
|
|
48
|
-
|
|
63
|
+
tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
|
|
64
|
+
return _attach_metadata(tool_instance)
|
|
49
65
|
|
|
50
66
|
return _decorator
|
|
51
67
|
|
|
@@ -64,7 +80,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
|
|
|
64
80
|
result = f(*f_args, **f_kwargs)
|
|
65
81
|
return result
|
|
66
82
|
|
|
67
|
-
|
|
83
|
+
tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
|
|
84
|
+
return _attach_metadata(tool_instance)
|
|
68
85
|
|
|
69
86
|
return _decorator
|
|
70
87
|
|
|
@@ -17,8 +17,9 @@ def wrap_crew_kickoff():
|
|
|
17
17
|
func_name="kickoff",
|
|
18
18
|
metric_collection=metric_collection,
|
|
19
19
|
metrics=metrics,
|
|
20
|
-
):
|
|
20
|
+
) as observer:
|
|
21
21
|
result = original_kickoff(self, *args, **kwargs)
|
|
22
|
+
observer.result = str(result) if result else None
|
|
22
23
|
|
|
23
24
|
return result
|
|
24
25
|
|
|
@@ -36,8 +37,9 @@ def wrap_crew_kickoff_for_each():
|
|
|
36
37
|
func_name="kickoff_for_each",
|
|
37
38
|
metric_collection=metric_collection,
|
|
38
39
|
metrics=metrics,
|
|
39
|
-
):
|
|
40
|
+
) as observer:
|
|
40
41
|
result = original_kickoff_for_each(self, *args, **kwargs)
|
|
42
|
+
observer.result = str(result) if result else None
|
|
41
43
|
|
|
42
44
|
return result
|
|
43
45
|
|
|
@@ -55,8 +57,9 @@ def wrap_crew_kickoff_async():
|
|
|
55
57
|
func_name="kickoff_async",
|
|
56
58
|
metric_collection=metric_collection,
|
|
57
59
|
metrics=metrics,
|
|
58
|
-
):
|
|
60
|
+
) as observer:
|
|
59
61
|
result = await original_kickoff_async(self, *args, **kwargs)
|
|
62
|
+
observer.result = str(result) if result else None
|
|
60
63
|
|
|
61
64
|
return result
|
|
62
65
|
|
|
@@ -74,33 +77,61 @@ def wrap_crew_kickoff_for_each_async():
|
|
|
74
77
|
func_name="kickoff_for_each_async",
|
|
75
78
|
metric_collection=metric_collection,
|
|
76
79
|
metrics=metrics,
|
|
77
|
-
):
|
|
80
|
+
) as observer:
|
|
78
81
|
result = await original_kickoff_for_each_async(
|
|
79
82
|
self, *args, **kwargs
|
|
80
83
|
)
|
|
84
|
+
observer.result = str(result) if result else None
|
|
81
85
|
|
|
82
86
|
return result
|
|
83
87
|
|
|
84
88
|
Crew.kickoff_for_each_async = wrapper
|
|
85
89
|
|
|
86
90
|
|
|
87
|
-
def
|
|
88
|
-
|
|
91
|
+
def wrap_crew_akickoff():
|
|
92
|
+
if not hasattr(Crew, "akickoff"):
|
|
93
|
+
return
|
|
89
94
|
|
|
90
|
-
|
|
91
|
-
|
|
95
|
+
original_akickoff = Crew.akickoff
|
|
96
|
+
|
|
97
|
+
@wraps(original_akickoff)
|
|
98
|
+
async def wrapper(self, *args, **kwargs):
|
|
99
|
+
metric_collection, metrics = _check_metrics_and_metric_collection(self)
|
|
100
|
+
with Observer(
|
|
101
|
+
span_type="crew",
|
|
102
|
+
func_name="akickoff",
|
|
103
|
+
metric_collection=metric_collection,
|
|
104
|
+
metrics=metrics,
|
|
105
|
+
) as observer:
|
|
106
|
+
result = await original_akickoff(self, *args, **kwargs)
|
|
107
|
+
observer.result = str(result) if result else None
|
|
108
|
+
|
|
109
|
+
return result
|
|
110
|
+
|
|
111
|
+
Crew.akickoff = wrapper
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def wrap_crew_akickoff_for_each():
|
|
115
|
+
if not hasattr(Crew, "akickoff_for_each"):
|
|
116
|
+
return
|
|
117
|
+
|
|
118
|
+
original_akickoff_for_each = Crew.akickoff_for_each
|
|
119
|
+
|
|
120
|
+
@wraps(original_akickoff_for_each)
|
|
121
|
+
async def wrapper(self, *args, **kwargs):
|
|
92
122
|
metric_collection, metrics = _check_metrics_and_metric_collection(self)
|
|
93
123
|
with Observer(
|
|
94
|
-
span_type="
|
|
95
|
-
func_name="
|
|
96
|
-
observe_kwargs={"model": "temp_model"},
|
|
124
|
+
span_type="crew",
|
|
125
|
+
func_name="akickoff_for_each",
|
|
97
126
|
metric_collection=metric_collection,
|
|
98
127
|
metrics=metrics,
|
|
99
|
-
):
|
|
100
|
-
result =
|
|
128
|
+
) as observer:
|
|
129
|
+
result = await original_akickoff_for_each(self, *args, **kwargs)
|
|
130
|
+
observer.result = str(result) if result else None
|
|
131
|
+
|
|
101
132
|
return result
|
|
102
133
|
|
|
103
|
-
|
|
134
|
+
Crew.akickoff_for_each = wrapper
|
|
104
135
|
|
|
105
136
|
|
|
106
137
|
def wrap_agent_execute_task():
|
|
@@ -114,13 +145,36 @@ def wrap_agent_execute_task():
|
|
|
114
145
|
func_name="execute_task",
|
|
115
146
|
metric_collection=metric_collection,
|
|
116
147
|
metrics=metrics,
|
|
117
|
-
):
|
|
148
|
+
) as observer:
|
|
118
149
|
result = original_execute_task(self, *args, **kwargs)
|
|
150
|
+
observer.result = str(result) if result else None
|
|
119
151
|
return result
|
|
120
152
|
|
|
121
153
|
Agent.execute_task = wrapper
|
|
122
154
|
|
|
123
155
|
|
|
156
|
+
def wrap_agent_aexecute_task():
|
|
157
|
+
if not hasattr(Agent, "aexecute_task"):
|
|
158
|
+
return
|
|
159
|
+
|
|
160
|
+
original_aexecute_task = Agent.aexecute_task
|
|
161
|
+
|
|
162
|
+
@wraps(original_aexecute_task)
|
|
163
|
+
async def wrapper(self, *args, **kwargs):
|
|
164
|
+
metric_collection, metrics = _check_metrics_and_metric_collection(self)
|
|
165
|
+
with Observer(
|
|
166
|
+
span_type="agent",
|
|
167
|
+
func_name="aexecute_task",
|
|
168
|
+
metric_collection=metric_collection,
|
|
169
|
+
metrics=metrics,
|
|
170
|
+
) as observer:
|
|
171
|
+
result = await original_aexecute_task(self, *args, **kwargs)
|
|
172
|
+
observer.result = str(result) if result else None
|
|
173
|
+
return result
|
|
174
|
+
|
|
175
|
+
Agent.aexecute_task = wrapper
|
|
176
|
+
|
|
177
|
+
|
|
124
178
|
def _check_metrics_and_metric_collection(obj: Any):
|
|
125
179
|
metric_collection = getattr(obj, "_metric_collection", None)
|
|
126
180
|
metrics = getattr(obj, "_metrics", None)
|
|
@@ -84,6 +84,7 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
84
84
|
user_id: Optional[str] = None,
|
|
85
85
|
metrics: Optional[List[BaseMetric]] = None,
|
|
86
86
|
metric_collection: Optional[str] = None,
|
|
87
|
+
test_case_id: Optional[str] = None,
|
|
87
88
|
):
|
|
88
89
|
is_langchain_installed()
|
|
89
90
|
with capture_tracing_integration("langchain.callback.CallbackHandler"):
|
|
@@ -108,6 +109,7 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
108
109
|
"metadata": metadata,
|
|
109
110
|
"thread_id": thread_id,
|
|
110
111
|
"user_id": user_id,
|
|
112
|
+
"test_case_id": test_case_id,
|
|
111
113
|
}
|
|
112
114
|
self._trace_init_fields: Dict[str, Any] = dict(
|
|
113
115
|
self._original_init_fields
|
|
@@ -200,6 +202,8 @@ class CallbackHandler(BaseCallbackHandler):
|
|
|
200
202
|
trace.thread_id = fields["thread_id"]
|
|
201
203
|
if fields.get("user_id") is not None:
|
|
202
204
|
trace.user_id = fields["user_id"]
|
|
205
|
+
if fields.get("test_case_id") is not None:
|
|
206
|
+
trace.test_case_id = fields["test_case_id"]
|
|
203
207
|
# prevent re-applying on every callback
|
|
204
208
|
self._trace_init_fields = {}
|
|
205
209
|
|
|
@@ -21,6 +21,7 @@ from deepeval.tracing.types import (
|
|
|
21
21
|
from deepeval.tracing.trace_context import (
|
|
22
22
|
current_llm_context,
|
|
23
23
|
current_agent_context,
|
|
24
|
+
current_trace_context,
|
|
24
25
|
)
|
|
25
26
|
from deepeval.test_case import ToolCall
|
|
26
27
|
from deepeval.tracing.utils import make_json_serializable
|
|
@@ -40,7 +41,10 @@ try:
|
|
|
40
41
|
LLMChatStartEvent,
|
|
41
42
|
LLMChatEndEvent,
|
|
42
43
|
)
|
|
43
|
-
from
|
|
44
|
+
from llama_index.core.instrumentation import Dispatcher
|
|
45
|
+
from llama_index.core.instrumentation.events.retrieval import (
|
|
46
|
+
RetrievalEndEvent,
|
|
47
|
+
)
|
|
44
48
|
from deepeval.integrations.llama_index.utils import (
|
|
45
49
|
parse_id,
|
|
46
50
|
prepare_input_llm_test_case_params,
|
|
@@ -82,15 +86,23 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
82
86
|
input_messages.append({"role": role, "content": content})
|
|
83
87
|
|
|
84
88
|
llm_span_context = current_llm_context.get()
|
|
85
|
-
|
|
89
|
+
|
|
90
|
+
parent_span = trace_manager.get_span_by_uuid(event.span_id)
|
|
91
|
+
if parent_span:
|
|
92
|
+
trace_uuid = parent_span.trace_uuid
|
|
93
|
+
else:
|
|
94
|
+
current_trace = current_trace_context.get()
|
|
95
|
+
if current_trace:
|
|
96
|
+
trace_uuid = current_trace.uuid
|
|
97
|
+
else:
|
|
98
|
+
trace_uuid = trace_manager.start_new_trace().uuid
|
|
99
|
+
|
|
86
100
|
llm_span = LlmSpan(
|
|
87
101
|
name="ConfidentLLMSpan",
|
|
88
102
|
uuid=str(uuid.uuid4()),
|
|
89
103
|
status=TraceSpanStatus.IN_PROGRESS,
|
|
90
104
|
children=[],
|
|
91
|
-
trace_uuid=trace_manager.get_span_by_uuid(
|
|
92
|
-
event.span_id
|
|
93
|
-
).trace_uuid,
|
|
105
|
+
trace_uuid=trace_uuid,
|
|
94
106
|
parent_uuid=event.span_id,
|
|
95
107
|
start_time=perf_counter(),
|
|
96
108
|
model=getattr(event, "model_dict", {}).get(
|
|
@@ -128,6 +140,13 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
128
140
|
trace_manager.remove_span(llm_span.uuid)
|
|
129
141
|
del self.open_ai_astream_to_llm_span_map[event.span_id]
|
|
130
142
|
|
|
143
|
+
if isinstance(event, RetrievalEndEvent):
|
|
144
|
+
span = trace_manager.get_span_by_uuid(event.span_id)
|
|
145
|
+
if span:
|
|
146
|
+
span.retrieval_context = [
|
|
147
|
+
node.node.get_content() for node in event.nodes
|
|
148
|
+
]
|
|
149
|
+
|
|
131
150
|
def new_span(
|
|
132
151
|
self,
|
|
133
152
|
id_: str,
|
|
@@ -139,18 +158,30 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
139
158
|
) -> Optional[LlamaIndexBaseSpan]:
|
|
140
159
|
class_name, method_name = parse_id(id_)
|
|
141
160
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
161
|
+
current_trace = current_trace_context.get()
|
|
162
|
+
trace_uuid = None
|
|
163
|
+
|
|
164
|
+
if parent_span_id is None or (
|
|
165
|
+
class_name == "Workflow" and method_name == "run"
|
|
166
|
+
):
|
|
167
|
+
if current_trace:
|
|
168
|
+
trace_uuid = current_trace.uuid
|
|
169
|
+
else:
|
|
170
|
+
trace_uuid = trace_manager.start_new_trace().uuid
|
|
171
|
+
|
|
172
|
+
if class_name == "Workflow" and method_name == "run":
|
|
173
|
+
parent_span_id = None
|
|
174
|
+
|
|
148
175
|
elif trace_manager.get_span_by_uuid(parent_span_id):
|
|
149
176
|
trace_uuid = trace_manager.get_span_by_uuid(
|
|
150
177
|
parent_span_id
|
|
151
178
|
).trace_uuid
|
|
179
|
+
|
|
152
180
|
else:
|
|
153
|
-
|
|
181
|
+
if current_trace:
|
|
182
|
+
trace_uuid = current_trace.uuid
|
|
183
|
+
else:
|
|
184
|
+
trace_uuid = trace_manager.start_new_trace().uuid
|
|
154
185
|
|
|
155
186
|
self.root_span_trace_id_map[id_] = trace_uuid
|
|
156
187
|
|
|
@@ -195,7 +226,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
195
226
|
else None
|
|
196
227
|
),
|
|
197
228
|
)
|
|
198
|
-
elif method_name
|
|
229
|
+
elif method_name in ["acall", "call_tool", "acall_tool"]:
|
|
199
230
|
span = ToolSpan(
|
|
200
231
|
uuid=id_,
|
|
201
232
|
status=TraceSpanStatus.IN_PROGRESS,
|
|
@@ -206,7 +237,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
206
237
|
input=bound_args.arguments,
|
|
207
238
|
name="Tool",
|
|
208
239
|
)
|
|
209
|
-
|
|
240
|
+
|
|
210
241
|
prepare_input_llm_test_case_params(
|
|
211
242
|
class_name, method_name, span, bound_args.arguments
|
|
212
243
|
)
|
|
@@ -215,6 +246,22 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
215
246
|
|
|
216
247
|
return span
|
|
217
248
|
|
|
249
|
+
def _get_output_value(self, result: Any) -> Any:
|
|
250
|
+
"""Helper to ensure AgentChatResponse and similar objects are serialized as dicts."""
|
|
251
|
+
if hasattr(result, "response") and hasattr(result, "sources"):
|
|
252
|
+
if hasattr(result, "model_dump"):
|
|
253
|
+
return result.model_dump()
|
|
254
|
+
if hasattr(result, "to_dict"):
|
|
255
|
+
return result.to_dict()
|
|
256
|
+
return {"response": result.response, "sources": result.sources}
|
|
257
|
+
|
|
258
|
+
if hasattr(result, "response"):
|
|
259
|
+
if hasattr(result, "model_dump"):
|
|
260
|
+
return result.model_dump()
|
|
261
|
+
return {"response": result.response}
|
|
262
|
+
|
|
263
|
+
return result
|
|
264
|
+
|
|
218
265
|
def prepare_to_exit_span(
|
|
219
266
|
self,
|
|
220
267
|
id_: str,
|
|
@@ -229,7 +276,8 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
229
276
|
return None
|
|
230
277
|
|
|
231
278
|
class_name, method_name = parse_id(id_)
|
|
232
|
-
|
|
279
|
+
|
|
280
|
+
if method_name in ["call_tool", "acall_tool"]:
|
|
233
281
|
output_json = make_json_serializable(result)
|
|
234
282
|
if output_json and isinstance(output_json, dict):
|
|
235
283
|
if base_span.tools_called is None:
|
|
@@ -243,7 +291,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
243
291
|
)
|
|
244
292
|
base_span.end_time = perf_counter()
|
|
245
293
|
base_span.status = TraceSpanStatus.SUCCESS
|
|
246
|
-
base_span.output = result
|
|
294
|
+
base_span.output = self._get_output_value(result)
|
|
247
295
|
|
|
248
296
|
if isinstance(base_span, ToolSpan):
|
|
249
297
|
result_json = make_json_serializable(result)
|
|
@@ -265,7 +313,8 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
265
313
|
|
|
266
314
|
if base_span.parent_uuid is None:
|
|
267
315
|
trace_manager.end_trace(base_span.trace_uuid)
|
|
268
|
-
self.root_span_trace_id_map
|
|
316
|
+
if base_span.uuid in self.root_span_trace_id_map:
|
|
317
|
+
self.root_span_trace_id_map.pop(base_span.uuid)
|
|
269
318
|
|
|
270
319
|
return base_span
|
|
271
320
|
|
|
@@ -282,13 +331,12 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
|
|
|
282
331
|
return None
|
|
283
332
|
|
|
284
333
|
base_span.end_time = perf_counter()
|
|
285
|
-
base_span.status =
|
|
286
|
-
TraceSpanStatus.SUCCESS
|
|
287
|
-
) # find a way to add error and handle the span without the parent id
|
|
334
|
+
base_span.status = TraceSpanStatus.SUCCESS
|
|
288
335
|
|
|
289
336
|
if base_span.parent_uuid is None:
|
|
290
337
|
trace_manager.end_trace(base_span.trace_uuid)
|
|
291
|
-
self.root_span_trace_id_map
|
|
338
|
+
if base_span.uuid in self.root_span_trace_id_map:
|
|
339
|
+
self.root_span_trace_id_map.pop(base_span.uuid)
|
|
292
340
|
|
|
293
341
|
return base_span
|
|
294
342
|
|
|
@@ -36,7 +36,10 @@ try:
|
|
|
36
36
|
SpanProcessor as _SpanProcessor,
|
|
37
37
|
TracerProvider,
|
|
38
38
|
)
|
|
39
|
-
from opentelemetry.sdk.trace.export import
|
|
39
|
+
from opentelemetry.sdk.trace.export import (
|
|
40
|
+
BatchSpanProcessor,
|
|
41
|
+
SimpleSpanProcessor,
|
|
42
|
+
)
|
|
40
43
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
|
41
44
|
OTLPSpanExporter,
|
|
42
45
|
)
|
|
@@ -172,7 +175,9 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
|
|
|
172
175
|
trace_provider.add_span_processor(span_interceptor)
|
|
173
176
|
|
|
174
177
|
if is_test_mode:
|
|
175
|
-
trace_provider.add_span_processor(
|
|
178
|
+
trace_provider.add_span_processor(
|
|
179
|
+
SimpleSpanProcessor(ConfidentSpanExporter())
|
|
180
|
+
)
|
|
176
181
|
else:
|
|
177
182
|
trace_provider.add_span_processor(
|
|
178
183
|
BatchSpanProcessor(
|
|
@@ -345,7 +350,6 @@ class SpanInterceptor(SpanProcessor):
|
|
|
345
350
|
trace.status = TraceSpanStatus.SUCCESS
|
|
346
351
|
trace.end_time = perf_counter()
|
|
347
352
|
trace_manager.traces_to_evaluate.append(trace)
|
|
348
|
-
test_exporter.clear_span_json_list()
|
|
349
353
|
|
|
350
354
|
def _add_agent_span(self, span, name):
|
|
351
355
|
span.set_attribute("confident.span.type", "agent")
|
deepeval/tracing/api.py
CHANGED
|
@@ -126,6 +126,7 @@ class TraceApi(BaseModel):
|
|
|
126
126
|
input: Optional[Any] = Field(None)
|
|
127
127
|
output: Optional[Any] = Field(None)
|
|
128
128
|
status: Optional[TraceSpanApiStatus] = Field(TraceSpanApiStatus.SUCCESS)
|
|
129
|
+
test_case_id: Optional[str] = Field(None, alias="testCaseId")
|
|
129
130
|
|
|
130
131
|
# additional test case parameters
|
|
131
132
|
retrieval_context: Optional[List[str]] = Field(
|
deepeval/tracing/context.py
CHANGED
|
@@ -74,6 +74,7 @@ def update_current_trace(
|
|
|
74
74
|
expected_tools: Optional[List[ToolCall]] = None,
|
|
75
75
|
test_case: Optional[LLMTestCase] = None,
|
|
76
76
|
confident_api_key: Optional[str] = None,
|
|
77
|
+
test_case_id: Optional[str] = None,
|
|
77
78
|
):
|
|
78
79
|
current_trace = current_trace_context.get()
|
|
79
80
|
if not current_trace:
|
|
@@ -112,6 +113,8 @@ def update_current_trace(
|
|
|
112
113
|
current_trace.expected_tools = expected_tools
|
|
113
114
|
if confident_api_key:
|
|
114
115
|
current_trace.confident_api_key = confident_api_key
|
|
116
|
+
if test_case_id:
|
|
117
|
+
current_trace.test_case_id = test_case_id
|
|
115
118
|
|
|
116
119
|
|
|
117
120
|
def update_llm_span(
|
|
@@ -69,9 +69,11 @@ def trace(
|
|
|
69
69
|
)
|
|
70
70
|
|
|
71
71
|
current_trace = current_trace_context.get()
|
|
72
|
+
started_new_trace = False
|
|
72
73
|
|
|
73
74
|
if not current_trace:
|
|
74
75
|
current_trace = trace_manager.start_new_trace()
|
|
76
|
+
started_new_trace = True
|
|
75
77
|
|
|
76
78
|
if metrics:
|
|
77
79
|
current_trace.metrics = metrics
|
|
@@ -103,5 +105,8 @@ def trace(
|
|
|
103
105
|
try:
|
|
104
106
|
yield current_trace
|
|
105
107
|
finally:
|
|
108
|
+
if started_new_trace:
|
|
109
|
+
trace_manager.end_trace(current_trace.uuid)
|
|
110
|
+
|
|
106
111
|
current_llm_context.set(LlmSpanContext())
|
|
107
112
|
current_agent_context.set(AgentSpanContext())
|
deepeval/tracing/tracing.py
CHANGED
|
@@ -690,6 +690,7 @@ class TraceManager:
|
|
|
690
690
|
expectedOutput=trace.expected_output,
|
|
691
691
|
toolsCalled=trace.tools_called,
|
|
692
692
|
expectedTools=trace.expected_tools,
|
|
693
|
+
testCaseId=trace.test_case_id,
|
|
693
694
|
confident_api_key=trace.confident_api_key,
|
|
694
695
|
environment=(
|
|
695
696
|
self.environment if not trace.environment else trace.environment
|
deepeval/tracing/types.py
CHANGED
|
@@ -157,6 +157,7 @@ class Trace(BaseModel):
|
|
|
157
157
|
output: Optional[Any] = None
|
|
158
158
|
metrics: Optional[List[BaseMetric]] = None
|
|
159
159
|
metric_collection: Optional[str] = None
|
|
160
|
+
test_case_id: Optional[str] = Field(None, serialization_alias="testCaseId")
|
|
160
161
|
|
|
161
162
|
# Don't serialize these
|
|
162
163
|
confident_api_key: Optional[str] = Field(None, exclude=True)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
deepeval/__init__.py,sha256=tle4lT4FONApg3OeztGPEdrpGMEGLWajyGTu7bEd3s0,2976
|
|
2
|
-
deepeval/_version.py,sha256=
|
|
2
|
+
deepeval/_version.py,sha256=DuZF4w5M7-EVzWNXFxp8yJIQADpdXSQSjeWW87pqtn4,27
|
|
3
3
|
deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
|
|
4
4
|
deepeval/annotation/annotation.py,sha256=WLFZRkx6wRJcNzaOMMGXuTfw6Q1_1Mv5A4jpD7Ea4sU,2300
|
|
5
5
|
deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
|
|
@@ -169,26 +169,26 @@ deepeval/evaluate/execute.py,sha256=5RZrTRfe-AnwO5aS16LL-iBqT3fciun9zt3wbXp70v8,
|
|
|
169
169
|
deepeval/evaluate/types.py,sha256=jf424xPHgdJcvgG2l_wTMskJBOEe9tl55c3v3B-SLNU,1071
|
|
170
170
|
deepeval/evaluate/utils.py,sha256=STYyJCvVkewU5iigKnAsUDcVtJuFU_Qefi-aoyv2elA,20740
|
|
171
171
|
deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
-
deepeval/integrations/crewai/__init__.py,sha256=
|
|
173
|
-
deepeval/integrations/crewai/handler.py,sha256=
|
|
174
|
-
deepeval/integrations/crewai/subs.py,sha256=
|
|
175
|
-
deepeval/integrations/crewai/tool.py,sha256=
|
|
176
|
-
deepeval/integrations/crewai/wrapper.py,sha256=
|
|
172
|
+
deepeval/integrations/crewai/__init__.py,sha256=8bkOWfzrqETEYWMB9nCKbqMd7nlU-TGvNH0CIrqtlps,316
|
|
173
|
+
deepeval/integrations/crewai/handler.py,sha256=dgzfWwHw94ro28-h8RyYfNQ8cOmnXCwEoWOZatxcbVk,15178
|
|
174
|
+
deepeval/integrations/crewai/subs.py,sha256=wcMJVBNf69dqFO8L1kMTvm4n1GsO2S-nopQn924JdU4,1974
|
|
175
|
+
deepeval/integrations/crewai/tool.py,sha256=thdDXt-dA2B4LmUuQipAcOiR6H19KA7zzqjlsfTyn3s,3210
|
|
176
|
+
deepeval/integrations/crewai/wrapper.py,sha256=w3NsXHe1M4BhjGcpMA7grUCdSAjlu2zBk9daLeTRluc,5615
|
|
177
177
|
deepeval/integrations/hugging_face/__init__.py,sha256=MuHIf9im9Jypq4VkfLzhklxIrd7vSTGlT74iUNSPgvg,93
|
|
178
178
|
deepeval/integrations/hugging_face/callback.py,sha256=15QQEzR34Cpdp5kUp5oVA6dEsShtiMNZ03akJWAh7lo,7911
|
|
179
179
|
deepeval/integrations/hugging_face/rich_manager.py,sha256=WvFtPGpPmGeg2Ftsnojga6yvbBLiZv_tvNbnFcGb6og,3630
|
|
180
180
|
deepeval/integrations/hugging_face/tests/test_callbacks.py,sha256=88Wyg-aDaXujj9jHeGdFF3ITSl2-y7eaJGWgSyvvDi8,4607
|
|
181
181
|
deepeval/integrations/hugging_face/utils.py,sha256=HUKdQcTIb76Ct69AS737oPxmlVxk5fw2UbT2pLn-o8k,1817
|
|
182
182
|
deepeval/integrations/langchain/__init__.py,sha256=G1Qey5WkKou2-PA34KwWgmayQ_TbvXqPyotTbzmD8tw,84
|
|
183
|
-
deepeval/integrations/langchain/callback.py,sha256
|
|
183
|
+
deepeval/integrations/langchain/callback.py,sha256=-Ip1PU84YqKbf4j17qV925GHsW5mRJN9b-d7V4fOuP8,32892
|
|
184
184
|
deepeval/integrations/langchain/patch.py,sha256=fCHfZXU9xX3IJ6SG8GEYzn3qrifyUkT0i_uUABTsmcs,1255
|
|
185
185
|
deepeval/integrations/langchain/utils.py,sha256=mhv0anU5ZnbBsESMuCooT9FSNPkx2ObrVLlq7QNEZOI,13104
|
|
186
186
|
deepeval/integrations/llama_index/__init__.py,sha256=Ujs9ZBJFkuCWUDBJOF88UbM1Y-S6QFQhxSo0oQnEWNw,90
|
|
187
|
-
deepeval/integrations/llama_index/handler.py,sha256=
|
|
187
|
+
deepeval/integrations/llama_index/handler.py,sha256=uVfMs9VC2vp5J_A8lxy1OmVtha31wvkJGzkp5GKhf-A,12367
|
|
188
188
|
deepeval/integrations/llama_index/utils.py,sha256=onmmo1vpn6cpOY5EhfTc0Uui7X6l1M0HD3sq-KVAesg,3380
|
|
189
189
|
deepeval/integrations/pydantic_ai/__init__.py,sha256=UIkXn_g6h9LTQXG1PaWu1eCFkCssIwG48WSvN46UWgU,202
|
|
190
190
|
deepeval/integrations/pydantic_ai/agent.py,sha256=-NKvpTUw3AxRNhuxVFcx9mw5BWCujzOwsaC8u7K0ubc,1178
|
|
191
|
-
deepeval/integrations/pydantic_ai/instrumentator.py,sha256=
|
|
191
|
+
deepeval/integrations/pydantic_ai/instrumentator.py,sha256=PPoGeJzkuoCerrjc-s_Nv8hn9DC54Jp-uXgJU0OI1Ug,13081
|
|
192
192
|
deepeval/integrations/pydantic_ai/otel.py,sha256=xWYnMT1HwcAmyWdoJa6C1sHwd5frP9_IcR8dj9sKsG0,2386
|
|
193
193
|
deepeval/integrations/pydantic_ai/test_instrumentator.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
194
|
deepeval/key_handler.py,sha256=lajMBgF2lCzbQpW4e6Y7cD9FOw0Qk5UOKS4_kIIHj6Y,9562
|
|
@@ -501,8 +501,8 @@ deepeval/test_run/hooks.py,sha256=Qnd06bk9RJN4WmFUzJrBAi3Xj261hzyzI2iRmG8wbKw,37
|
|
|
501
501
|
deepeval/test_run/hyperparameters.py,sha256=4yJkNgwL2y6eyWDTmUV62f5RUlfOui4R22wsJ5uTbto,3874
|
|
502
502
|
deepeval/test_run/test_run.py,sha256=csbj0KVsp1QDCFKEqthQzPSmxusjNQkoVfsWBnq2Z_s,41549
|
|
503
503
|
deepeval/tracing/__init__.py,sha256=aSOk_ZgL-K7CZzcyiaIa5peAiaPViDST5GhpHA3Adc8,614
|
|
504
|
-
deepeval/tracing/api.py,sha256=
|
|
505
|
-
deepeval/tracing/context.py,sha256=
|
|
504
|
+
deepeval/tracing/api.py,sha256=x6Ze5ruPDbuRsR8rS0524cvUkCQ7CxLoT0up1gMQWMk,5062
|
|
505
|
+
deepeval/tracing/context.py,sha256=KKP0Wp7zpzTzISyDceI7fndpVnehawI8rBMyRYEwb9U,5580
|
|
506
506
|
deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
|
|
507
507
|
deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
|
|
508
508
|
deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
|
|
@@ -514,14 +514,14 @@ deepeval/tracing/otel/test_exporter.py,sha256=bezihPGWJpwUEF3ZghxqhhorocVFTO2b43
|
|
|
514
514
|
deepeval/tracing/otel/utils.py,sha256=NVMN07JtxuvVPtdyTW7KFdTqQL3TpoCO-JdZeghQJBY,17859
|
|
515
515
|
deepeval/tracing/patchers.py,sha256=Oi9wao3oDYhcviv7p0KoWBeS9ne7rHLa2gh9AR9EyiU,6882
|
|
516
516
|
deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
|
|
517
|
-
deepeval/tracing/trace_context.py,sha256=
|
|
517
|
+
deepeval/tracing/trace_context.py,sha256=RkXSlAWuMzuRrvepsM0PlGaWwX7QBstkWv_frYn_7CE,3718
|
|
518
518
|
deepeval/tracing/trace_test_manager.py,sha256=wt4y7EWTRc4Bw938-UFFtXHkdFFOrnx6JaIk7J5Iulw,555
|
|
519
|
-
deepeval/tracing/tracing.py,sha256=
|
|
520
|
-
deepeval/tracing/types.py,sha256=
|
|
519
|
+
deepeval/tracing/tracing.py,sha256=RS3mBV-63_vDyz-WxYPir34u0BF3mPnAJWee1aCc1sc,46892
|
|
520
|
+
deepeval/tracing/types.py,sha256=PUXDC1JZDaAalPc3uUHywkt2GE2hZ-2ocGP0Fe4sB2E,6120
|
|
521
521
|
deepeval/tracing/utils.py,sha256=mdvhYAxDNsdnusaEXJd-c-_O2Jn6S3xSuzRvLO1Jz4U,5684
|
|
522
522
|
deepeval/utils.py,sha256=Wsu95g6t1wdttxWIESVwuUxbml7C-9ZTsV7qHCQI3Xg,27259
|
|
523
|
-
deepeval-3.8.
|
|
524
|
-
deepeval-3.8.
|
|
525
|
-
deepeval-3.8.
|
|
526
|
-
deepeval-3.8.
|
|
527
|
-
deepeval-3.8.
|
|
523
|
+
deepeval-3.8.3.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
524
|
+
deepeval-3.8.3.dist-info/METADATA,sha256=BXj2MMYGn2usVl9VX5fcXNOHK-gp2DEY5uUDlP4kObQ,18752
|
|
525
|
+
deepeval-3.8.3.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
526
|
+
deepeval-3.8.3.dist-info/entry_points.txt,sha256=NoismUQfwLOojSGZmBrdcpwfaoFRAzUhBvZD3UwOKog,95
|
|
527
|
+
deepeval-3.8.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|