deepeval 3.8.1__py3-none-any.whl → 3.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
1
- __version__: str = "3.8.1"
1
+ __version__: str = "3.8.3"
deepeval/confident/api.py CHANGED
@@ -26,16 +26,44 @@ API_BASE_URL_EU = "https://eu.api.confident-ai.com"
26
26
  retryable_exceptions = requests.exceptions.SSLError
27
27
 
28
28
 
29
+ def _infer_region_from_api_key(api_key: Optional[str]) -> Optional[str]:
30
+ """
31
+ Infer region from Confident API key prefix.
32
+
33
+ Supported:
34
+ - confident_eu_... => "EU"
35
+ - confident_us_... => "US"
36
+
37
+ Returns None if prefix is not recognized or api_key is falsy.
38
+ """
39
+ if not api_key:
40
+ return None
41
+ key = api_key.strip().lower()
42
+ if key.startswith("confident_eu_"):
43
+ return "EU"
44
+ if key.startswith("confident_us_"):
45
+ return "US"
46
+ return None
47
+
48
+
29
49
  def get_base_api_url():
30
50
  s = get_settings()
31
51
  if s.CONFIDENT_BASE_URL:
32
52
  base_url = s.CONFIDENT_BASE_URL.rstrip("/")
33
53
  return base_url
54
+ # If the user has explicitly set a region, respect it.
34
55
  region = KEY_FILE_HANDLER.fetch_data(KeyValues.CONFIDENT_REGION)
35
- if region == "EU":
56
+ if region:
57
+ return API_BASE_URL_EU if region == "EU" else API_BASE_URL
58
+
59
+ # Otherwise, infer region from the API key prefix.
60
+ api_key = get_confident_api_key()
61
+ inferred = _infer_region_from_api_key(api_key)
62
+ if inferred == "EU":
36
63
  return API_BASE_URL_EU
37
- else:
38
- return API_BASE_URL
64
+
65
+ # Default to US (backwards compatible)
66
+ return API_BASE_URL
39
67
 
40
68
 
41
69
  def get_confident_api_key() -> Optional[str]:
@@ -1,4 +1,4 @@
1
- from .handler import instrument_crewai
1
+ from .handler import instrument_crewai, reset_crewai_instrumentation
2
2
  from .subs import (
3
3
  DeepEvalCrew as Crew,
4
4
  DeepEvalAgent as Agent,
@@ -6,4 +6,11 @@ from .subs import (
6
6
  )
7
7
  from .tool import tool
8
8
 
9
- __all__ = ["instrument_crewai", "Crew", "Agent", "LLM", "tool"]
9
+ __all__ = [
10
+ "instrument_crewai",
11
+ "Crew",
12
+ "Agent",
13
+ "LLM",
14
+ "tool",
15
+ "reset_crewai_instrumentation",
16
+ ]
@@ -1,11 +1,12 @@
1
1
  import logging
2
2
  import deepeval
3
-
4
- from typing import Optional
3
+ from collections import defaultdict
4
+ from time import perf_counter
5
+ from typing import Optional, Tuple, Any, List, Union
5
6
  from deepeval.telemetry import capture_tracing_integration
6
7
  from deepeval.tracing.context import current_span_context, current_trace_context
7
- from deepeval.tracing.tracing import Observer
8
- from deepeval.tracing.types import LlmSpan
8
+ from deepeval.tracing.tracing import Observer, trace_manager
9
+ from deepeval.tracing.types import ToolSpan, SpanType, TraceSpanStatus
9
10
  from deepeval.config.settings import get_settings
10
11
 
11
12
 
@@ -45,7 +46,9 @@ except ImportError as e:
45
46
 
46
47
  crewai_installed = False
47
48
 
49
+ # GLOBAL STATE to prevent duplicate listeners
48
50
  IS_WRAPPED_ALL = False
51
+ _listener_instance = None
49
52
 
50
53
 
51
54
  def is_crewai_installed():
@@ -55,21 +58,49 @@ def is_crewai_installed():
55
58
  )
56
59
 
57
60
 
61
+ def _get_metrics_data(obj: Any) -> Tuple[Optional[str], Optional[Any]]:
62
+ """Helper to safely extract metrics attached to CrewAI objects."""
63
+
64
+ if not obj:
65
+ return None, None
66
+ metric_collection = getattr(obj, "_metric_collection", None)
67
+ metrics = getattr(obj, "_metrics", None)
68
+
69
+ if metric_collection is not None or metrics is not None:
70
+ return metric_collection, metrics
71
+
72
+ func = getattr(obj, "func", None)
73
+ if func:
74
+ metric_collection = getattr(func, "_metric_collection", None)
75
+ metrics = getattr(func, "_metrics", None)
76
+
77
+ return metric_collection, metrics
78
+
79
+
58
80
  class CrewAIEventsListener(BaseEventListener):
59
81
  def __init__(self):
60
82
  is_crewai_installed()
61
83
  super().__init__()
62
84
  self.span_observers: dict[str, Observer] = {}
85
+ self.tool_observers_stack: dict[str, List[Union[Observer, None]]] = (
86
+ defaultdict(list)
87
+ )
63
88
 
64
- @staticmethod
65
- def get_tool_execution_id(source, event) -> str:
66
- source_id = id(source)
67
- task_id = getattr(event, "task_id", "unknown")
68
- agent_id = getattr(event, "agent_id", "unknown")
69
- tool_name = getattr(event, "tool_name", "unknown")
70
- execution_id = f"tool_{source_id}_{task_id}_{agent_id}_{tool_name}"
89
def reset_state(self):
    """Empty every observer registry held by this listener.

    ``span_observers`` and ``tool_observers_stack`` are cleared in place, in
    that order, so state from one run (for example one test) cannot leak
    into the next.
    """
    for registry_name in ("span_observers", "tool_observers_stack"):
        getattr(self, registry_name).clear()
71
93
 
72
- return execution_id
94
+ @staticmethod
95
+ def get_tool_stack_key(source, tool_name) -> str:
96
+ """
97
+ Generates a unique key for the tool stack.
98
+ FIX: Uses role/name instead of id() to be robust against object copying by CrewAI.
99
+ """
100
+ identifier = getattr(
101
+ source, "role", getattr(source, "name", str(id(source)))
102
+ )
103
+ return f"{tool_name}_{identifier}"
73
104
 
74
105
  @staticmethod
75
106
  def get_knowledge_execution_id(source, event) -> str:
@@ -79,98 +110,234 @@ class CrewAIEventsListener(BaseEventListener):
79
110
 
80
111
  return execution_id
81
112
 
113
+ @staticmethod
114
+ def get_llm_execution_id(source, event) -> str:
115
+ source_id = id(source)
116
+ return f"llm_{source_id}"
117
+
118
def _flatten_tool_span(self, span):
    """Re-parent the ToolSpan children of ``span`` onto ``span``'s parent.

    Used as a callback. Does nothing when ``span`` has no parent uuid, has
    no children, when the parent cannot be found through ``trace_manager``,
    or when none of the children is a ``ToolSpan``. Otherwise every
    ``ToolSpan`` child is appended to the parent's children (with its
    ``parent_uuid`` rewritten) and dropped from ``span.children``; children
    of other types stay where they are.
    """
    if not (span.parent_uuid and span.children):
        return

    parent_span = trace_manager.get_span_by_uuid(span.parent_uuid)
    if not parent_span:
        return

    # Tool spans nested under another span ("ghost nesting").
    nested_tools = [
        candidate
        for candidate in span.children
        if isinstance(candidate, ToolSpan)
    ]
    if not nested_tools:
        return

    if parent_span.children is None:
        parent_span.children = []

    for tool_span in nested_tools:
        tool_span.parent_uuid = parent_span.uuid
        parent_span.children.append(tool_span)

    # Keep only the non-tool children on the original span.
    span.children = [
        kept for kept in span.children if not isinstance(kept, ToolSpan)
    ]
147
+
82
148
  def setup_listeners(self, crewai_event_bus):
83
149
  @crewai_event_bus.on(CrewKickoffStartedEvent)
84
150
  def on_crew_started(source, event: CrewKickoffStartedEvent):
85
- # Assuming that this event is called in the crew.kickoff method
86
151
  current_span = current_span_context.get()
87
-
88
- # set the input
89
152
  if current_span:
90
153
  current_span.input = event.inputs
91
-
92
- # set trace input
93
154
  current_trace = current_trace_context.get()
94
155
  if current_trace:
95
156
  current_trace.input = event.inputs
96
157
 
97
158
  @crewai_event_bus.on(CrewKickoffCompletedEvent)
98
159
  def on_crew_completed(source, event: CrewKickoffCompletedEvent):
99
- # Assuming that this event is called in the crew.kickoff method
100
160
  current_span = current_span_context.get()
101
-
102
- # set the output
161
+ output = getattr(
162
+ event, "output", getattr(event, "result", str(event))
163
+ )
103
164
  if current_span:
104
- current_span.output = str(event.output)
105
-
106
- # set trace output
165
+ current_span.output = str(output)
107
166
  current_trace = current_trace_context.get()
108
167
  if current_trace:
109
- current_trace.output = str(event.output)
168
+ current_trace.output = str(output)
110
169
 
111
170
  @crewai_event_bus.on(LLMCallStartedEvent)
112
171
  def on_llm_started(source, event: LLMCallStartedEvent):
113
- # Assuming that this event is called in the llm.call method
114
- current_span = current_span_context.get()
115
-
116
- # set the input
117
- if current_span:
118
- current_span.input = event.messages
172
+ metric_collection, metrics = _get_metrics_data(source)
173
+ observer = Observer(
174
+ span_type="llm",
175
+ func_name="call",
176
+ observe_kwargs={"model": getattr(event, "model", "unknown")},
177
+ metric_collection=metric_collection,
178
+ metrics=metrics,
179
+ )
180
+ self.span_observers[self.get_llm_execution_id(source, event)] = (
181
+ observer
182
+ )
183
+ observer.__enter__()
119
184
 
120
- # set the model
121
- if isinstance(current_span, LlmSpan):
122
- current_span.model = event.model
185
+ if observer.trace_uuid:
186
+ span = trace_manager.get_span_by_uuid(observer.uuid)
187
+ if span:
188
+ msgs = getattr(event, "messages")
189
+ span.input = msgs
123
190
 
124
191
  @crewai_event_bus.on(LLMCallCompletedEvent)
125
192
  def on_llm_completed(source, event: LLMCallCompletedEvent):
126
- # Assuming that this event is called in the llm.call method
127
- current_span = current_span_context.get()
128
-
129
- # set the output
130
- if current_span:
131
- current_span.output = event.response
193
+ key = self.get_llm_execution_id(source, event)
194
+ if key in self.span_observers:
195
+ observer = self.span_observers.pop(key)
196
+ if observer:
197
+ current_span = current_span_context.get()
198
+ token = None
199
+ span_to_close = trace_manager.get_span_by_uuid(
200
+ observer.uuid
201
+ )
202
+
203
+ if span_to_close:
204
+ output = getattr(
205
+ event, "response", getattr(event, "output", "")
206
+ )
207
+ span_to_close.output = output
208
+ if (
209
+ not current_span
210
+ or current_span.uuid != observer.uuid
211
+ ):
212
+ token = current_span_context.set(span_to_close)
213
+
214
+ observer.__exit__(None, None, None)
215
+ if token:
216
+ current_span_context.reset(token)
132
217
 
133
218
  @crewai_event_bus.on(AgentExecutionStartedEvent)
134
219
  def on_agent_started(source, event: AgentExecutionStartedEvent):
135
- # Assuming that this event is called in the agent.execute_task method
136
220
  current_span = current_span_context.get()
137
-
138
- # set the input
139
221
  if current_span:
140
222
  current_span.input = event.task_prompt
141
223
 
142
224
  @crewai_event_bus.on(AgentExecutionCompletedEvent)
143
225
  def on_agent_completed(source, event: AgentExecutionCompletedEvent):
144
- # Assuming that this event is called in the agent.execute_task method
145
226
  current_span = current_span_context.get()
146
-
147
- # set the output
148
227
  if current_span:
149
- current_span.output = event.output
228
+ current_span.output = getattr(
229
+ event, "output", getattr(event, "result", "")
230
+ )
150
231
 
151
232
  @crewai_event_bus.on(ToolUsageStartedEvent)
152
233
  def on_tool_started(source, event: ToolUsageStartedEvent):
234
+ key = self.get_tool_stack_key(source, event.tool_name)
235
+
236
+ # 1. Internal Stack Check
237
+ if self.tool_observers_stack[key]:
238
+ self.tool_observers_stack[key].append(None)
239
+ return
240
+
241
+ # 2. SMART DEDUPING
242
+ current_span = current_span_context.get()
243
+ span_type = getattr(current_span, "type", None)
244
+ is_tool_span = span_type == "tool" or span_type == SpanType.TOOL
245
+ if (
246
+ is_tool_span
247
+ and getattr(current_span, "name", "") == event.tool_name
248
+ ):
249
+ self.tool_observers_stack[key].append(None)
250
+ return
251
+
252
+ metric_collection = None
253
+ metrics = None
254
+
255
+ if hasattr(source, "tools"):
256
+ for tools in source.tools:
257
+ if getattr(tools, "name", None) == event.tool_name:
258
+ metric_collection, metrics = _get_metrics_data(tools)
259
+ break
260
+
261
+ if not metric_collection:
262
+ agent = getattr(source, "agent", source)
263
+ metric_collection, metrics = _get_metrics_data(agent)
264
+
153
265
  observer = Observer(
154
266
  span_type="tool",
155
267
  func_name=event.tool_name,
156
268
  function_kwargs=event.tool_args,
269
+ metric_collection=metric_collection,
270
+ metrics=metrics,
157
271
  )
158
- self.span_observers[self.get_tool_execution_id(source, event)] = (
159
- observer
160
- )
272
+
273
+ self.tool_observers_stack[key].append(observer)
161
274
  observer.__enter__()
162
275
 
163
276
  @crewai_event_bus.on(ToolUsageFinishedEvent)
164
277
  def on_tool_completed(source, event: ToolUsageFinishedEvent):
165
- observer = self.span_observers.pop(
166
- self.get_tool_execution_id(source, event)
167
- )
278
+ key = self.get_tool_stack_key(source, event.tool_name)
279
+ observer = None
280
+
281
+ if (
282
+ key in self.tool_observers_stack
283
+ and self.tool_observers_stack[key]
284
+ ):
285
+ item = self.tool_observers_stack[key].pop()
286
+ if item is None:
287
+ return
288
+ observer = item
289
+
290
+ if not observer:
291
+ current_span = current_span_context.get()
292
+ if (
293
+ current_span
294
+ and getattr(current_span, "type", None)
295
+ in ["tool", SpanType.TOOL]
296
+ and getattr(current_span, "name", "") == event.tool_name
297
+ ):
298
+ current_span.output = getattr(
299
+ event, "output", getattr(event, "result", None)
300
+ )
301
+
302
+ if current_span.end_time is None:
303
+ current_span.end_time = perf_counter()
304
+
305
+ current_span.status = TraceSpanStatus.SUCCESS
306
+
307
+ self._flatten_tool_span(current_span)
308
+ trace_manager.remove_span(current_span.uuid)
309
+
310
+ if current_span.parent_uuid:
311
+ parent = trace_manager.get_span_by_uuid(
312
+ current_span.parent_uuid
313
+ )
314
+ current_span_context.set(parent if parent else None)
315
+ else:
316
+ current_span_context.set(None)
317
+ return
318
+
168
319
  if observer:
169
320
  current_span = current_span_context.get()
170
- if current_span:
171
- current_span.output = event.output
321
+ token = None
322
+ span_to_close = trace_manager.get_span_by_uuid(observer.uuid)
323
+
324
+ if span_to_close:
325
+ span_to_close.output = getattr(
326
+ event, "output", getattr(event, "result", None)
327
+ )
328
+ if not current_span or current_span.uuid != observer.uuid:
329
+ token = current_span_context.set(span_to_close)
330
+
331
+ observer.update_span_properties = self._flatten_tool_span
172
332
  observer.__exit__(None, None, None)
173
333
 
334
+ if span_to_close and span_to_close.end_time is None:
335
+ span_to_close.end_time = perf_counter()
336
+ span_to_close.status = TraceSpanStatus.SUCCESS
337
+
338
+ if token:
339
+ current_span_context.reset(token)
340
+
174
341
  @crewai_event_bus.on(KnowledgeRetrievalStartedEvent)
175
342
  def on_knowledge_started(source, event: KnowledgeRetrievalStartedEvent):
176
343
  observer = Observer(
@@ -187,18 +354,35 @@ class CrewAIEventsListener(BaseEventListener):
187
354
  def on_knowledge_completed(
188
355
  source, event: KnowledgeRetrievalCompletedEvent
189
356
  ):
190
- observer = self.span_observers.pop(
191
- self.get_knowledge_execution_id(source, event)
192
- )
193
- if observer:
194
- current_span = current_span_context.get()
195
- if current_span:
196
- current_span.input = event.query
197
- current_span.output = event.retrieved_knowledge
198
- observer.__exit__(None, None, None)
357
+ key = self.get_knowledge_execution_id(source, event)
358
+ if key in self.span_observers:
359
+ observer = self.span_observers.pop(key)
360
+ if observer:
361
+ current_span = current_span_context.get()
362
+ token = None
363
+ span_to_close = trace_manager.get_span_by_uuid(
364
+ observer.uuid
365
+ )
366
+
367
+ if span_to_close:
368
+ span_to_close.input = event.query
369
+ span_to_close.output = event.retrieved_knowledge
370
+
371
+ if (
372
+ not current_span
373
+ or current_span.uuid != observer.uuid
374
+ ):
375
+ token = current_span_context.set(span_to_close)
376
+
377
+ observer.__exit__(None, None, None)
378
+
379
+ if token:
380
+ current_span_context.reset(token)
199
381
 
200
382
 
201
383
  def instrument_crewai(api_key: Optional[str] = None):
384
+ global _listener_instance
385
+
202
386
  is_crewai_installed()
203
387
  with capture_tracing_integration("crewai"):
204
388
  if api_key:
@@ -206,7 +390,14 @@ def instrument_crewai(api_key: Optional[str] = None):
206
390
 
207
391
  wrap_all()
208
392
 
209
- CrewAIEventsListener()
393
+ if _listener_instance is None:
394
+ _listener_instance = CrewAIEventsListener()
395
+
396
+
397
def reset_crewai_instrumentation():
    """Clear the state held by the module-level CrewAI listener, if any.

    Does nothing while ``_listener_instance`` is unset. The listener object
    itself is kept; only its ``reset_state()`` method is called.
    """
    listener = _listener_instance
    if not listener:
        return
    listener.reset_state()
210
401
 
211
402
 
212
403
  def wrap_all():
@@ -218,15 +409,19 @@ def wrap_all():
218
409
  wrap_crew_kickoff_for_each,
219
410
  wrap_crew_kickoff_async,
220
411
  wrap_crew_kickoff_for_each_async,
221
- wrap_llm_call,
412
+ wrap_crew_akickoff,
413
+ wrap_crew_akickoff_for_each,
222
414
  wrap_agent_execute_task,
415
+ wrap_agent_aexecute_task,
223
416
  )
224
417
 
225
418
  wrap_crew_kickoff()
226
419
  wrap_crew_kickoff_for_each()
227
420
  wrap_crew_kickoff_async()
228
421
  wrap_crew_kickoff_for_each_async()
229
- wrap_llm_call()
422
+ wrap_crew_akickoff()
423
+ wrap_crew_akickoff_for_each()
230
424
  wrap_agent_execute_task()
425
+ wrap_agent_aexecute_task()
231
426
 
232
427
  IS_WRAPPED_ALL = True
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Type, TypeVar
1
+ from typing import List, Optional, Type, TypeVar, Callable
2
2
  from pydantic import PrivateAttr
3
3
 
4
4
  from deepeval.metrics.base_metric import BaseMetric
@@ -28,14 +28,10 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
28
28
  _metric_collection: Optional[str] = PrivateAttr(default=None)
29
29
  _metrics: Optional[List[BaseMetric]] = PrivateAttr(default=None)
30
30
 
31
- def __init__(
32
- self,
33
- *args,
34
- metrics: Optional[List[BaseMetric]] = None,
35
- metric_collection: Optional[str] = None,
36
- **kwargs
37
- ):
31
+ def __init__(self, *args, **kwargs):
38
32
  is_crewai_installed()
33
+ metric_collection = kwargs.pop("metric_collection", None)
34
+ metrics = kwargs.pop("metrics", None)
39
35
  super().__init__(*args, **kwargs)
40
36
  self._metric_collection = metric_collection
41
37
  self._metrics = metrics
@@ -45,7 +41,24 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
45
41
  return DeepEvalClass
46
42
 
47
43
 
48
- # Create the classes
44
def create_deepeval_llm(base_factory: Callable) -> Callable:
    """Wrap an LLM factory (function or class) so it accepts metric arguments.

    Args:
        base_factory: Callable that builds the underlying LLM instance.

    Returns:
        A callable with the same positional/keyword interface as
        ``base_factory`` plus two optional keyword arguments,
        ``metric_collection`` and ``metrics``. Both are removed before
        ``base_factory`` is called and then stored on the created instance
        as ``_metric_collection`` and ``_metrics`` on a best-effort basis.
    """

    def factory_wrapper(*args, **kwargs):
        is_crewai_installed()
        # Strip the DeepEval-only arguments so the wrapped factory never
        # sees keywords it does not know about.
        metric_collection = kwargs.pop("metric_collection", None)
        metrics = kwargs.pop("metrics", None)
        instance = base_factory(*args, **kwargs)
        try:
            instance._metric_collection = metric_collection
            instance._metrics = metrics
        except Exception:
            # Attaching metrics stays best-effort (the instance may reject
            # new attributes), but record the failure so that silently
            # dropped metrics can be diagnosed.
            import logging

            logging.getLogger(__name__).debug(
                "Could not attach metric_collection/metrics to %s instance",
                type(instance).__name__,
                exc_info=True,
            )
        return instance

    return factory_wrapper
60
+
61
+
49
62
  DeepEvalCrew = create_deepeval_class(Crew, "DeepEvalCrew")
50
63
  DeepEvalAgent = create_deepeval_class(Agent, "DeepEvalAgent")
51
- DeepEvalLLM = create_deepeval_class(LLM, "DeepEvalLLM")
64
+ DeepEvalLLM = create_deepeval_llm(LLM)
@@ -15,6 +15,20 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
15
15
  """
16
16
  crewai_kwargs = kwargs
17
17
 
18
def _attach_metadata(tool_instance):
    """Attach the enclosing call's metric settings to ``tool_instance``.

    First tries ``object.__setattr__`` for the attributes
    ``metric_collection`` and ``metrics``. If that raises, falls back to
    plain assignment of ``_metric_collection`` and ``_metrics``. A failure
    of the fallback is ignored. The instance is returned in every case.

    NOTE(review): the primary path writes names without a leading
    underscore while the fallback writes underscore-prefixed names; confirm
    which names the code reading these attributes expects.
    """
    primary_attrs = (
        ("metric_collection", metric_collection),
        ("metrics", metric),
    )
    try:
        for attr_name, attr_value in primary_attrs:
            object.__setattr__(tool_instance, attr_name, attr_value)
    except Exception:
        try:
            tool_instance._metric_collection = metric_collection
            tool_instance._metrics = metric
        except Exception:
            pass
    return tool_instance
31
+
18
32
  # Case 1: @tool (function passed directly)
19
33
  if len(args) == 1 and callable(args[0]):
20
34
  f = args[0]
@@ -29,7 +43,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
29
43
  result = f(*f_args, **f_kwargs)
30
44
  return result
31
45
 
32
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
46
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
47
+ return _attach_metadata(tool_instance)
33
48
 
34
49
  # Case 2: @tool("name")
35
50
  if len(args) == 1 and isinstance(args[0], str):
@@ -45,7 +60,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
45
60
  result = f(*f_args, **f_kwargs)
46
61
  return result
47
62
 
48
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
63
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
64
+ return _attach_metadata(tool_instance)
49
65
 
50
66
  return _decorator
51
67
 
@@ -64,7 +80,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
64
80
  result = f(*f_args, **f_kwargs)
65
81
  return result
66
82
 
67
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
83
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
84
+ return _attach_metadata(tool_instance)
68
85
 
69
86
  return _decorator
70
87