deepeval 3.8.1__py3-none-any.whl → 3.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/confident/api.py +31 -3
- deepeval/integrations/crewai/__init__.py +9 -2
- deepeval/integrations/crewai/handler.py +261 -66
- deepeval/integrations/crewai/subs.py +23 -10
- deepeval/integrations/crewai/tool.py +20 -3
- deepeval/integrations/crewai/wrapper.py +69 -15
- deepeval/integrations/langchain/callback.py +310 -14
- deepeval/integrations/langchain/utils.py +75 -24
- deepeval/integrations/llama_index/handler.py +69 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +50 -14
- deepeval/integrations/pydantic_ai/otel.py +9 -0
- deepeval/metrics/utils.py +11 -0
- deepeval/simulator/conversation_simulator.py +4 -2
- deepeval/telemetry.py +12 -91
- deepeval/tracing/api.py +1 -0
- deepeval/tracing/context.py +3 -0
- deepeval/tracing/trace_context.py +5 -0
- deepeval/tracing/tracing.py +7 -5
- deepeval/tracing/types.py +1 -0
- {deepeval-3.8.1.dist-info → deepeval-3.8.3.dist-info}/METADATA +1 -1
- {deepeval-3.8.1.dist-info → deepeval-3.8.3.dist-info}/RECORD +25 -25
- {deepeval-3.8.1.dist-info → deepeval-3.8.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.8.1.dist-info → deepeval-3.8.3.dist-info}/WHEEL +0 -0
- {deepeval-3.8.1.dist-info → deepeval-3.8.3.dist-info}/entry_points.txt +0 -0
deepeval/integrations/langchain/utils.py
CHANGED

@@ -1,5 +1,77 @@
-
+import uuid
+from typing import Any, List, Dict, Optional, Union, Literal, Callable
+from time import perf_counter
 from langchain_core.outputs import ChatGeneration
+from rich.progress import Progress
+
+from deepeval.metrics import BaseMetric
+from deepeval.tracing.context import current_span_context, current_trace_context
+from deepeval.tracing.tracing import trace_manager
+from deepeval.tracing.types import (
+    AgentSpan,
+    BaseSpan,
+    LlmSpan,
+    RetrieverSpan,
+    SpanType,
+    ToolSpan,
+    TraceSpanStatus,
+)
+
+
+def convert_chat_messages_to_input(
+    messages: list[list[Any]], **kwargs
+) -> List[Dict[str, str]]:
+    """
+    Convert LangChain chat messages to our internal format.
+
+    Args:
+        messages: list[list[BaseMessage]] - outer list is batches, inner is messages.
+        **kwargs: May contain invocation_params with tools definitions.
+
+    Returns:
+        List of dicts with 'role' and 'content' keys, matching the schema used
+        by parse_prompts_to_messages for consistency.
+    """
+    # Valid roles matching parse_prompts_to_messages
+    ROLE_MAPPING = {
+        "human": "human",
+        "user": "human",
+        "ai": "ai",
+        "assistant": "ai",
+        "system": "system",
+        "tool": "tool",
+        "function": "function",
+    }
+
+    result: List[Dict[str, str]] = []
+    for batch in messages:
+        for msg in batch:
+            # BaseMessage has .type (role) and .content
+            raw_role = getattr(msg, "type", "unknown")
+            content = getattr(msg, "content", "")
+
+            # Normalize role using same conventions as prompt parsing
+            role = ROLE_MAPPING.get(raw_role.lower(), raw_role)
+
+            # Convert content to string (handles empty content, lists, etc.)
+            if isinstance(content, list):
+                # Some messages have content as a list of content blocks
+                content_str = " ".join(
+                    str(c.get("text", c) if isinstance(c, dict) else c)
+                    for c in content
+                )
+            else:
+                content_str = str(content) if content else ""
+
+            result.append({"role": role, "content": content_str})
+
+    # Append tool definitions if present which matches parse_prompts_to_messages behavior
+    tools = kwargs.get("invocation_params", {}).get("tools", None)
+    if tools and isinstance(tools, list):
+        for tool in tools:
+            result.append({"role": "Tool Input", "content": str(tool)})
+
+    return result


 def parse_prompts_to_messages(
@@ -112,27 +184,6 @@ def safe_extract_model_name(
     return None


-from typing import Any, List, Dict, Optional, Union, Literal, Callable
-from langchain_core.outputs import ChatGeneration
-from time import perf_counter
-import uuid
-from rich.progress import Progress
-from deepeval.tracing.tracing import Observer
-
-from deepeval.metrics import BaseMetric
-from deepeval.tracing.context import current_span_context, current_trace_context
-from deepeval.tracing.tracing import trace_manager
-from deepeval.tracing.types import (
-    AgentSpan,
-    BaseSpan,
-    LlmSpan,
-    RetrieverSpan,
-    SpanType,
-    ToolSpan,
-    TraceSpanStatus,
-)
-
-
 def enter_current_context(
     span_type: Optional[
         Union[Literal["agent", "llm", "retriever", "tool"], str]
@@ -239,8 +290,8 @@ def enter_current_context(

     if (
         parent_span
-        and
-        and
+        and parent_span.progress is not None
+        and parent_span.pbar_callback_id is not None
     ):
         progress = parent_span.progress
         pbar_callback_id = parent_span.pbar_callback_id

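For orientation, a minimal usage sketch of the new convert_chat_messages_to_input helper follows, assuming it is importable from deepeval.integrations.langchain.utils in 3.8.3. SimpleNamespace objects stand in for LangChain's BaseMessage (only .type and .content are read), and the tool definition passed via invocation_params is illustrative.

# Sketch only: SimpleNamespace stands in for a langchain_core BaseMessage,
# since the helper only reads the .type and .content attributes.
from types import SimpleNamespace

from deepeval.integrations.langchain.utils import convert_chat_messages_to_input

batch = [
    SimpleNamespace(type="system", content="You are a helpful assistant."),
    SimpleNamespace(type="user", content="What's the weather in Paris?"),
    SimpleNamespace(
        type="ai",
        content=[{"type": "text", "text": "Let me check that for you."}],
    ),
]

messages = convert_chat_messages_to_input(
    [batch],
    invocation_params={"tools": [{"name": "get_weather"}]},  # hypothetical tool definition
)

# "user" is normalized to "human", list content is flattened to a string,
# and each tool definition is appended as a {"role": "Tool Input", ...} entry.
for m in messages:
    print(m["role"], "->", m["content"])
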
deepeval/integrations/llama_index/handler.py
CHANGED

@@ -21,6 +21,7 @@ from deepeval.tracing.types import (
 from deepeval.tracing.trace_context import (
     current_llm_context,
     current_agent_context,
+    current_trace_context,
 )
 from deepeval.test_case import ToolCall
 from deepeval.tracing.utils import make_json_serializable
@@ -40,7 +41,10 @@ try:
         LLMChatStartEvent,
         LLMChatEndEvent,
     )
-    from
+    from llama_index.core.instrumentation import Dispatcher
+    from llama_index.core.instrumentation.events.retrieval import (
+        RetrievalEndEvent,
+    )
     from deepeval.integrations.llama_index.utils import (
         parse_id,
         prepare_input_llm_test_case_params,
@@ -82,15 +86,23 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
                 input_messages.append({"role": role, "content": content})

             llm_span_context = current_llm_context.get()
-
+
+            parent_span = trace_manager.get_span_by_uuid(event.span_id)
+            if parent_span:
+                trace_uuid = parent_span.trace_uuid
+            else:
+                current_trace = current_trace_context.get()
+                if current_trace:
+                    trace_uuid = current_trace.uuid
+                else:
+                    trace_uuid = trace_manager.start_new_trace().uuid
+
             llm_span = LlmSpan(
                 name="ConfidentLLMSpan",
                 uuid=str(uuid.uuid4()),
                 status=TraceSpanStatus.IN_PROGRESS,
                 children=[],
-                trace_uuid=
-                    event.span_id
-                ).trace_uuid,
+                trace_uuid=trace_uuid,
                 parent_uuid=event.span_id,
                 start_time=perf_counter(),
                 model=getattr(event, "model_dict", {}).get(
@@ -128,6 +140,13 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
                 trace_manager.remove_span(llm_span.uuid)
                 del self.open_ai_astream_to_llm_span_map[event.span_id]

+        if isinstance(event, RetrievalEndEvent):
+            span = trace_manager.get_span_by_uuid(event.span_id)
+            if span:
+                span.retrieval_context = [
+                    node.node.get_content() for node in event.nodes
+                ]
+
     def new_span(
         self,
         id_: str,
@@ -139,18 +158,30 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
     ) -> Optional[LlamaIndexBaseSpan]:
         class_name, method_name = parse_id(id_)

-
-
-
-
-
-
+        current_trace = current_trace_context.get()
+        trace_uuid = None
+
+        if parent_span_id is None or (
+            class_name == "Workflow" and method_name == "run"
+        ):
+            if current_trace:
+                trace_uuid = current_trace.uuid
+            else:
+                trace_uuid = trace_manager.start_new_trace().uuid
+
+            if class_name == "Workflow" and method_name == "run":
+                parent_span_id = None
+
         elif trace_manager.get_span_by_uuid(parent_span_id):
             trace_uuid = trace_manager.get_span_by_uuid(
                 parent_span_id
             ).trace_uuid
+
         else:
-
+            if current_trace:
+                trace_uuid = current_trace.uuid
+            else:
+                trace_uuid = trace_manager.start_new_trace().uuid

         self.root_span_trace_id_map[id_] = trace_uuid

@@ -195,7 +226,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
                 else None
             ),
         )
-        elif method_name
+        elif method_name in ["acall", "call_tool", "acall_tool"]:
             span = ToolSpan(
                 uuid=id_,
                 status=TraceSpanStatus.IN_PROGRESS,
@@ -206,7 +237,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
                 input=bound_args.arguments,
                 name="Tool",
             )
-
+
         prepare_input_llm_test_case_params(
             class_name, method_name, span, bound_args.arguments
         )
@@ -215,6 +246,22 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):

         return span

+    def _get_output_value(self, result: Any) -> Any:
+        """Helper to ensure AgentChatResponse and similar objects are serialized as dicts."""
+        if hasattr(result, "response") and hasattr(result, "sources"):
+            if hasattr(result, "model_dump"):
+                return result.model_dump()
+            if hasattr(result, "to_dict"):
+                return result.to_dict()
+            return {"response": result.response, "sources": result.sources}
+
+        if hasattr(result, "response"):
+            if hasattr(result, "model_dump"):
+                return result.model_dump()
+            return {"response": result.response}
+
+        return result
+
     def prepare_to_exit_span(
         self,
         id_: str,
@@ -229,7 +276,8 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
             return None

         class_name, method_name = parse_id(id_)
-
+
+        if method_name in ["call_tool", "acall_tool"]:
             output_json = make_json_serializable(result)
             if output_json and isinstance(output_json, dict):
                 if base_span.tools_called is None:
@@ -243,7 +291,7 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
         )
         base_span.end_time = perf_counter()
         base_span.status = TraceSpanStatus.SUCCESS
-        base_span.output = result
+        base_span.output = self._get_output_value(result)

         if isinstance(base_span, ToolSpan):
             result_json = make_json_serializable(result)
@@ -265,7 +313,8 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):

         if base_span.parent_uuid is None:
             trace_manager.end_trace(base_span.trace_uuid)
-            self.root_span_trace_id_map
+            if base_span.uuid in self.root_span_trace_id_map:
+                self.root_span_trace_id_map.pop(base_span.uuid)

         return base_span

@@ -282,13 +331,12 @@ class LLamaIndexHandler(BaseEventHandler, BaseSpanHandler):
             return None

         base_span.end_time = perf_counter()
-        base_span.status =
-            TraceSpanStatus.SUCCESS
-        ) # find a way to add error and handle the span without the parent id
+        base_span.status = TraceSpanStatus.SUCCESS

         if base_span.parent_uuid is None:
             trace_manager.end_trace(base_span.trace_uuid)
-            self.root_span_trace_id_map
+            if base_span.uuid in self.root_span_trace_id_map:
+                self.root_span_trace_id_map.pop(base_span.uuid)

         return base_span

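The new _get_output_value helper picks a serialization strategy by duck-typing the result object: model_dump() is preferred, then to_dict(), then a plain dict fallback. The sketch below restates that order as a standalone function with an illustrative stand-in class (FakeAgentChatResponse is not a deepeval or LlamaIndex type).

# Standalone restatement of the serialization preference used by the new
# _get_output_value helper: model_dump() > to_dict() > plain dict fallback.
from typing import Any


class FakeAgentChatResponse:
    """Stand-in for LlamaIndex's AgentChatResponse (illustrative only)."""

    def __init__(self, response: str, sources: list):
        self.response = response
        self.sources = sources

    def to_dict(self) -> dict:
        return {"response": self.response, "sources": self.sources}


def get_output_value(result: Any) -> Any:
    # Objects carrying both .response and .sources are serialized as dicts.
    if hasattr(result, "response") and hasattr(result, "sources"):
        if hasattr(result, "model_dump"):
            return result.model_dump()
        if hasattr(result, "to_dict"):
            return result.to_dict()
        return {"response": result.response, "sources": result.sources}
    # Objects with only .response fall back to a one-key dict.
    if hasattr(result, "response"):
        if hasattr(result, "model_dump"):
            return result.model_dump()
        return {"response": result.response}
    # Anything else passes through unchanged.
    return result


print(get_output_value(FakeAgentChatResponse("Paris is sunny.", sources=[])))
# {'response': 'Paris is sunny.', 'sources': []}
print(get_output_value("plain strings pass through unchanged"))
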
deepeval/integrations/pydantic_ai/instrumentator.py
CHANGED

@@ -36,10 +36,14 @@ try:
         SpanProcessor as _SpanProcessor,
         TracerProvider,
     )
-    from opentelemetry.sdk.trace.export import
+    from opentelemetry.sdk.trace.export import (
+        BatchSpanProcessor,
+        SimpleSpanProcessor,
+    )
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
         OTLPSpanExporter,
     )
+    from opentelemetry.trace import set_tracer_provider
     from pydantic_ai.models.instrumented import (
         InstrumentationSettings as _BaseInstrumentationSettings,
     )
@@ -131,7 +135,12 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
     ):
         is_dependency_installed()

-
+        if trace_manager.environment is not None:
+            _environment = trace_manager.environment
+        elif settings.CONFIDENT_TRACE_ENVIRONMENT is not None:
+            _environment = settings.CONFIDENT_TRACE_ENVIRONMENT
+        else:
+            _environment = "development"
         if _environment and _environment in [
             "production",
             "staging",
@@ -166,7 +175,9 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
         trace_provider.add_span_processor(span_interceptor)

         if is_test_mode:
-            trace_provider.add_span_processor(
+            trace_provider.add_span_processor(
+                SimpleSpanProcessor(ConfidentSpanExporter())
+            )
         else:
             trace_provider.add_span_processor(
                 BatchSpanProcessor(
@@ -176,6 +187,12 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
                     )
                 )
             )
+        try:
+            set_tracer_provider(trace_provider)
+        except Exception as e:
+            # Handle case where provider is already set (optional warning)
+            logger.warning(f"Could not set global tracer provider: {e}")
+
         super().__init__(tracer_provider=trace_provider)


@@ -234,16 +251,14 @@ class SpanInterceptor(SpanProcessor):
             )

         # set agent name and metric collection
-
-        span.
-        span.
-
-
-
-
-
-                self.settings.agent_metric_collection,
-            )
+        agent_name = (
+            span.attributes.get("gen_ai.agent.name")
+            or span.attributes.get("pydantic_ai.agent.name")
+            or span.attributes.get("agent_name")
+        )
+
+        if agent_name:
+            self._add_agent_span(span, agent_name)

         # set llm metric collection
         if span.attributes.get("gen_ai.operation.name") in [
@@ -270,6 +285,19 @@ class SpanInterceptor(SpanProcessor):
             )

     def on_end(self, span):
+
+        already_processed = (
+            span.attributes.get("confident.span.type") == "agent"
+        )
+        if not already_processed:
+            agent_name = (
+                span.attributes.get("gen_ai.agent.name")
+                or span.attributes.get("pydantic_ai.agent.name")
+                or span.attributes.get("agent_name")
+            )
+            if agent_name:
+                self._add_agent_span(span, agent_name)
+
         if self.settings.is_test_mode:
             if span.attributes.get("confident.span.type") == "agent":

@@ -322,4 +350,12 @@ class SpanInterceptor(SpanProcessor):
                 trace.status = TraceSpanStatus.SUCCESS
                 trace.end_time = perf_counter()
                 trace_manager.traces_to_evaluate.append(trace)
-
+
+    def _add_agent_span(self, span, name):
+        span.set_attribute("confident.span.type", "agent")
+        span.set_attribute("confident.span.name", name)
+        if self.settings.agent_metric_collection:
+            span.set_attribute(
+                "confident.span.metric_collection",
+                self.settings.agent_metric_collection,
+            )

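The trace environment used by ConfidentInstrumentationSettings is now resolved with an explicit precedence: a value already set on trace_manager wins, then the CONFIDENT_TRACE_ENVIRONMENT setting, then a "development" default. A standalone sketch of that precedence (the function name here is illustrative, not deepeval's API):

from typing import Optional


def resolve_environment(
    trace_manager_environment: Optional[str],
    confident_trace_environment: Optional[str],
) -> str:
    # Explicit trace_manager value takes priority over the settings value,
    # which in turn takes priority over the default.
    if trace_manager_environment is not None:
        return trace_manager_environment
    if confident_trace_environment is not None:
        return confident_trace_environment
    return "development"


print(resolve_environment(None, "staging"))          # -> "staging"
print(resolve_environment("production", "staging"))  # -> "production"
print(resolve_environment(None, None))               # -> "development"
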
deepeval/integrations/pydantic_ai/otel.py
CHANGED

@@ -2,6 +2,7 @@ import warnings
 from typing import Optional
 from deepeval.telemetry import capture_tracing_integration
 from deepeval.config.settings import get_settings
+import logging

 try:
     from opentelemetry import trace
@@ -24,6 +25,9 @@ def is_opentelemetry_available():
     return True


+logger = logging.getLogger(__name__)
+settings = get_settings()
+
 settings = get_settings()
 # OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"

@@ -51,6 +55,11 @@ def instrument_pydantic_ai(api_key: Optional[str] = None):
                 )
             )
         )
+        try:
+            trace.set_tracer_provider(tracer_provider)
+        except Exception as e:
+            # Handle case where provider is already set (optional warning)
+            logger.warning(f"Could not set global tracer provider: {e}")

         # create an instrumented exporter
         from pydantic_ai.models.instrumented import InstrumentationSettings

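Both pydantic-ai entry points now register their tracer provider globally inside a try/except, so an already-configured provider is logged as a warning instead of surfacing as an error. A minimal sketch of that guarded registration using the public OpenTelemetry API; the bare TracerProvider with no exporters is for illustration only, not deepeval's actual setup.

import logging

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

logger = logging.getLogger(__name__)

tracer_provider = TracerProvider()
try:
    # Mirrors the guarded pattern in the diff: failure to install the global
    # provider (e.g. one is already set) should not break instrumentation.
    trace.set_tracer_provider(tracer_provider)
except Exception as e:
    logger.warning(f"Could not set global tracer provider: {e}")
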
deepeval/metrics/utils.py
CHANGED

@@ -320,6 +320,17 @@ def check_llm_test_case_params(
         metric.error = error_str
         raise ValueError(error_str)

+    # Centralized: if a metric requires actual_output, reject empty/whitespace
+    # (including empty multimodal outputs) as "missing params".
+    if LLMTestCaseParams.ACTUAL_OUTPUT in test_case_params:
+        actual_output = getattr(
+            test_case, LLMTestCaseParams.ACTUAL_OUTPUT.value
+        )
+        if isinstance(actual_output, str) and actual_output == "":
+            error_str = f"'actual_output' cannot be empty for the '{metric.__name__}' metric"
+            metric.error = error_str
+            raise MissingTestCaseParamsError(error_str)
+
     missing_params = []
     for param in test_case_params:
         if getattr(test_case, param.value) is None:

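check_llm_test_case_params now rejects an empty actual_output up front whenever a metric lists ACTUAL_OUTPUT among its required params, rather than letting the metric fail later. A standalone sketch of the guard; the exception class below is a stand-in for deepeval's internal MissingTestCaseParamsError.

class MissingTestCaseParamsError(Exception):
    """Stand-in for deepeval's internal exception of the same name."""


def check_actual_output(actual_output, metric_name: str) -> None:
    # Empty strings are treated as missing params, mirroring the new check.
    if isinstance(actual_output, str) and actual_output == "":
        raise MissingTestCaseParamsError(
            f"'actual_output' cannot be empty for the '{metric_name}' metric"
        )


check_actual_output("The capital of France is Paris.", "AnswerRelevancyMetric")  # passes
try:
    check_actual_output("", "AnswerRelevancyMetric")
except MissingTestCaseParamsError as e:
    print(e)
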
deepeval/simulator/conversation_simulator.py
CHANGED

@@ -610,7 +610,8 @@ class ConversationSimulator:
     ) -> BaseModel:
         if self.using_native_model:
             res, cost = self.simulator_model.generate(prompt, schema=schema)
-
+            if cost is not None:
+                self.simulation_cost += cost
             return res
         else:
             try:
@@ -630,7 +631,8 @@ class ConversationSimulator:
             res, cost = await self.simulator_model.a_generate(
                 prompt, schema=schema
             )
-
+            if cost is not None:
+                self.simulation_cost += cost
             return res
         else:
             try:

deepeval/telemetry.py
CHANGED

@@ -1,5 +1,4 @@
 from contextlib import contextmanager
-import logging
 import os
 import socket
 import sys
@@ -85,13 +84,6 @@ if not telemetry_opt_out():
 anonymous_public_ip = None

 if not telemetry_opt_out():
-    from opentelemetry import trace
-    from opentelemetry.sdk.trace import TracerProvider
-    from opentelemetry.sdk.trace.export import BatchSpanProcessor
-    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
-        OTLPSpanExporter,
-    )
-
     anonymous_public_ip = get_anonymous_public_ip()
     sentry_sdk.init(
         dsn="https://5ef587d58109ee45d6544f3657efdd1f@o4506098477236224.ingest.sentry.io/4506098479136768",
@@ -102,27 +94,6 @@ if not telemetry_opt_out():
         default_integrations=False, # Disable Sentry's default integrations
     )

-    # Set up the Tracer Provider
-    trace.set_tracer_provider(TracerProvider())
-    tracer_provider = trace.get_tracer_provider()
-
-    # New Relic License Key and OTLP Endpoint
-    NEW_RELIC_LICENSE_KEY = "1711c684db8a30361a7edb0d0398772cFFFFNRAL"
-    NEW_RELIC_OTLP_ENDPOINT = "https://otlp.nr-data.net:4317"
-    otlp_exporter = OTLPSpanExporter(
-        endpoint=NEW_RELIC_OTLP_ENDPOINT,
-        headers={"api-key": NEW_RELIC_LICENSE_KEY},
-    )
-
-    # Add the OTLP exporter to the span processor
-    span_processor = BatchSpanProcessor(otlp_exporter)
-    tracer_provider.add_span_processor(span_processor)
-
-    logging.getLogger("opentelemetry.exporter.otlp").setLevel(logging.CRITICAL)
-
-    # Create a tracer for your application
-    tracer = trace.get_tracer(__name__)
-
     # Initialize PostHog
     posthog = Posthog(
         project_api_key="phc_IXvGRcscJJoIb049PtjIZ65JnXQguOUZ5B5MncunFdB",
@@ -199,11 +170,7 @@ def capture_evaluation_run(type: str):
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -227,11 +194,7 @@ def capture_recommend_metrics():
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -259,11 +222,7 @@ def capture_metric_type(
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -297,11 +256,7 @@ def capture_synthesizer_run(
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -330,11 +285,7 @@ def capture_conversation_simulator_run(num_conversations: int):
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -360,11 +311,7 @@ def capture_guardrails(guards: List[str]):
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -391,11 +338,7 @@ def capture_benchmark_run(benchmark: str, num_tasks: int):
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -421,11 +364,7 @@ def capture_login_event():
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -451,11 +390,7 @@ def capture_view_event():
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 @contextmanager
@@ -478,11 +413,7 @@ def capture_pull_dataset():
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 # track metrics that are components and metrics that aren't components
@@ -509,11 +440,7 @@ def capture_send_trace():
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            yield span
+        yield


 # tracing integration
@@ -542,13 +469,7 @@ def capture_tracing_integration(integration_name: str):
         posthog.capture(
             distinct_id=distinct_id, event=event, properties=properties
         )
-
-        with tracer.start_as_current_span(event) as span:
-            for property, value in properties.items():
-                span.set_attribute(property, value)
-            # OTEL/New Relic filtering attributes
-            span.set_attribute("integration.name", integration_name)
-            yield span
+        yield


 #########################################################

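After this change the telemetry context managers only emit a PostHog event and then yield; the module-level OpenTelemetry tracer, the New Relic OTLP exporter, and the per-event span attributes are all removed. A minimal sketch of the simplified pattern; the names here are illustrative, not deepeval's actual helpers.

from contextlib import contextmanager


@contextmanager
def capture_example_event(posthog_client, distinct_id: str, properties: dict):
    # Stands in for the telemetry opt-out check in the real module.
    if posthog_client is not None:
        posthog_client.capture(
            distinct_id=distinct_id,
            event="example.event",
            properties=properties,
        )
    # Previously this yielded an OTel span annotated with the same properties;
    # now the context manager simply yields control back to the caller.
    yield


class FakePosthog:
    def capture(self, distinct_id, event, properties):
        print(f"captured {event} for {distinct_id}: {properties}")


with capture_example_event(FakePosthog(), "anon-123", {"feature": "evaluate"}):
    pass  # instrumented work runs here
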
deepeval/tracing/api.py
CHANGED

@@ -126,6 +126,7 @@ class TraceApi(BaseModel):
     input: Optional[Any] = Field(None)
     output: Optional[Any] = Field(None)
     status: Optional[TraceSpanApiStatus] = Field(TraceSpanApiStatus.SUCCESS)
+    test_case_id: Optional[str] = Field(None, alias="testCaseId")

     # additional test case parameters
     retrieval_context: Optional[List[str]] = Field(