deepeval 3.5.8__py3-none-any.whl → 3.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/integrations/pydantic_ai/__init__.py +3 -3
- deepeval/integrations/pydantic_ai/agent.py +9 -327
- deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
- deepeval/integrations/pydantic_ai/otel.py +8 -2
- deepeval/tracing/otel/exporter.py +236 -174
- deepeval/tracing/otel/utils.py +95 -7
- {deepeval-3.5.8.dist-info → deepeval-3.5.9.dist-info}/METADATA +1 -1
- {deepeval-3.5.8.dist-info → deepeval-3.5.9.dist-info}/RECORD +12 -13
- deepeval/integrations/pydantic_ai/patcher.py +0 -484
- deepeval/integrations/pydantic_ai/utils.py +0 -323
- {deepeval-3.5.8.dist-info → deepeval-3.5.9.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.5.9.dist-info}/WHEEL +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.5.9.dist-info}/entry_points.txt +0 -0
|
@@ -10,9 +10,11 @@ from collections import defaultdict
|
|
|
10
10
|
import typing
|
|
11
11
|
import json
|
|
12
12
|
|
|
13
|
+
from deepeval.prompt.prompt import Prompt
|
|
13
14
|
from deepeval.telemetry import capture_tracing_integration
|
|
14
15
|
from deepeval.tracing import trace_manager
|
|
15
16
|
from deepeval.tracing.types import (
|
|
17
|
+
Trace,
|
|
16
18
|
TraceSpanStatus,
|
|
17
19
|
RetrieverSpan,
|
|
18
20
|
AgentSpan,
|
|
@@ -21,11 +23,14 @@ from deepeval.tracing.types import (
|
|
|
21
23
|
ToolSpan,
|
|
22
24
|
)
|
|
23
25
|
from deepeval.tracing.otel.utils import (
|
|
26
|
+
check_pydantic_ai_agent_input_output,
|
|
27
|
+
check_pydantic_ai_trace_input_output,
|
|
24
28
|
check_tool_input_parameters_from_gen_ai_attributes,
|
|
25
29
|
check_span_type_from_gen_ai_attributes,
|
|
26
30
|
check_model_from_gen_ai_attributes,
|
|
27
31
|
check_llm_input_from_gen_ai_attributes,
|
|
28
32
|
check_tool_name_from_gen_ai_attributes,
|
|
33
|
+
check_tool_output,
|
|
29
34
|
set_trace_time,
|
|
30
35
|
to_hex_string,
|
|
31
36
|
parse_string,
|
|
@@ -85,14 +90,21 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
85
90
|
_test_run_id: Optional[str] = None,
|
|
86
91
|
) -> SpanExportResult:
|
|
87
92
|
# build forest of spans
|
|
93
|
+
# for span in spans:
|
|
94
|
+
# print("--------------------------------")
|
|
95
|
+
# print(span.to_json())
|
|
96
|
+
# print("--------------------------------")
|
|
97
|
+
# return SpanExportResult.SUCCESS
|
|
98
|
+
|
|
99
|
+
################ Build Forest of Spans ################
|
|
88
100
|
forest = self._build_span_forest(spans)
|
|
89
101
|
|
|
90
|
-
|
|
102
|
+
################ Convert Forest of Spans to Forest of Base Span Wrappers ################
|
|
91
103
|
spans_wrappers_forest: List[List[BaseSpanWrapper]] = []
|
|
104
|
+
|
|
92
105
|
for span_list in forest:
|
|
93
106
|
spans_wrappers_list: List[BaseSpanWrapper] = []
|
|
94
107
|
for span in span_list:
|
|
95
|
-
|
|
96
108
|
base_span_wrapper = self._convert_readable_span_to_base_span(
|
|
97
109
|
span
|
|
98
110
|
)
|
|
@@ -100,10 +112,11 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
100
112
|
spans_wrappers_list.append(base_span_wrapper)
|
|
101
113
|
spans_wrappers_forest.append(spans_wrappers_list)
|
|
102
114
|
|
|
103
|
-
|
|
115
|
+
################ Add Spans to Trace Manager ################
|
|
104
116
|
for spans_wrappers_list in spans_wrappers_forest:
|
|
105
117
|
for base_span_wrapper in spans_wrappers_list:
|
|
106
118
|
|
|
119
|
+
# get current trace
|
|
107
120
|
current_trace = trace_manager.get_trace_by_uuid(
|
|
108
121
|
base_span_wrapper.base_span.trace_uuid
|
|
109
122
|
)
|
|
@@ -112,118 +125,18 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
112
125
|
trace_uuid=base_span_wrapper.base_span.trace_uuid
|
|
113
126
|
)
|
|
114
127
|
|
|
128
|
+
# set confident api key
|
|
115
129
|
if api_key:
|
|
116
130
|
current_trace.confident_api_key = api_key
|
|
117
131
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
== TraceSpanStatus.ERRORED
|
|
123
|
-
):
|
|
124
|
-
current_trace.status = TraceSpanStatus.ERRORED
|
|
125
|
-
|
|
126
|
-
# set the trace attributes (to be deprecated)
|
|
127
|
-
if base_span_wrapper.trace_attributes:
|
|
128
|
-
|
|
129
|
-
if base_span_wrapper.trace_attributes.name:
|
|
130
|
-
current_trace.name = (
|
|
131
|
-
base_span_wrapper.trace_attributes.name
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
if base_span_wrapper.trace_attributes.tags:
|
|
135
|
-
current_trace.tags = (
|
|
136
|
-
base_span_wrapper.trace_attributes.tags
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
if base_span_wrapper.trace_attributes.thread_id:
|
|
140
|
-
current_trace.thread_id = (
|
|
141
|
-
base_span_wrapper.trace_attributes.thread_id
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
if base_span_wrapper.trace_attributes.user_id:
|
|
145
|
-
current_trace.user_id = (
|
|
146
|
-
base_span_wrapper.trace_attributes.user_id
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
if base_span_wrapper.trace_attributes.metadata:
|
|
150
|
-
current_trace.metadata = (
|
|
151
|
-
base_span_wrapper.trace_attributes.metadata
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
# set the trace attributes
|
|
155
|
-
if base_span_wrapper.trace_name and isinstance(
|
|
156
|
-
base_span_wrapper.trace_name, str
|
|
157
|
-
):
|
|
158
|
-
current_trace.name = base_span_wrapper.trace_name
|
|
159
|
-
|
|
160
|
-
if base_span_wrapper.trace_tags and isinstance(
|
|
161
|
-
base_span_wrapper.trace_tags, list
|
|
162
|
-
):
|
|
163
|
-
try:
|
|
164
|
-
current_trace.tags = [
|
|
165
|
-
str(tag) for tag in base_span_wrapper.trace_tags
|
|
166
|
-
]
|
|
167
|
-
except Exception:
|
|
168
|
-
pass
|
|
169
|
-
|
|
170
|
-
if base_span_wrapper.trace_metadata and isinstance(
|
|
171
|
-
base_span_wrapper.trace_metadata, dict
|
|
172
|
-
):
|
|
173
|
-
try:
|
|
174
|
-
current_trace.metadata = (
|
|
175
|
-
base_span_wrapper.trace_metadata
|
|
176
|
-
)
|
|
177
|
-
except Exception:
|
|
178
|
-
pass
|
|
179
|
-
|
|
180
|
-
if base_span_wrapper.trace_thread_id and isinstance(
|
|
181
|
-
base_span_wrapper.trace_thread_id, str
|
|
182
|
-
):
|
|
183
|
-
current_trace.thread_id = base_span_wrapper.trace_thread_id
|
|
184
|
-
|
|
185
|
-
if base_span_wrapper.trace_user_id and isinstance(
|
|
186
|
-
base_span_wrapper.trace_user_id, str
|
|
187
|
-
):
|
|
188
|
-
current_trace.user_id = base_span_wrapper.trace_user_id
|
|
189
|
-
|
|
190
|
-
# set the trace input and output
|
|
191
|
-
if base_span_wrapper.trace_input:
|
|
192
|
-
current_trace.input = base_span_wrapper.trace_input
|
|
193
|
-
if base_span_wrapper.trace_output:
|
|
194
|
-
current_trace.output = base_span_wrapper.trace_output
|
|
195
|
-
|
|
196
|
-
# set the trace environment
|
|
197
|
-
if base_span_wrapper.trace_environment:
|
|
198
|
-
current_trace.environment = (
|
|
199
|
-
base_span_wrapper.trace_environment
|
|
200
|
-
)
|
|
201
|
-
|
|
202
|
-
# set the trace test case parameters
|
|
203
|
-
if base_span_wrapper.trace_retrieval_context:
|
|
204
|
-
current_trace.retrieval_context = (
|
|
205
|
-
base_span_wrapper.trace_retrieval_context
|
|
206
|
-
)
|
|
207
|
-
if base_span_wrapper.trace_context:
|
|
208
|
-
current_trace.context = base_span_wrapper.trace_context
|
|
209
|
-
if base_span_wrapper.trace_tools_called:
|
|
210
|
-
current_trace.tools_called = (
|
|
211
|
-
base_span_wrapper.trace_tools_called
|
|
212
|
-
)
|
|
213
|
-
if base_span_wrapper.trace_expected_tools:
|
|
214
|
-
current_trace.expected_tools = (
|
|
215
|
-
base_span_wrapper.trace_expected_tools
|
|
216
|
-
)
|
|
217
|
-
|
|
218
|
-
# set the trace metric collection
|
|
219
|
-
if base_span_wrapper.trace_metric_collection:
|
|
220
|
-
current_trace.metric_collection = (
|
|
221
|
-
base_span_wrapper.trace_metric_collection
|
|
222
|
-
)
|
|
132
|
+
################ Set Trace Attributes from ################
|
|
133
|
+
self._set_current_trace_attributes_from_base_span_wrapper(
|
|
134
|
+
current_trace, base_span_wrapper
|
|
135
|
+
)
|
|
223
136
|
|
|
137
|
+
# no removing span because it can be parent of other spans
|
|
224
138
|
trace_manager.add_span(base_span_wrapper.base_span)
|
|
225
139
|
trace_manager.add_span_to_trace(base_span_wrapper.base_span)
|
|
226
|
-
# no removing span because it can be parent of other spans
|
|
227
140
|
|
|
228
141
|
# safely end all active traces or return them for test runs
|
|
229
142
|
active_traces_keys = list(trace_manager.active_traces.keys())
|
|
@@ -244,22 +157,115 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
244
157
|
trace_manager.clear_traces()
|
|
245
158
|
return SpanExportResult.SUCCESS
|
|
246
159
|
|
|
160
|
+
def _set_current_trace_attributes_from_base_span_wrapper(
|
|
161
|
+
self, current_trace: Trace, base_span_wrapper: BaseSpanWrapper
|
|
162
|
+
):
|
|
163
|
+
# error trace if root span is errored
|
|
164
|
+
if base_span_wrapper.base_span.parent_uuid is None:
|
|
165
|
+
if base_span_wrapper.base_span.status == TraceSpanStatus.ERRORED:
|
|
166
|
+
current_trace.status = TraceSpanStatus.ERRORED
|
|
167
|
+
|
|
168
|
+
# set the trace attributes (to be deprecated)
|
|
169
|
+
if base_span_wrapper.trace_attributes:
|
|
170
|
+
|
|
171
|
+
if base_span_wrapper.trace_attributes.name:
|
|
172
|
+
current_trace.name = base_span_wrapper.trace_attributes.name
|
|
173
|
+
|
|
174
|
+
if base_span_wrapper.trace_attributes.tags:
|
|
175
|
+
current_trace.tags = base_span_wrapper.trace_attributes.tags
|
|
176
|
+
|
|
177
|
+
if base_span_wrapper.trace_attributes.thread_id:
|
|
178
|
+
current_trace.thread_id = (
|
|
179
|
+
base_span_wrapper.trace_attributes.thread_id
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
if base_span_wrapper.trace_attributes.user_id:
|
|
183
|
+
current_trace.user_id = (
|
|
184
|
+
base_span_wrapper.trace_attributes.user_id
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
if base_span_wrapper.trace_attributes.metadata:
|
|
188
|
+
current_trace.metadata = (
|
|
189
|
+
base_span_wrapper.trace_attributes.metadata
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# set the trace attributes
|
|
193
|
+
if base_span_wrapper.trace_name and isinstance(
|
|
194
|
+
base_span_wrapper.trace_name, str
|
|
195
|
+
):
|
|
196
|
+
current_trace.name = base_span_wrapper.trace_name
|
|
197
|
+
|
|
198
|
+
if base_span_wrapper.trace_tags and isinstance(
|
|
199
|
+
base_span_wrapper.trace_tags, list
|
|
200
|
+
):
|
|
201
|
+
try:
|
|
202
|
+
current_trace.tags = [
|
|
203
|
+
str(tag) for tag in base_span_wrapper.trace_tags
|
|
204
|
+
]
|
|
205
|
+
except Exception:
|
|
206
|
+
pass
|
|
207
|
+
|
|
208
|
+
if base_span_wrapper.trace_metadata and isinstance(
|
|
209
|
+
base_span_wrapper.trace_metadata, dict
|
|
210
|
+
):
|
|
211
|
+
try:
|
|
212
|
+
current_trace.metadata = base_span_wrapper.trace_metadata
|
|
213
|
+
except Exception:
|
|
214
|
+
pass
|
|
215
|
+
|
|
216
|
+
if base_span_wrapper.trace_thread_id and isinstance(
|
|
217
|
+
base_span_wrapper.trace_thread_id, str
|
|
218
|
+
):
|
|
219
|
+
current_trace.thread_id = base_span_wrapper.trace_thread_id
|
|
220
|
+
|
|
221
|
+
if base_span_wrapper.trace_user_id and isinstance(
|
|
222
|
+
base_span_wrapper.trace_user_id, str
|
|
223
|
+
):
|
|
224
|
+
current_trace.user_id = base_span_wrapper.trace_user_id
|
|
225
|
+
|
|
226
|
+
# set the trace input and output
|
|
227
|
+
if base_span_wrapper.trace_input:
|
|
228
|
+
current_trace.input = base_span_wrapper.trace_input
|
|
229
|
+
if base_span_wrapper.trace_output:
|
|
230
|
+
current_trace.output = base_span_wrapper.trace_output
|
|
231
|
+
|
|
232
|
+
# set the trace environment
|
|
233
|
+
if base_span_wrapper.trace_environment:
|
|
234
|
+
current_trace.environment = base_span_wrapper.trace_environment
|
|
235
|
+
|
|
236
|
+
# set the trace test case parameters
|
|
237
|
+
if base_span_wrapper.trace_retrieval_context:
|
|
238
|
+
current_trace.retrieval_context = (
|
|
239
|
+
base_span_wrapper.trace_retrieval_context
|
|
240
|
+
)
|
|
241
|
+
if base_span_wrapper.trace_context:
|
|
242
|
+
current_trace.context = base_span_wrapper.trace_context
|
|
243
|
+
if base_span_wrapper.trace_tools_called:
|
|
244
|
+
current_trace.tools_called = base_span_wrapper.trace_tools_called
|
|
245
|
+
if base_span_wrapper.trace_expected_tools:
|
|
246
|
+
current_trace.expected_tools = (
|
|
247
|
+
base_span_wrapper.trace_expected_tools
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
# set the trace metric collection
|
|
251
|
+
if base_span_wrapper.trace_metric_collection:
|
|
252
|
+
current_trace.metric_collection = (
|
|
253
|
+
base_span_wrapper.trace_metric_collection
|
|
254
|
+
)
|
|
255
|
+
|
|
247
256
|
def _convert_readable_span_to_base_span(
|
|
248
257
|
self, span: ReadableSpan
|
|
249
258
|
) -> BaseSpanWrapper:
|
|
250
259
|
|
|
251
|
-
# Create typed spans
|
|
252
260
|
base_span = None
|
|
253
261
|
try:
|
|
254
|
-
base_span = self._prepare_boilerplate_base_span(span)
|
|
262
|
+
base_span = self.__prepare_boilerplate_base_span(span)
|
|
255
263
|
except Exception:
|
|
256
264
|
pass
|
|
257
265
|
|
|
258
|
-
# Creaete base span if no typed span
|
|
259
266
|
parent_uuid = (
|
|
260
267
|
to_hex_string(span.parent.span_id, 16) if span.parent else None
|
|
261
268
|
)
|
|
262
|
-
|
|
263
269
|
base_span_status = TraceSpanStatus.SUCCESS
|
|
264
270
|
base_span_error = None
|
|
265
271
|
|
|
@@ -279,34 +285,37 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
279
285
|
end_time=peb.epoch_nanos_to_perf_seconds(span.end_time),
|
|
280
286
|
)
|
|
281
287
|
|
|
282
|
-
#
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
span_name = span.attributes.get("confident.span.name")
|
|
286
|
-
|
|
287
|
-
raw_span_metric_collection = span.attributes.get(
|
|
288
|
-
"confident.span.metric_collection"
|
|
289
|
-
)
|
|
290
|
-
raw_span_context = span.attributes.get("confident.span.context")
|
|
291
|
-
raw_span_retrieval_context = span.attributes.get(
|
|
292
|
-
"confident.span.retrieval_context"
|
|
293
|
-
)
|
|
294
|
-
raw_span_tools_called = span.attributes.get(
|
|
295
|
-
"confident.span.tools_called"
|
|
288
|
+
# NOTE: Confident Span is reffered as base span in this codebase
|
|
289
|
+
self.__set_base_span_attributes(
|
|
290
|
+
base_span, span, base_span_status, base_span_error
|
|
296
291
|
)
|
|
297
|
-
if raw_span_tools_called and isinstance(raw_span_tools_called, tuple):
|
|
298
|
-
raw_span_tools_called = list(raw_span_tools_called)
|
|
299
292
|
|
|
300
|
-
|
|
301
|
-
|
|
293
|
+
base_span_wrapper = BaseSpanWrapper(base_span=base_span)
|
|
294
|
+
|
|
295
|
+
self.__set_trace_attributes(base_span_wrapper, span)
|
|
296
|
+
|
|
297
|
+
################ Set Custom attributes from different integrations ################
|
|
298
|
+
self.__set_custom_trace_input_output(base_span_wrapper, span)
|
|
299
|
+
|
|
300
|
+
return base_span_wrapper
|
|
301
|
+
|
|
302
|
+
def __set_custom_trace_input_output(
|
|
303
|
+
self, base_span_wrapper: BaseSpanWrapper, span: ReadableSpan
|
|
304
|
+
):
|
|
305
|
+
|
|
306
|
+
# check for pydantic ai trace input and output
|
|
307
|
+
pydantic_trace_input, pydantic_trace_output = (
|
|
308
|
+
check_pydantic_ai_trace_input_output(span)
|
|
302
309
|
)
|
|
303
|
-
if raw_span_expected_tools and isinstance(
|
|
304
|
-
raw_span_expected_tools, tuple
|
|
305
|
-
):
|
|
306
|
-
raw_span_expected_tools = list(raw_span_expected_tools)
|
|
307
310
|
|
|
308
|
-
|
|
311
|
+
if not base_span_wrapper.trace_input and pydantic_trace_input:
|
|
312
|
+
base_span_wrapper.trace_input = pydantic_trace_input
|
|
313
|
+
if not base_span_wrapper.trace_output and pydantic_trace_output:
|
|
314
|
+
base_span_wrapper.trace_output = pydantic_trace_output
|
|
309
315
|
|
|
316
|
+
def __set_trace_attributes(
|
|
317
|
+
self, base_span_wrapper: BaseSpanWrapper, span: ReadableSpan
|
|
318
|
+
):
|
|
310
319
|
# Extract Trace Attributes
|
|
311
320
|
trace_name = span.attributes.get("confident.trace.name")
|
|
312
321
|
trace_thread_id = span.attributes.get("confident.trace.thread_id")
|
|
@@ -340,16 +349,6 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
340
349
|
"confident.trace.metric_collection"
|
|
341
350
|
)
|
|
342
351
|
|
|
343
|
-
# Validate Span Attributes
|
|
344
|
-
span_retrieval_context = parse_list_of_strings(
|
|
345
|
-
raw_span_retrieval_context
|
|
346
|
-
)
|
|
347
|
-
span_context = parse_list_of_strings(raw_span_context)
|
|
348
|
-
span_tools_called = self._parse_list_of_tools(raw_span_tools_called)
|
|
349
|
-
span_expected_tools = self._parse_list_of_tools(raw_span_expected_tools)
|
|
350
|
-
span_metadata = self._parse_json_string(raw_span_metadata)
|
|
351
|
-
span_metric_collection = parse_string(raw_span_metric_collection)
|
|
352
|
-
|
|
353
352
|
# Validate Trace Attributes
|
|
354
353
|
trace_tags = parse_list_of_strings(raw_trace_tags)
|
|
355
354
|
trace_retrieval_context = parse_list_of_strings(
|
|
@@ -363,6 +362,72 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
363
362
|
trace_metadata = self._parse_json_string(raw_trace_metadata)
|
|
364
363
|
trace_metric_collection = parse_string(raw_trace_metric_collection)
|
|
365
364
|
|
|
365
|
+
base_span_wrapper.trace_input = trace_input
|
|
366
|
+
base_span_wrapper.trace_output = trace_output
|
|
367
|
+
base_span_wrapper.trace_name = trace_name
|
|
368
|
+
base_span_wrapper.trace_tags = trace_tags
|
|
369
|
+
base_span_wrapper.trace_metadata = trace_metadata
|
|
370
|
+
base_span_wrapper.trace_thread_id = trace_thread_id
|
|
371
|
+
base_span_wrapper.trace_user_id = trace_user_id
|
|
372
|
+
base_span_wrapper.trace_retrieval_context = trace_retrieval_context
|
|
373
|
+
base_span_wrapper.trace_context = trace_context
|
|
374
|
+
base_span_wrapper.trace_tools_called = trace_tools_called
|
|
375
|
+
base_span_wrapper.trace_expected_tools = trace_expected_tools
|
|
376
|
+
base_span_wrapper.trace_metric_collection = trace_metric_collection
|
|
377
|
+
base_span_wrapper.trace_environment = trace_environment
|
|
378
|
+
|
|
379
|
+
# Resource attributes
|
|
380
|
+
resource_attributes = span.resource.attributes
|
|
381
|
+
if resource_attributes:
|
|
382
|
+
environment = resource_attributes.get("confident.trace.environment")
|
|
383
|
+
if environment and isinstance(environment, str):
|
|
384
|
+
base_span_wrapper.trace_environment = environment
|
|
385
|
+
|
|
386
|
+
def __set_base_span_attributes(
|
|
387
|
+
self,
|
|
388
|
+
base_span: BaseSpan,
|
|
389
|
+
span: ReadableSpan,
|
|
390
|
+
base_span_status: TraceSpanStatus,
|
|
391
|
+
base_span_error: Optional[str],
|
|
392
|
+
):
|
|
393
|
+
span_input = span.attributes.get("confident.span.input")
|
|
394
|
+
span_output = span.attributes.get("confident.span.output")
|
|
395
|
+
|
|
396
|
+
span_name = span.attributes.get("confident.span.name")
|
|
397
|
+
|
|
398
|
+
raw_span_metric_collection = span.attributes.get(
|
|
399
|
+
"confident.span.metric_collection"
|
|
400
|
+
)
|
|
401
|
+
raw_span_context = span.attributes.get("confident.span.context")
|
|
402
|
+
raw_span_retrieval_context = span.attributes.get(
|
|
403
|
+
"confident.span.retrieval_context"
|
|
404
|
+
)
|
|
405
|
+
raw_span_tools_called = span.attributes.get(
|
|
406
|
+
"confident.span.tools_called"
|
|
407
|
+
)
|
|
408
|
+
if raw_span_tools_called and isinstance(raw_span_tools_called, tuple):
|
|
409
|
+
raw_span_tools_called = list(raw_span_tools_called)
|
|
410
|
+
|
|
411
|
+
raw_span_expected_tools = span.attributes.get(
|
|
412
|
+
"confident.span.expected_tools"
|
|
413
|
+
)
|
|
414
|
+
if raw_span_expected_tools and isinstance(
|
|
415
|
+
raw_span_expected_tools, tuple
|
|
416
|
+
):
|
|
417
|
+
raw_span_expected_tools = list(raw_span_expected_tools)
|
|
418
|
+
|
|
419
|
+
raw_span_metadata = span.attributes.get("confident.span.metadata")
|
|
420
|
+
|
|
421
|
+
# Validate Span Attributes
|
|
422
|
+
span_retrieval_context = parse_list_of_strings(
|
|
423
|
+
raw_span_retrieval_context
|
|
424
|
+
)
|
|
425
|
+
span_context = parse_list_of_strings(raw_span_context)
|
|
426
|
+
span_tools_called = self._parse_list_of_tools(raw_span_tools_called)
|
|
427
|
+
span_expected_tools = self._parse_list_of_tools(raw_span_expected_tools)
|
|
428
|
+
span_metadata = self._parse_json_string(raw_span_metadata)
|
|
429
|
+
span_metric_collection = parse_string(raw_span_metric_collection)
|
|
430
|
+
|
|
366
431
|
# Set Span Attributes
|
|
367
432
|
base_span.parent_uuid = (
|
|
368
433
|
to_hex_string(span.parent.span_id, 16) if span.parent else None
|
|
@@ -388,38 +453,16 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
388
453
|
if span_output:
|
|
389
454
|
base_span.output = span_output
|
|
390
455
|
|
|
391
|
-
|
|
392
|
-
resource_attributes = span.resource.attributes
|
|
393
|
-
if resource_attributes:
|
|
394
|
-
environment = resource_attributes.get("confident.trace.environment")
|
|
395
|
-
if environment and isinstance(environment, str):
|
|
396
|
-
trace_environment = environment
|
|
397
|
-
|
|
398
|
-
return BaseSpanWrapper(
|
|
399
|
-
base_span=base_span,
|
|
400
|
-
trace_input=trace_input,
|
|
401
|
-
trace_output=trace_output,
|
|
402
|
-
trace_name=trace_name,
|
|
403
|
-
trace_tags=trace_tags,
|
|
404
|
-
trace_metadata=trace_metadata,
|
|
405
|
-
trace_thread_id=trace_thread_id,
|
|
406
|
-
trace_user_id=trace_user_id,
|
|
407
|
-
trace_retrieval_context=trace_retrieval_context,
|
|
408
|
-
trace_context=trace_context,
|
|
409
|
-
trace_tools_called=trace_tools_called,
|
|
410
|
-
trace_expected_tools=trace_expected_tools,
|
|
411
|
-
trace_metric_collection=trace_metric_collection,
|
|
412
|
-
trace_environment=trace_environment,
|
|
413
|
-
)
|
|
414
|
-
|
|
415
|
-
def _prepare_boilerplate_base_span(
|
|
456
|
+
def __prepare_boilerplate_base_span(
|
|
416
457
|
self, span: ReadableSpan
|
|
417
458
|
) -> Optional[BaseSpan]:
|
|
459
|
+
|
|
460
|
+
################ Get Span Type ################
|
|
418
461
|
span_type = span.attributes.get("confident.span.type")
|
|
419
462
|
if not span_type:
|
|
420
463
|
span_type = check_span_type_from_gen_ai_attributes(span)
|
|
421
464
|
|
|
422
|
-
|
|
465
|
+
################ Get Required Fields ################
|
|
423
466
|
uuid = to_hex_string(span.context.span_id, 16)
|
|
424
467
|
status = (
|
|
425
468
|
TraceSpanStatus.ERRORED
|
|
@@ -434,6 +477,8 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
434
477
|
start_time = peb.epoch_nanos_to_perf_seconds(span.start_time)
|
|
435
478
|
end_time = peb.epoch_nanos_to_perf_seconds(span.end_time)
|
|
436
479
|
|
|
480
|
+
################ Populate Spans ################
|
|
481
|
+
|
|
437
482
|
#######################################################
|
|
438
483
|
### LLM Span
|
|
439
484
|
#######################################################
|
|
@@ -442,7 +487,7 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
442
487
|
model = span.attributes.get("confident.llm.model")
|
|
443
488
|
if not model:
|
|
444
489
|
model = check_model_from_gen_ai_attributes(span)
|
|
445
|
-
prompt = span.attributes.get("confident.llm.prompt")
|
|
490
|
+
# prompt = span.attributes.get("confident.llm.prompt")
|
|
446
491
|
input_token_count = span.attributes.get(
|
|
447
492
|
"confident.llm.input_token_count"
|
|
448
493
|
)
|
|
@@ -468,6 +513,16 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
468
513
|
output = [json.loads(o) for o in output]
|
|
469
514
|
except Exception:
|
|
470
515
|
pass
|
|
516
|
+
prompt = span.attributes.get("confident.span.prompt")
|
|
517
|
+
confident_prompt = None
|
|
518
|
+
if prompt and isinstance(prompt, str):
|
|
519
|
+
prompt = json.loads(prompt)
|
|
520
|
+
try:
|
|
521
|
+
confident_prompt = Prompt(alias=prompt["alias"])
|
|
522
|
+
confident_prompt.version = prompt["version"]
|
|
523
|
+
except Exception:
|
|
524
|
+
pass
|
|
525
|
+
|
|
471
526
|
llm_span = LlmSpan(
|
|
472
527
|
uuid=uuid,
|
|
473
528
|
status=status,
|
|
@@ -480,11 +535,12 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
480
535
|
model=model,
|
|
481
536
|
cost_per_input_token=cost_per_input_token,
|
|
482
537
|
cost_per_output_token=cost_per_output_token,
|
|
483
|
-
prompt=prompt,
|
|
538
|
+
# prompt=prompt,
|
|
484
539
|
input_token_count=input_token_count,
|
|
485
540
|
output_token_count=output_token_count,
|
|
486
541
|
input=input,
|
|
487
542
|
output=output,
|
|
543
|
+
prompt=confident_prompt,
|
|
488
544
|
)
|
|
489
545
|
return llm_span
|
|
490
546
|
|
|
@@ -514,6 +570,8 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
514
570
|
agent_handoffs.append(str(handoff))
|
|
515
571
|
except Exception:
|
|
516
572
|
pass
|
|
573
|
+
|
|
574
|
+
input, output = check_pydantic_ai_agent_input_output(span)
|
|
517
575
|
agent_span = AgentSpan(
|
|
518
576
|
uuid=uuid,
|
|
519
577
|
status=status,
|
|
@@ -526,6 +584,8 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
526
584
|
name=name if name else "",
|
|
527
585
|
available_tools=available_tools,
|
|
528
586
|
agent_handoffs=agent_handoffs,
|
|
587
|
+
input=input,
|
|
588
|
+
output=output,
|
|
529
589
|
)
|
|
530
590
|
return agent_span
|
|
531
591
|
|
|
@@ -562,6 +622,7 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
562
622
|
name = check_tool_name_from_gen_ai_attributes(span)
|
|
563
623
|
description = span.attributes.get("confident.tool.description")
|
|
564
624
|
input = check_tool_input_parameters_from_gen_ai_attributes(span)
|
|
625
|
+
output = check_tool_output(span)
|
|
565
626
|
|
|
566
627
|
tool_span = ToolSpan(
|
|
567
628
|
uuid=uuid,
|
|
@@ -575,6 +636,7 @@ class ConfidentSpanExporter(SpanExporter):
|
|
|
575
636
|
name=name if name else "",
|
|
576
637
|
description=description,
|
|
577
638
|
input=input,
|
|
639
|
+
output=output,
|
|
578
640
|
)
|
|
579
641
|
return tool_span
|
|
580
642
|
|
deepeval/tracing/otel/utils.py
CHANGED
|
@@ -99,17 +99,32 @@ def validate_llm_test_case_data(
|
|
|
99
99
|
def check_llm_input_from_gen_ai_attributes(
|
|
100
100
|
span: ReadableSpan,
|
|
101
101
|
) -> Tuple[Optional[list], Optional[dict]]:
|
|
102
|
+
input = None
|
|
103
|
+
output = None
|
|
102
104
|
try:
|
|
103
|
-
input = json.loads(span.attributes.get("events"))
|
|
104
|
-
if input and isinstance(input, list):
|
|
105
|
-
# check if the last event is a genai choice
|
|
106
|
-
last_event = input.pop()
|
|
107
|
-
if last_event and last_event.get("event.name") == "gen_ai.choice":
|
|
108
|
-
return input, last_event
|
|
105
|
+
input = json.loads(span.attributes.get("gen_ai.input.messages"))
|
|
109
106
|
except Exception as e:
|
|
110
107
|
pass
|
|
108
|
+
try:
|
|
109
|
+
output = json.loads(span.attributes.get("gen_ai.output.messages"))
|
|
110
|
+
except Exception as e:
|
|
111
|
+
pass
|
|
112
|
+
|
|
113
|
+
if input is None and output is None:
|
|
114
|
+
try:
|
|
115
|
+
input = json.loads(span.attributes.get("events"))
|
|
116
|
+
if input and isinstance(input, list):
|
|
117
|
+
# check if the last event is a genai choice
|
|
118
|
+
last_event = input.pop()
|
|
119
|
+
if (
|
|
120
|
+
last_event
|
|
121
|
+
and last_event.get("event.name") == "gen_ai.choice"
|
|
122
|
+
):
|
|
123
|
+
output = last_event
|
|
124
|
+
except Exception as e:
|
|
125
|
+
pass
|
|
111
126
|
|
|
112
|
-
return None, None
|
|
127
|
+
return input, output
|
|
113
128
|
|
|
114
129
|
|
|
115
130
|
def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
|
|
@@ -307,3 +322,76 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
|
|
|
307
322
|
test_run.add_test_case(case)
|
|
308
323
|
|
|
309
324
|
# return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def check_pydantic_ai_agent_input_output(
|
|
328
|
+
span: ReadableSpan,
|
|
329
|
+
) -> Tuple[Optional[Any], Optional[Any]]:
|
|
330
|
+
input_val: Optional[Any] = None
|
|
331
|
+
output_val: Optional[Any] = None
|
|
332
|
+
|
|
333
|
+
# Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
|
|
334
|
+
try:
|
|
335
|
+
raw = span.attributes.get("pydantic_ai.all_messages")
|
|
336
|
+
if raw:
|
|
337
|
+
messages = raw
|
|
338
|
+
if isinstance(messages, str):
|
|
339
|
+
messages = json.loads(messages)
|
|
340
|
+
elif isinstance(messages, tuple):
|
|
341
|
+
messages = list(messages)
|
|
342
|
+
|
|
343
|
+
if isinstance(messages, list):
|
|
344
|
+
normalized = []
|
|
345
|
+
for m in messages:
|
|
346
|
+
if isinstance(m, str):
|
|
347
|
+
try:
|
|
348
|
+
m = json.loads(m)
|
|
349
|
+
except Exception:
|
|
350
|
+
pass
|
|
351
|
+
normalized.append(m)
|
|
352
|
+
|
|
353
|
+
first_user_idx = None
|
|
354
|
+
for i, m in enumerate(normalized):
|
|
355
|
+
role = None
|
|
356
|
+
if isinstance(m, dict):
|
|
357
|
+
role = m.get("role") or m.get("author")
|
|
358
|
+
if role == "user":
|
|
359
|
+
first_user_idx = i
|
|
360
|
+
break
|
|
361
|
+
|
|
362
|
+
input_val = (
|
|
363
|
+
normalized
|
|
364
|
+
if first_user_idx is None
|
|
365
|
+
else normalized[: first_user_idx + 1]
|
|
366
|
+
)
|
|
367
|
+
except Exception:
|
|
368
|
+
pass
|
|
369
|
+
|
|
370
|
+
# Output (agent final_result)
|
|
371
|
+
try:
|
|
372
|
+
if span.attributes.get("confident.span.type") == "agent":
|
|
373
|
+
output_val = span.attributes.get("final_result")
|
|
374
|
+
except Exception:
|
|
375
|
+
pass
|
|
376
|
+
|
|
377
|
+
return input_val, output_val
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def check_tool_output(span: ReadableSpan):
|
|
381
|
+
try:
|
|
382
|
+
return span.attributes.get("tool_response")
|
|
383
|
+
except Exception as e:
|
|
384
|
+
pass
|
|
385
|
+
return None
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def check_pydantic_ai_trace_input_output(
|
|
389
|
+
span: ReadableSpan,
|
|
390
|
+
) -> Tuple[Optional[Any], Optional[Any]]:
|
|
391
|
+
input_val: Optional[Any] = None
|
|
392
|
+
output_val: Optional[Any] = None
|
|
393
|
+
|
|
394
|
+
if not span.parent:
|
|
395
|
+
input_val, output_val = check_pydantic_ai_agent_input_output(span)
|
|
396
|
+
|
|
397
|
+
return input_val, output_val
|