deepeval 3.5.7__py3-none-any.whl → 3.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,9 +10,11 @@ from collections import defaultdict
10
10
  import typing
11
11
  import json
12
12
 
13
+ from deepeval.prompt.prompt import Prompt
13
14
  from deepeval.telemetry import capture_tracing_integration
14
15
  from deepeval.tracing import trace_manager
15
16
  from deepeval.tracing.types import (
17
+ Trace,
16
18
  TraceSpanStatus,
17
19
  RetrieverSpan,
18
20
  AgentSpan,
@@ -21,11 +23,14 @@ from deepeval.tracing.types import (
21
23
  ToolSpan,
22
24
  )
23
25
  from deepeval.tracing.otel.utils import (
26
+ check_pydantic_ai_agent_input_output,
27
+ check_pydantic_ai_trace_input_output,
24
28
  check_tool_input_parameters_from_gen_ai_attributes,
25
29
  check_span_type_from_gen_ai_attributes,
26
30
  check_model_from_gen_ai_attributes,
27
31
  check_llm_input_from_gen_ai_attributes,
28
32
  check_tool_name_from_gen_ai_attributes,
33
+ check_tool_output,
29
34
  set_trace_time,
30
35
  to_hex_string,
31
36
  parse_string,
@@ -85,14 +90,21 @@ class ConfidentSpanExporter(SpanExporter):
85
90
  _test_run_id: Optional[str] = None,
86
91
  ) -> SpanExportResult:
87
92
  # build forest of spans
93
+ # for span in spans:
94
+ # print("--------------------------------")
95
+ # print(span.to_json())
96
+ # print("--------------------------------")
97
+ # return SpanExportResult.SUCCESS
98
+
99
+ ################ Build Forest of Spans ################
88
100
  forest = self._build_span_forest(spans)
89
101
 
90
- # convert forest of spans to forest of base span wrappers
102
+ ################ Convert Forest of Spans to Forest of Base Span Wrappers ################
91
103
  spans_wrappers_forest: List[List[BaseSpanWrapper]] = []
104
+
92
105
  for span_list in forest:
93
106
  spans_wrappers_list: List[BaseSpanWrapper] = []
94
107
  for span in span_list:
95
-
96
108
  base_span_wrapper = self._convert_readable_span_to_base_span(
97
109
  span
98
110
  )
@@ -100,10 +112,11 @@ class ConfidentSpanExporter(SpanExporter):
100
112
  spans_wrappers_list.append(base_span_wrapper)
101
113
  spans_wrappers_forest.append(spans_wrappers_list)
102
114
 
103
- # add spans to trace manager
115
+ ################ Add Spans to Trace Manager ################
104
116
  for spans_wrappers_list in spans_wrappers_forest:
105
117
  for base_span_wrapper in spans_wrappers_list:
106
118
 
119
+ # get current trace
107
120
  current_trace = trace_manager.get_trace_by_uuid(
108
121
  base_span_wrapper.base_span.trace_uuid
109
122
  )
@@ -112,118 +125,18 @@ class ConfidentSpanExporter(SpanExporter):
112
125
  trace_uuid=base_span_wrapper.base_span.trace_uuid
113
126
  )
114
127
 
128
+ # set confident api key
115
129
  if api_key:
116
130
  current_trace.confident_api_key = api_key
117
131
 
118
- # error trace if root span is errored
119
- if base_span_wrapper.base_span.parent_uuid is None:
120
- if (
121
- base_span_wrapper.base_span.status
122
- == TraceSpanStatus.ERRORED
123
- ):
124
- current_trace.status = TraceSpanStatus.ERRORED
125
-
126
- # set the trace attributes (to be deprecated)
127
- if base_span_wrapper.trace_attributes:
128
-
129
- if base_span_wrapper.trace_attributes.name:
130
- current_trace.name = (
131
- base_span_wrapper.trace_attributes.name
132
- )
133
-
134
- if base_span_wrapper.trace_attributes.tags:
135
- current_trace.tags = (
136
- base_span_wrapper.trace_attributes.tags
137
- )
138
-
139
- if base_span_wrapper.trace_attributes.thread_id:
140
- current_trace.thread_id = (
141
- base_span_wrapper.trace_attributes.thread_id
142
- )
143
-
144
- if base_span_wrapper.trace_attributes.user_id:
145
- current_trace.user_id = (
146
- base_span_wrapper.trace_attributes.user_id
147
- )
148
-
149
- if base_span_wrapper.trace_attributes.metadata:
150
- current_trace.metadata = (
151
- base_span_wrapper.trace_attributes.metadata
152
- )
153
-
154
- # set the trace attributes
155
- if base_span_wrapper.trace_name and isinstance(
156
- base_span_wrapper.trace_name, str
157
- ):
158
- current_trace.name = base_span_wrapper.trace_name
159
-
160
- if base_span_wrapper.trace_tags and isinstance(
161
- base_span_wrapper.trace_tags, list
162
- ):
163
- try:
164
- current_trace.tags = [
165
- str(tag) for tag in base_span_wrapper.trace_tags
166
- ]
167
- except Exception:
168
- pass
169
-
170
- if base_span_wrapper.trace_metadata and isinstance(
171
- base_span_wrapper.trace_metadata, dict
172
- ):
173
- try:
174
- current_trace.metadata = (
175
- base_span_wrapper.trace_metadata
176
- )
177
- except Exception:
178
- pass
179
-
180
- if base_span_wrapper.trace_thread_id and isinstance(
181
- base_span_wrapper.trace_thread_id, str
182
- ):
183
- current_trace.thread_id = base_span_wrapper.trace_thread_id
184
-
185
- if base_span_wrapper.trace_user_id and isinstance(
186
- base_span_wrapper.trace_user_id, str
187
- ):
188
- current_trace.user_id = base_span_wrapper.trace_user_id
189
-
190
- # set the trace input and output
191
- if base_span_wrapper.trace_input:
192
- current_trace.input = base_span_wrapper.trace_input
193
- if base_span_wrapper.trace_output:
194
- current_trace.output = base_span_wrapper.trace_output
195
-
196
- # set the trace environment
197
- if base_span_wrapper.trace_environment:
198
- current_trace.environment = (
199
- base_span_wrapper.trace_environment
200
- )
201
-
202
- # set the trace test case parameters
203
- if base_span_wrapper.trace_retrieval_context:
204
- current_trace.retrieval_context = (
205
- base_span_wrapper.trace_retrieval_context
206
- )
207
- if base_span_wrapper.trace_context:
208
- current_trace.context = base_span_wrapper.trace_context
209
- if base_span_wrapper.trace_tools_called:
210
- current_trace.tools_called = (
211
- base_span_wrapper.trace_tools_called
212
- )
213
- if base_span_wrapper.trace_expected_tools:
214
- current_trace.expected_tools = (
215
- base_span_wrapper.trace_expected_tools
216
- )
217
-
218
- # set the trace metric collection
219
- if base_span_wrapper.trace_metric_collection:
220
- current_trace.metric_collection = (
221
- base_span_wrapper.trace_metric_collection
222
- )
132
+ ################ Set Trace Attributes from Base Span Wrapper ################
133
+ self._set_current_trace_attributes_from_base_span_wrapper(
134
+ current_trace, base_span_wrapper
135
+ )
223
136
 
137
+ # no removing span because it can be parent of other spans
224
138
  trace_manager.add_span(base_span_wrapper.base_span)
225
139
  trace_manager.add_span_to_trace(base_span_wrapper.base_span)
226
- # no removing span because it can be parent of other spans
227
140
 
228
141
  # safely end all active traces or return them for test runs
229
142
  active_traces_keys = list(trace_manager.active_traces.keys())
@@ -244,22 +157,115 @@ class ConfidentSpanExporter(SpanExporter):
244
157
  trace_manager.clear_traces()
245
158
  return SpanExportResult.SUCCESS
246
159
 
160
def _set_current_trace_attributes_from_base_span_wrapper(
    self, current_trace: Trace, base_span_wrapper: BaseSpanWrapper
):
    """Copy the trace-level values carried by a span wrapper onto its trace.

    Only truthy values are copied, so an attribute already set on
    ``current_trace`` is left untouched when the wrapper has nothing to
    offer for it. The legacy ``trace_attributes`` bundle is applied first,
    which lets the dedicated ``trace_*`` fields override it.

    Args:
        current_trace: Trace object that is mutated in place.
        base_span_wrapper: Wrapper holding the converted span together with
            the trace-level values extracted from it.
    """
    wrapper = base_span_wrapper
    span = wrapper.base_span

    # A span without a parent is a root span; if it errored, so did the trace.
    if span.parent_uuid is None and span.status == TraceSpanStatus.ERRORED:
        current_trace.status = TraceSpanStatus.ERRORED

    # Legacy attribute bundle (to be deprecated).
    legacy = wrapper.trace_attributes
    if legacy:
        for field in ("name", "tags", "thread_id", "user_id", "metadata"):
            legacy_value = getattr(legacy, field)
            if legacy_value:
                setattr(current_trace, field, legacy_value)

    # Dedicated trace attributes: each one is type-checked before use.
    name = wrapper.trace_name
    if name and isinstance(name, str):
        current_trace.name = name

    tags = wrapper.trace_tags
    if tags and isinstance(tags, list):
        try:
            current_trace.tags = [str(tag) for tag in tags]
        except Exception:
            # Best effort: keep whatever tags the trace already has.
            pass

    metadata = wrapper.trace_metadata
    if metadata and isinstance(metadata, dict):
        try:
            current_trace.metadata = metadata
        except Exception:
            # Best effort: keep whatever metadata the trace already has.
            pass

    thread_id = wrapper.trace_thread_id
    if thread_id and isinstance(thread_id, str):
        current_trace.thread_id = thread_id

    user_id = wrapper.trace_user_id
    if user_id and isinstance(user_id, str):
        current_trace.user_id = user_id

    # Remaining values are copied verbatim whenever they are truthy:
    # input/output, environment, test case parameters, metric collection.
    verbatim_fields = (
        ("trace_input", "input"),
        ("trace_output", "output"),
        ("trace_environment", "environment"),
        ("trace_retrieval_context", "retrieval_context"),
        ("trace_context", "context"),
        ("trace_tools_called", "tools_called"),
        ("trace_expected_tools", "expected_tools"),
        ("trace_metric_collection", "metric_collection"),
    )
    for wrapper_field, trace_field in verbatim_fields:
        value = getattr(wrapper, wrapper_field)
        if value:
            setattr(current_trace, trace_field, value)
255
+
247
256
  def _convert_readable_span_to_base_span(
248
257
  self, span: ReadableSpan
249
258
  ) -> BaseSpanWrapper:
250
259
 
251
- # Create typed spans
252
260
  base_span = None
253
261
  try:
254
- base_span = self._prepare_boilerplate_base_span(span)
262
+ base_span = self.__prepare_boilerplate_base_span(span)
255
263
  except Exception:
256
264
  pass
257
265
 
258
- # Creaete base span if no typed span
259
266
  parent_uuid = (
260
267
  to_hex_string(span.parent.span_id, 16) if span.parent else None
261
268
  )
262
-
263
269
  base_span_status = TraceSpanStatus.SUCCESS
264
270
  base_span_error = None
265
271
 
@@ -279,34 +285,37 @@ class ConfidentSpanExporter(SpanExporter):
279
285
  end_time=peb.epoch_nanos_to_perf_seconds(span.end_time),
280
286
  )
281
287
 
282
- # Extract Span Attributes
283
- span_input = span.attributes.get("confident.span.input")
284
- span_output = span.attributes.get("confident.span.output")
285
- span_name = span.attributes.get("confident.span.name")
286
-
287
- raw_span_metric_collection = span.attributes.get(
288
- "confident.span.metric_collection"
289
- )
290
- raw_span_context = span.attributes.get("confident.span.context")
291
- raw_span_retrieval_context = span.attributes.get(
292
- "confident.span.retrieval_context"
293
- )
294
- raw_span_tools_called = span.attributes.get(
295
- "confident.span.tools_called"
288
+ # NOTE: Confident Span is referred to as base span in this codebase
289
+ self.__set_base_span_attributes(
290
+ base_span, span, base_span_status, base_span_error
296
291
  )
297
- if raw_span_tools_called and isinstance(raw_span_tools_called, tuple):
298
- raw_span_tools_called = list(raw_span_tools_called)
299
292
 
300
- raw_span_expected_tools = span.attributes.get(
301
- "confident.span.expected_tools"
293
+ base_span_wrapper = BaseSpanWrapper(base_span=base_span)
294
+
295
+ self.__set_trace_attributes(base_span_wrapper, span)
296
+
297
+ ################ Set Custom attributes from different integrations ################
298
+ self.__set_custom_trace_input_output(base_span_wrapper, span)
299
+
300
+ return base_span_wrapper
301
+
302
def __set_custom_trace_input_output(
    self, base_span_wrapper: BaseSpanWrapper, span: ReadableSpan
):
    """Fill a missing trace input/output from Pydantic AI span attributes.

    Values already present on the wrapper are never overwritten; the
    Pydantic AI values only act as a fallback.

    Args:
        base_span_wrapper: Wrapper whose ``trace_input`` / ``trace_output``
            may be filled in place.
        span: The OpenTelemetry span the wrapper was built from.
    """
    fallback_input, fallback_output = check_pydantic_ai_trace_input_output(
        span
    )

    if fallback_input and not base_span_wrapper.trace_input:
        base_span_wrapper.trace_input = fallback_input

    if fallback_output and not base_span_wrapper.trace_output:
        base_span_wrapper.trace_output = fallback_output
309
315
 
316
+ def __set_trace_attributes(
317
+ self, base_span_wrapper: BaseSpanWrapper, span: ReadableSpan
318
+ ):
310
319
  # Extract Trace Attributes
311
320
  trace_name = span.attributes.get("confident.trace.name")
312
321
  trace_thread_id = span.attributes.get("confident.trace.thread_id")
@@ -340,16 +349,6 @@ class ConfidentSpanExporter(SpanExporter):
340
349
  "confident.trace.metric_collection"
341
350
  )
342
351
 
343
- # Validate Span Attributes
344
- span_retrieval_context = parse_list_of_strings(
345
- raw_span_retrieval_context
346
- )
347
- span_context = parse_list_of_strings(raw_span_context)
348
- span_tools_called = self._parse_list_of_tools(raw_span_tools_called)
349
- span_expected_tools = self._parse_list_of_tools(raw_span_expected_tools)
350
- span_metadata = self._parse_json_string(raw_span_metadata)
351
- span_metric_collection = parse_string(raw_span_metric_collection)
352
-
353
352
  # Validate Trace Attributes
354
353
  trace_tags = parse_list_of_strings(raw_trace_tags)
355
354
  trace_retrieval_context = parse_list_of_strings(
@@ -363,6 +362,72 @@ class ConfidentSpanExporter(SpanExporter):
363
362
  trace_metadata = self._parse_json_string(raw_trace_metadata)
364
363
  trace_metric_collection = parse_string(raw_trace_metric_collection)
365
364
 
365
+ base_span_wrapper.trace_input = trace_input
366
+ base_span_wrapper.trace_output = trace_output
367
+ base_span_wrapper.trace_name = trace_name
368
+ base_span_wrapper.trace_tags = trace_tags
369
+ base_span_wrapper.trace_metadata = trace_metadata
370
+ base_span_wrapper.trace_thread_id = trace_thread_id
371
+ base_span_wrapper.trace_user_id = trace_user_id
372
+ base_span_wrapper.trace_retrieval_context = trace_retrieval_context
373
+ base_span_wrapper.trace_context = trace_context
374
+ base_span_wrapper.trace_tools_called = trace_tools_called
375
+ base_span_wrapper.trace_expected_tools = trace_expected_tools
376
+ base_span_wrapper.trace_metric_collection = trace_metric_collection
377
+ base_span_wrapper.trace_environment = trace_environment
378
+
379
+ # Resource attributes
380
+ resource_attributes = span.resource.attributes
381
+ if resource_attributes:
382
+ environment = resource_attributes.get("confident.trace.environment")
383
+ if environment and isinstance(environment, str):
384
+ base_span_wrapper.trace_environment = environment
385
+
386
+ def __set_base_span_attributes(
387
+ self,
388
+ base_span: BaseSpan,
389
+ span: ReadableSpan,
390
+ base_span_status: TraceSpanStatus,
391
+ base_span_error: Optional[str],
392
+ ):
393
+ span_input = span.attributes.get("confident.span.input")
394
+ span_output = span.attributes.get("confident.span.output")
395
+
396
+ span_name = span.attributes.get("confident.span.name")
397
+
398
+ raw_span_metric_collection = span.attributes.get(
399
+ "confident.span.metric_collection"
400
+ )
401
+ raw_span_context = span.attributes.get("confident.span.context")
402
+ raw_span_retrieval_context = span.attributes.get(
403
+ "confident.span.retrieval_context"
404
+ )
405
+ raw_span_tools_called = span.attributes.get(
406
+ "confident.span.tools_called"
407
+ )
408
+ if raw_span_tools_called and isinstance(raw_span_tools_called, tuple):
409
+ raw_span_tools_called = list(raw_span_tools_called)
410
+
411
+ raw_span_expected_tools = span.attributes.get(
412
+ "confident.span.expected_tools"
413
+ )
414
+ if raw_span_expected_tools and isinstance(
415
+ raw_span_expected_tools, tuple
416
+ ):
417
+ raw_span_expected_tools = list(raw_span_expected_tools)
418
+
419
+ raw_span_metadata = span.attributes.get("confident.span.metadata")
420
+
421
+ # Validate Span Attributes
422
+ span_retrieval_context = parse_list_of_strings(
423
+ raw_span_retrieval_context
424
+ )
425
+ span_context = parse_list_of_strings(raw_span_context)
426
+ span_tools_called = self._parse_list_of_tools(raw_span_tools_called)
427
+ span_expected_tools = self._parse_list_of_tools(raw_span_expected_tools)
428
+ span_metadata = self._parse_json_string(raw_span_metadata)
429
+ span_metric_collection = parse_string(raw_span_metric_collection)
430
+
366
431
  # Set Span Attributes
367
432
  base_span.parent_uuid = (
368
433
  to_hex_string(span.parent.span_id, 16) if span.parent else None
@@ -388,38 +453,16 @@ class ConfidentSpanExporter(SpanExporter):
388
453
  if span_output:
389
454
  base_span.output = span_output
390
455
 
391
- # Resource attributes
392
- resource_attributes = span.resource.attributes
393
- if resource_attributes:
394
- environment = resource_attributes.get("confident.trace.environment")
395
- if environment and isinstance(environment, str):
396
- trace_environment = environment
397
-
398
- return BaseSpanWrapper(
399
- base_span=base_span,
400
- trace_input=trace_input,
401
- trace_output=trace_output,
402
- trace_name=trace_name,
403
- trace_tags=trace_tags,
404
- trace_metadata=trace_metadata,
405
- trace_thread_id=trace_thread_id,
406
- trace_user_id=trace_user_id,
407
- trace_retrieval_context=trace_retrieval_context,
408
- trace_context=trace_context,
409
- trace_tools_called=trace_tools_called,
410
- trace_expected_tools=trace_expected_tools,
411
- trace_metric_collection=trace_metric_collection,
412
- trace_environment=trace_environment,
413
- )
414
-
415
- def _prepare_boilerplate_base_span(
456
+ def __prepare_boilerplate_base_span(
416
457
  self, span: ReadableSpan
417
458
  ) -> Optional[BaseSpan]:
459
+
460
+ ################ Get Span Type ################
418
461
  span_type = span.attributes.get("confident.span.type")
419
462
  if not span_type:
420
463
  span_type = check_span_type_from_gen_ai_attributes(span)
421
464
 
422
- # required fields
465
+ ################ Get Required Fields ################
423
466
  uuid = to_hex_string(span.context.span_id, 16)
424
467
  status = (
425
468
  TraceSpanStatus.ERRORED
@@ -434,6 +477,8 @@ class ConfidentSpanExporter(SpanExporter):
434
477
  start_time = peb.epoch_nanos_to_perf_seconds(span.start_time)
435
478
  end_time = peb.epoch_nanos_to_perf_seconds(span.end_time)
436
479
 
480
+ ################ Populate Spans ################
481
+
437
482
  #######################################################
438
483
  ### LLM Span
439
484
  #######################################################
@@ -442,7 +487,7 @@ class ConfidentSpanExporter(SpanExporter):
442
487
  model = span.attributes.get("confident.llm.model")
443
488
  if not model:
444
489
  model = check_model_from_gen_ai_attributes(span)
445
- prompt = span.attributes.get("confident.llm.prompt")
490
+ # prompt = span.attributes.get("confident.llm.prompt")
446
491
  input_token_count = span.attributes.get(
447
492
  "confident.llm.input_token_count"
448
493
  )
@@ -468,6 +513,16 @@ class ConfidentSpanExporter(SpanExporter):
468
513
  output = [json.loads(o) for o in output]
469
514
  except Exception:
470
515
  pass
516
+ prompt = span.attributes.get("confident.span.prompt")
517
+ confident_prompt = None
518
+ if prompt and isinstance(prompt, str):
519
+ prompt = json.loads(prompt)
520
+ try:
521
+ confident_prompt = Prompt(alias=prompt["alias"])
522
+ confident_prompt.version = prompt["version"]
523
+ except Exception:
524
+ pass
525
+
471
526
  llm_span = LlmSpan(
472
527
  uuid=uuid,
473
528
  status=status,
@@ -480,11 +535,12 @@ class ConfidentSpanExporter(SpanExporter):
480
535
  model=model,
481
536
  cost_per_input_token=cost_per_input_token,
482
537
  cost_per_output_token=cost_per_output_token,
483
- prompt=prompt,
538
+ # prompt=prompt,
484
539
  input_token_count=input_token_count,
485
540
  output_token_count=output_token_count,
486
541
  input=input,
487
542
  output=output,
543
+ prompt=confident_prompt,
488
544
  )
489
545
  return llm_span
490
546
 
@@ -514,6 +570,8 @@ class ConfidentSpanExporter(SpanExporter):
514
570
  agent_handoffs.append(str(handoff))
515
571
  except Exception:
516
572
  pass
573
+
574
+ input, output = check_pydantic_ai_agent_input_output(span)
517
575
  agent_span = AgentSpan(
518
576
  uuid=uuid,
519
577
  status=status,
@@ -526,6 +584,8 @@ class ConfidentSpanExporter(SpanExporter):
526
584
  name=name if name else "",
527
585
  available_tools=available_tools,
528
586
  agent_handoffs=agent_handoffs,
587
+ input=input,
588
+ output=output,
529
589
  )
530
590
  return agent_span
531
591
 
@@ -562,6 +622,7 @@ class ConfidentSpanExporter(SpanExporter):
562
622
  name = check_tool_name_from_gen_ai_attributes(span)
563
623
  description = span.attributes.get("confident.tool.description")
564
624
  input = check_tool_input_parameters_from_gen_ai_attributes(span)
625
+ output = check_tool_output(span)
565
626
 
566
627
  tool_span = ToolSpan(
567
628
  uuid=uuid,
@@ -575,6 +636,7 @@ class ConfidentSpanExporter(SpanExporter):
575
636
  name=name if name else "",
576
637
  description=description,
577
638
  input=input,
639
+ output=output,
578
640
  )
579
641
  return tool_span
580
642
 
@@ -99,17 +99,32 @@ def validate_llm_test_case_data(
99
99
def check_llm_input_from_gen_ai_attributes(
    span: ReadableSpan,
) -> Tuple[Optional[list], Optional[dict]]:
    """Extract the LLM input and output recorded on a span via gen_ai attributes.

    The ``gen_ai.input.messages`` and ``gen_ai.output.messages`` attributes
    are tried first. Only when neither yields a value is the ``events``
    attribute used: its decoded content becomes the input and, if the last
    event is a ``gen_ai.choice`` event, that event is removed from the input
    and returned as the output.

    Args:
        span: Span whose ``attributes`` mapping holds JSON-encoded strings.

    Returns:
        An ``(input, output)`` pair; either element is ``None`` when it
        could not be determined.
    """

    def _load_json_attribute(key: str):
        # A missing attribute (None), a non-string value, or malformed JSON
        # all mean "not available" rather than an error.
        try:
            return json.loads(span.attributes.get(key))
        except (AttributeError, TypeError, ValueError):
            return None

    llm_input = _load_json_attribute("gen_ai.input.messages")
    llm_output = _load_json_attribute("gen_ai.output.messages")

    if llm_input is None and llm_output is None:
        events = _load_json_attribute("events")
        llm_input = events
        if events and isinstance(events, list):
            # Look at the last event before removing it, so that a trailing
            # event which is not a gen_ai.choice stays part of the input.
            last_event = events[-1]
            if (
                isinstance(last_event, dict)
                and last_event.get("event.name") == "gen_ai.choice"
            ):
                llm_output = events.pop()

    return llm_input, llm_output
113
128
 
114
129
 
115
130
  def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
@@ -307,3 +322,76 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
307
322
  test_run.add_test_case(case)
308
323
 
309
324
  # return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented
325
+
326
+
327
def check_pydantic_ai_agent_input_output(
    span: ReadableSpan,
) -> Tuple[Optional[Any], Optional[Any]]:
    """Derive an agent span's input and output from Pydantic AI attributes.

    Input comes from ``pydantic_ai.all_messages``: the message list is
    truncated right after the first message whose ``role`` (or ``author``)
    is ``"user"``; with no such message the whole list is used. Output is
    the ``final_result`` attribute, read only when ``confident.span.type``
    equals ``"agent"``. Any failure leaves the affected value as ``None``.

    Args:
        span: Span whose ``attributes`` mapping is inspected.

    Returns:
        An ``(input, output)`` pair; either element may be ``None``.
    """

    def _decode_if_json(item):
        # String entries may be JSON-encoded messages; keep them verbatim
        # when they cannot be decoded.
        if isinstance(item, str):
            try:
                return json.loads(item)
            except Exception:
                return item
        return item

    def _is_user_message(item):
        if not isinstance(item, dict):
            return False
        return (item.get("role") or item.get("author")) == "user"

    agent_input: Optional[Any] = None
    agent_output: Optional[Any] = None

    # Input: everything up to and including the first user message.
    try:
        messages = span.attributes.get("pydantic_ai.all_messages")
        if messages:
            if isinstance(messages, str):
                messages = json.loads(messages)
            elif isinstance(messages, tuple):
                messages = list(messages)

            if isinstance(messages, list):
                decoded = [_decode_if_json(entry) for entry in messages]
                cutoff = next(
                    (
                        position + 1
                        for position, entry in enumerate(decoded)
                        if _is_user_message(entry)
                    ),
                    None,
                )
                agent_input = decoded if cutoff is None else decoded[:cutoff]
    except Exception:
        pass

    # Output: the agent's final result, only for spans typed as "agent".
    try:
        attributes = span.attributes
        if attributes.get("confident.span.type") == "agent":
            agent_output = attributes.get("final_result")
    except Exception:
        pass

    return agent_input, agent_output
378
+
379
+
380
def check_tool_output(span: ReadableSpan) -> Optional[Any]:
    """Return the span's ``tool_response`` attribute, or ``None``.

    Args:
        span: Span whose ``attributes`` mapping is inspected.

    Returns:
        The value stored under ``tool_response``; ``None`` when the
        attribute is absent or the attributes cannot be read.
    """
    try:
        return span.attributes.get("tool_response")
    except Exception:
        # Best effort: a span without a usable attribute mapping simply has
        # no tool output.
        return None
386
+
387
+
388
def check_pydantic_ai_trace_input_output(
    span: ReadableSpan,
) -> Tuple[Optional[Any], Optional[Any]]:
    """Return trace-level input/output for a root Pydantic AI span.

    Only a span without a parent contributes trace-level values; for any
    other span both elements are ``None``.

    Args:
        span: Span to inspect.

    Returns:
        The ``(input, output)`` pair computed by
        ``check_pydantic_ai_agent_input_output`` for a root span, otherwise
        ``(None, None)``.
    """
    if span.parent:
        return None, None

    trace_input, trace_output = check_pydantic_ai_agent_input_output(span)
    return trace_input, trace_output