agentevals-cli 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. agentevals/__init__.py +16 -0
  2. agentevals/_protocol.py +83 -0
  3. agentevals/api/__init__.py +0 -0
  4. agentevals/api/app.py +137 -0
  5. agentevals/api/debug_routes.py +268 -0
  6. agentevals/api/models.py +204 -0
  7. agentevals/api/otlp_app.py +25 -0
  8. agentevals/api/otlp_routes.py +383 -0
  9. agentevals/api/routes.py +554 -0
  10. agentevals/api/streaming_routes.py +373 -0
  11. agentevals/builtin_metrics.py +234 -0
  12. agentevals/cli.py +643 -0
  13. agentevals/config.py +108 -0
  14. agentevals/converter.py +328 -0
  15. agentevals/custom_evaluators.py +468 -0
  16. agentevals/eval_config_loader.py +147 -0
  17. agentevals/evaluator/__init__.py +24 -0
  18. agentevals/evaluator/resolver.py +70 -0
  19. agentevals/evaluator/sources.py +293 -0
  20. agentevals/evaluator/templates.py +224 -0
  21. agentevals/extraction.py +444 -0
  22. agentevals/genai_converter.py +538 -0
  23. agentevals/loader/__init__.py +7 -0
  24. agentevals/loader/base.py +53 -0
  25. agentevals/loader/jaeger.py +112 -0
  26. agentevals/loader/otlp.py +193 -0
  27. agentevals/mcp_server.py +236 -0
  28. agentevals/output.py +204 -0
  29. agentevals/runner.py +310 -0
  30. agentevals/sdk.py +433 -0
  31. agentevals/streaming/__init__.py +120 -0
  32. agentevals/streaming/incremental_processor.py +337 -0
  33. agentevals/streaming/processor.py +285 -0
  34. agentevals/streaming/session.py +36 -0
  35. agentevals/streaming/ws_server.py +806 -0
  36. agentevals/trace_attrs.py +32 -0
  37. agentevals/trace_metrics.py +126 -0
  38. agentevals/utils/__init__.py +0 -0
  39. agentevals/utils/genai_messages.py +142 -0
  40. agentevals/utils/log_buffer.py +43 -0
  41. agentevals/utils/log_enrichment.py +187 -0
  42. agentevals_cli-0.5.2.dist-info/METADATA +22 -0
  43. agentevals_cli-0.5.2.dist-info/RECORD +46 -0
  44. agentevals_cli-0.5.2.dist-info/WHEEL +4 -0
  45. agentevals_cli-0.5.2.dist-info/entry_points.txt +2 -0
  46. agentevals_cli-0.5.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,538 @@
1
+ """Convert trace spans using GenAI semantic conventions into ADK Invocation objects.
2
+
3
+ Supports traces from frameworks using OpenTelemetry GenAI semantic conventions:
4
+ - LangChain (via LANGSMITH_OTEL_ENABLED)
5
+ - Any framework using standard gen_ai.* attributes
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from dataclasses import dataclass, field
12
+
13
+ from google.adk.evaluation.eval_case import IntermediateData, Invocation
14
+ from google.genai import types as genai_types
15
+
16
+ from .converter import ConversionResult
17
+ from .extraction import GenAIExtractor, is_invocation_span, is_llm_span, parse_tool_response_content
18
+ from .loader.base import Span, Trace
19
+ from .trace_attrs import (
20
+ OTEL_GENAI_INPUT_MESSAGES,
21
+ OTEL_GENAI_OUTPUT_MESSAGES,
22
+ OTEL_GENAI_TOOL_CALL_ARGUMENTS,
23
+ OTEL_GENAI_TOOL_CALL_ID,
24
+ OTEL_GENAI_TOOL_CALL_RESULT,
25
+ OTEL_GENAI_TOOL_NAME,
26
+ )
27
+ from .utils.genai_messages import (
28
+ ASSISTANT_ROLES,
29
+ USER_ROLES,
30
+ extract_text_from_message,
31
+ extract_tool_call_args_from_messages,
32
+ extract_tool_calls_from_message,
33
+ parse_json_attr,
34
+ )
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
@dataclass
class _ToolCall:
    """Internal record of one tool call recovered from a trace.

    Instances are built from tool spans and from tool calls embedded in
    assistant messages, and are later turned into ``FunctionCall`` objects.
    """

    # Tool name as reported by the span attribute or the assistant message.
    name: str
    # Call arguments; an empty dict when none could be recovered.
    args: dict
    # Tool-call id used to match calls across sources; None when not reported.
    id: str | None = None
44
+
45
+
46
@dataclass
class _ToolResponse:
    """Internal record of one tool result recovered from a tool span.

    Later turned into a ``FunctionResponse`` object.
    """

    # Name of the tool that produced this response.
    name: str
    # Response payload; presumably always a dict after parsing — TODO confirm
    # against parse_tool_response_content.
    response: dict
    # Id of the tool call this response answers; None when not reported.
    id: str | None = None
51
+
52
+
53
@dataclass
class _ConversationTurn:
    """Intermediate form of one user/assistant exchange.

    Produced by the turn extractors in this module and converted into an ADK
    ``Invocation`` by ``_turn_to_invocation``.
    """

    # Identifier assigned to the resulting Invocation.
    invocation_id: str
    # Text of the user message that opened the turn.
    user_text: str
    # Final assistant text for the turn ("" when none was found).
    assistant_text: str
    # Tool calls made during the turn.
    tool_calls: list[_ToolCall] = field(default_factory=list)
    # Tool results observed during the turn.
    tool_responses: list[_ToolResponse] = field(default_factory=list)
    # Span start time; divided by 1_000_000 when the Invocation timestamp is built.
    start_time: float = 0.0
61
+
62
+
63
def convert_genai_trace(trace: Trace) -> ConversionResult:
    """Convert a GenAI-convention trace into a ``ConversionResult``.

    Two strategies are tried in order:

    1. Multi-turn: when several root spans are LLM spans, none of them is an
       invocation span, at least one carries both input and output messages,
       and the first one holds more than one user message, the turns are
       rebuilt from that first span's message history and the function
       returns immediately.
    2. Per-invocation: otherwise each span returned by
       ``_find_genai_invocation_spans`` becomes one invocation, and the
       resulting list is deduplicated by user text.

    Conversion errors are never raised; they are logged and appended to
    ``result.warnings``.

    Args:
        trace: The trace to convert.

    Returns:
        A ``ConversionResult`` carrying the invocations and any warnings.
    """
    result = ConversionResult(trace_id=trace.trace_id)

    logger.debug(f"Converting GenAI trace {trace.trace_id} ({len(trace.all_spans)} spans)")

    llm_root_spans = [s for s in trace.root_spans if is_llm_span(s)]

    # Warn early when LLM root spans exist but none carries any message content.
    if llm_root_spans:
        has_messages = any(
            s.get_tag(OTEL_GENAI_INPUT_MESSAGES) or s.get_tag(OTEL_GENAI_OUTPUT_MESSAGES) for s in llm_root_spans
        )
        if not has_messages:
            msg = (
                f"Trace {trace.trace_id}: GenAI LLM spans found but missing message content. "
                "This usually means logs were not enriched into spans. "
                "Conversion may fail or produce incomplete results."
            )
            logger.warning(msg)
            result.warnings.append(msg)

    # Multi-turn path: several sibling LLM root spans without an invocation span.
    if len(llm_root_spans) > 1:
        if not any(is_invocation_span(s) for s in llm_root_spans):
            # Stricter than the check above: a span must carry BOTH input and output.
            has_enriched = any(
                s.get_tag(OTEL_GENAI_INPUT_MESSAGES) and s.get_tag(OTEL_GENAI_OUTPUT_MESSAGES) for s in llm_root_spans
            )

            if has_enriched and _is_broadcast_enriched(llm_root_spans[0]):
                logger.debug(f"Multi-turn conversation: {len(llm_root_spans)} LLM spans")
                try:
                    turns = _extract_multiturn_turns(llm_root_spans)
                    for turn in turns:
                        result.invocations.append(_turn_to_invocation(turn))
                except Exception as exc:
                    msg = f"Trace {trace.trace_id}: failed to convert multi-turn conversation: {exc}"
                    logger.warning(msg)
                    result.warnings.append(msg)
                # Returned as-is (no deduplication), whether or not extraction succeeded.
                return result

    # Per-invocation path.
    invocation_spans = _find_genai_invocation_spans(trace)
    logger.debug(f"Found {len(invocation_spans)} invocation spans")

    if not invocation_spans:
        result.warnings.append(f"Trace {trace.trace_id}: no GenAI invocation spans found")
        return result

    for inv_span in invocation_spans:
        try:
            turn = _extract_single_turn(inv_span)
            result.invocations.append(_turn_to_invocation(turn))
        except Exception as exc:
            # One bad span must not abort the others: record it and keep going.
            msg = f"Failed to convert span {inv_span.span_id}: {exc}"
            logger.warning(msg)
            result.warnings.append(msg)

    result.invocations = _deduplicate_invocations(result.invocations)
    return result
119
+
120
+
121
def _find_genai_invocation_spans(trace: Trace) -> list[Span]:
    """Pick the root spans that should each become one invocation.

    Selection falls through these tiers until one yields a result:

    1. Root spans recognised by ``is_invocation_span``.
    2. Root spans that have LLM children.
    3. Root spans that are themselves LLM spans. If there are several, some
       carry messages, and the first one holds the whole conversation
       (broadcast enrichment), only that first span is returned.
    4. Every root span.

    Args:
        trace: The trace whose root spans are inspected.

    Returns:
        A new list of spans ordered by ``start_time``. ``trace.root_spans`` is
        never reordered or otherwise modified.
    """
    roots = trace.root_spans

    candidates = [span for span in roots if is_invocation_span(span)]

    if not candidates:
        candidates = [span for span in roots if _has_llm_children(span)]

    if not candidates and roots:
        llm_spans = [s for s in roots if is_llm_span(s)]

        if len(llm_spans) > 1:
            has_enriched_messages = any(
                s.get_tag(OTEL_GENAI_INPUT_MESSAGES) or s.get_tag(OTEL_GENAI_OUTPUT_MESSAGES) for s in llm_spans
            )

            if has_enriched_messages and _is_broadcast_enriched(llm_spans[0]):
                logger.debug(
                    f"Found {len(llm_spans)} LLM spans with broadcast-enriched messages, treating as single multi-turn conversation"
                )
                return [llm_spans[0]]

        logger.debug("No clear invocation spans found, treating each root span as invocation")
        # `roots` is non-empty here, so this always yields at least one span.
        candidates = llm_spans or roots

    # sorted() builds a fresh list, so the caller's trace.root_spans is left
    # untouched even when `candidates` is that very list. It is stable, like
    # list.sort(), so ties keep their original relative order.
    return sorted(candidates, key=lambda s: s.start_time)
156
+
157
+
158
def _extract_single_turn(inv_span: Span) -> _ConversationTurn:
    """Build one conversation turn from an invocation span.

    The user text comes from the first LLM span under ``inv_span`` and the
    assistant text from the last one. When no LLM spans are found beneath it,
    ``inv_span`` itself is used if it is an LLM span.

    Raises:
        ValueError: If there are no LLM spans and ``inv_span`` is not one
            either, or if no user message can be extracted.
    """
    model_spans = _find_llm_spans(inv_span)

    logger.debug(f"Converting invocation span: {inv_span.operation_name}")
    logger.debug(f"Found {len(model_spans)} LLM spans")

    if not model_spans:
        if not is_llm_span(inv_span):
            raise ValueError(f"Invocation span {inv_span.span_id} has no LLM call spans")
        # The invocation span doubles as its only LLM call.
        model_spans = [inv_span]

    tool_level_spans = _find_tool_spans(inv_span)
    logger.debug(f"Found {len(tool_level_spans)} tool spans")

    opening_span, closing_span = model_spans[0], model_spans[-1]
    prompt = _extract_user_text(opening_span)
    reply = _extract_assistant_text(closing_span)
    calls, results = _extract_tool_calls(tool_level_spans, model_spans)

    turn = _ConversationTurn(
        invocation_id=f"genai-{inv_span.span_id}",
        user_text=prompt,
        assistant_text=reply,
        tool_calls=calls,
        tool_responses=results,
        start_time=float(inv_span.start_time),
    )
    return turn
185
+
186
+
187
def _extract_multiturn_turns(llm_spans: list[Span]) -> list[_ConversationTurn]:
    """Split a broadcast-enriched conversation into one turn per user message.

    Only the first span's input and output messages are read; with broadcast
    enrichment that span already holds the whole conversation. Each user
    message with text is paired with the assistant messages that follow the
    previous turn, up to and including the first one that has text. Tool calls
    seen along the way are attached to the turn.

    Args:
        llm_spans: Non-empty list of LLM spans belonging to one conversation.

    Returns:
        The extracted turns. If either message attribute does not parse to a
        list, a single turn built from the first and last span is returned.

    Raises:
        ValueError: From the single-turn fallback when no user message exists.
    """
    first_span = llm_spans[0]

    messages_raw = first_span.get_tag(OTEL_GENAI_INPUT_MESSAGES, "[]")
    all_input_messages = parse_json_attr(messages_raw, "gen_ai.input.messages")

    output_messages_raw = first_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES, "[]")
    all_output_messages = parse_json_attr(output_messages_raw, "gen_ai.output.messages")

    if not isinstance(all_input_messages, list) or not isinstance(all_output_messages, list):
        logger.warning("Messages are not lists, falling back to single invocation")
        user_text = _extract_user_text(first_span)
        assistant_text = _extract_assistant_text(llm_spans[-1])
        return [
            _ConversationTurn(
                invocation_id=f"genai-{first_span.span_id}",
                user_text=user_text,
                assistant_text=assistant_text,
                start_time=float(first_span.start_time),
            )
        ]

    # Skip entries that are not dicts (malformed payloads) instead of failing
    # on `.get`, matching the guards used by the other extractors in this file.
    user_messages = [
        msg for msg in all_input_messages if isinstance(msg, dict) and msg.get("role") in USER_ROLES
    ]
    assistant_messages = [
        msg for msg in all_output_messages if isinstance(msg, dict) and msg.get("role") in ASSISTANT_ROLES
    ]

    logger.debug(f"Multi-turn: {len(user_messages)} user, {len(assistant_messages)} assistant messages")
    for i, msg in enumerate(assistant_messages):
        has_content = bool(msg.get("content"))
        has_tools = bool(msg.get("tool_calls"))
        logger.debug(f" Assistant msg {i}: has_content={has_content}, has_tools={has_tools}")

    turns = []
    # Cursor into assistant_messages, shared across turns so each assistant
    # message is consumed by at most one turn.
    assistant_idx = 0

    for user_idx, user_msg in enumerate(user_messages):
        user_text = extract_text_from_message(user_msg)
        if not user_text:
            continue

        tool_calls: list[_ToolCall] = []
        assistant_text = ""

        while assistant_idx < len(assistant_messages):
            assistant_msg = assistant_messages[assistant_idx]
            assistant_idx += 1

            tool_calls.extend(
                _ToolCall(name=tc["name"], args=tc["arguments"], id=tc["id"])
                for tc in extract_tool_calls_from_message(assistant_msg)
            )

            content = extract_text_from_message(assistant_msg)
            if content:
                # First assistant message with text closes the turn.
                assistant_text = content
                break

        turns.append(
            _ConversationTurn(
                invocation_id=f"genai-turn-{user_idx + 1}-{first_span.span_id[:8]}",
                user_text=user_text if isinstance(user_text, str) else "",
                assistant_text=assistant_text,
                tool_calls=tool_calls,
                # Every turn shares the first span's start time.
                start_time=float(first_span.start_time),
            )
        )

    return turns
252
+
253
+
254
+ def _deduplicate_invocations(invocations: list[Invocation]) -> list[Invocation]:
255
+ """Deduplicate invocations with the same user text, keeping the best one.
256
+
257
+ The OpenAI instrumentor creates separate LLM calls for tool-use loops within
258
+ a single conversation turn. Each call logs the full conversation history, so
259
+ multiple spans produce invocations with the same user text. We keep the last
260
+ one per unique user text — it has the final response (not the intermediate
261
+ tool-call-only response).
262
+ """
263
+ if len(invocations) <= 1:
264
+ return invocations
265
+
266
+ def _user_text(inv: Invocation) -> str:
267
+ if inv.user_content and inv.user_content.parts:
268
+ return inv.user_content.parts[0].text or ""
269
+ return ""
270
+
271
+ seen: dict[str, int] = {}
272
+ always_keep: set[int] = set()
273
+ for i, inv in enumerate(invocations):
274
+ text = _user_text(inv)
275
+ if not text.strip():
276
+ always_keep.add(i)
277
+ else:
278
+ seen[text] = i
279
+
280
+ if len(seen) + len(always_keep) == len(invocations):
281
+ return invocations
282
+
283
+ keep = always_keep | set(seen.values())
284
+ return [inv for i, inv in enumerate(invocations) if i in keep]
285
+
286
+
287
def _turn_to_invocation(turn: _ConversationTurn) -> Invocation:
    """Translate an internal conversation turn into an ADK ``Invocation``.

    The user and assistant texts become single-part ``Content`` objects with
    roles ``"user"`` and ``"model"``. Tool calls and responses are placed in
    ``IntermediateData``. The timestamp is ``turn.start_time`` divided by one
    million.
    """

    def _single_text_content(role, text):
        # One Content holding exactly one text Part.
        return genai_types.Content(role=role, parts=[genai_types.Part(text=text)])

    prompt = _single_text_content("user", turn.user_text)
    reply = _single_text_content("model", turn.assistant_text)

    calls = []
    for call in turn.tool_calls:
        calls.append(genai_types.FunctionCall(name=call.name, args=call.args, id=call.id))

    outcomes = []
    for outcome in turn.tool_responses:
        outcomes.append(genai_types.FunctionResponse(name=outcome.name, response=outcome.response, id=outcome.id))

    return Invocation(
        invocation_id=turn.invocation_id,
        user_content=prompt,
        final_response=reply,
        intermediate_data=IntermediateData(tool_uses=calls, tool_responses=outcomes),
        creation_timestamp=turn.start_time / 1_000_000.0,
    )
307
+
308
+
309
def _extract_user_text(llm_span: Span) -> str:
    """Return the text of the most recent user message in the span's input.

    Messages are scanned from last to first. Non-dict entries and user
    messages without text are skipped.

    Raises:
        ValueError: If no user message with text is found, including when the
            attribute is missing or does not parse to a list.
    """
    parsed = parse_json_attr(llm_span.get_tag(OTEL_GENAI_INPUT_MESSAGES, "[]"), "gen_ai.input.messages")
    history = parsed if isinstance(parsed, list) else []

    # Lazily filtered so text extraction still happens newest-first, one
    # message at a time.
    user_entries = (
        entry for entry in reversed(history) if isinstance(entry, dict) and entry.get("role") in USER_ROLES
    )
    for entry in user_entries:
        text = extract_text_from_message(entry)
        if not text:
            continue
        logger.debug(f"Found user message: {text[:100]}")
        return text

    logger.warning(f"No user message found in {len(history)} messages")
    raise ValueError(f"LLM span {llm_span.span_id}: no user message found in gen_ai.input.messages")
327
+
328
+
329
def _extract_assistant_text(llm_span: Span) -> str:
    """Return the text of the last assistant message in the span's output.

    Messages are scanned from last to first. Non-dict entries and assistant
    messages without text (for example tool-call-only replies) are skipped.

    Args:
        llm_span: The LLM span whose output messages are read.

    Returns:
        The assistant text, or ``""`` (after logging a warning) when no
        assistant message with text exists.
    """
    messages_raw = llm_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES, "[]")
    messages = parse_json_attr(messages_raw, "gen_ai.output.messages")

    if not isinstance(messages, list):
        messages = []

    logger.debug(f"Extracting final response from {len(messages)} output messages")
    for i, msg in enumerate(messages):
        if isinstance(msg, dict):
            # "content" can be present but null (tool-call-only replies) or
            # hold a non-sized value. Diagnostics must never abort extraction,
            # so only measure values that actually have a length.
            content = msg.get("content")
            content_len = len(content) if hasattr(content, "__len__") else 0
            logger.debug(
                f" Message {i}: role={msg.get('role')}, content_len={content_len}, has_tool_calls={bool(msg.get('tool_calls'))}"
            )

    for msg in reversed(messages):
        if not isinstance(msg, dict):
            continue
        if msg.get("role") in ASSISTANT_ROLES:
            text = extract_text_from_message(msg)
            if text:
                logger.debug(f"Found assistant message with text: {text[:100]}")
                return text

    logger.warning(
        f"LLM span {llm_span.span_id}: no assistant message with content in gen_ai.output.messages ({len(messages)} messages)"
    )
    return ""
356
+
357
+
358
def _trim_cumulative_output(llm_span: Span, output_messages: list[dict]) -> list[dict]:
    """Drop earlier turns' replies from a cumulative output message list.

    Some instrumentors store the whole conversation in every span, so a span's
    output also contains the assistant replies of all previous turns. With N
    user messages in the span's input, the first N-1 assistant messages that
    have text belong to earlier turns. Everything after the (N-1)th such
    message is returned.

    Returns:
        The trimmed tail, or ``output_messages`` unchanged when the input is
        missing or is not a list, holds at most one user message, or the
        output has fewer than N-1 assistant text replies.
    """
    raw_input = llm_span.get_tag(OTEL_GENAI_INPUT_MESSAGES)
    if not raw_input:
        return output_messages

    history = parse_json_attr(raw_input, "gen_ai.input.messages")
    if not isinstance(history, list):
        return output_messages

    user_turns = len([m for m in history if isinstance(m, dict) and m.get("role") in USER_ROLES])
    if user_turns <= 1:
        return output_messages

    earlier_turns = user_turns - 1
    # Counts down the assistant text replies that still belong to earlier turns.
    still_to_skip = earlier_turns

    for position, entry in enumerate(output_messages):
        from_assistant = isinstance(entry, dict) and entry.get("role") in ASSISTANT_ROLES
        if not from_assistant or not extract_text_from_message(entry):
            continue

        still_to_skip -= 1
        if still_to_skip > 0:
            continue

        tail = output_messages[position + 1 :]
        logger.debug(
            "Trimmed cumulative output: %d → %d messages (skipped %d previous turns)",
            len(output_messages),
            len(tail),
            earlier_turns,
        )
        return tail

    return output_messages
399
+
400
+
401
def _extract_tool_calls(
    tool_spans: list[Span],
    llm_spans: list[Span] | None = None,
) -> tuple[list[_ToolCall], list[_ToolResponse]]:
    """Collect tool calls and tool responses for one turn.

    Tool calls come from two sources that are merged by tool-call id: the
    attributes of each tool span, and the tool calls embedded in the assistant
    messages of each LLM span. Tool responses come from tool spans only.

    Args:
        tool_spans: Spans representing tool executions.
        llm_spans: Optional LLM spans whose output messages are scanned for
            additional tool calls.

    Returns:
        ``(tool_calls, tool_responses)``. Calls that have an id come first, in
        first-seen order, followed by calls without an id.
    """
    tool_calls_by_id: dict[str, _ToolCall] = {}
    tool_calls_no_id: list[_ToolCall] = []
    tool_responses: list[_ToolResponse] = []

    # Pass 1: tool spans supply both the call and, when recorded, its result.
    for tool_span in tool_spans:
        tool_name = tool_span.get_tag(OTEL_GENAI_TOOL_NAME)
        if not tool_name:
            logger.warning(f"Tool span missing gen_ai.tool.name: {tool_span.operation_name}")
            continue

        tool_call_id = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_ID)

        args_raw = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_ARGUMENTS, "{}")
        args = parse_json_attr(args_raw, "gen_ai.tool.call.arguments")
        if not isinstance(args, dict):
            args = {}

        # No usable arguments attribute: try the span's input messages instead.
        if not args:
            input_msgs_raw = tool_span.get_tag(OTEL_GENAI_INPUT_MESSAGES)
            if input_msgs_raw:
                args, _ = extract_tool_call_args_from_messages(input_msgs_raw, tool_name)

        tc = _ToolCall(name=tool_name, args=args, id=tool_call_id)
        if tool_call_id:
            # A later span with the same id replaces the earlier entry.
            tool_calls_by_id[tool_call_id] = tc
        else:
            tool_calls_no_id.append(tc)

        result_raw = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_RESULT)
        if result_raw:
            result_data = parse_tool_response_content(result_raw)
            logger.debug(f"Tool {tool_name} result: {str(result_data)[:100]}")
            tool_responses.append(
                _ToolResponse(
                    name=tool_name,
                    response=result_data,
                    id=tool_call_id,
                )
            )
        else:
            # No result attribute: look for "tool_call_response" parts in the
            # span's output messages.
            output_msgs_raw = tool_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES)
            if output_msgs_raw:
                output_msgs = parse_json_attr(output_msgs_raw, "gen_ai.output.messages")
                if isinstance(output_msgs, list):
                    for msg in output_msgs:
                        if not isinstance(msg, dict):
                            continue
                        for part in msg.get("parts", []):
                            if not isinstance(part, dict):
                                continue
                            if part.get("type") == "tool_call_response" and "response" in part:
                                resp = part["response"]
                                if isinstance(resp, list):
                                    # A list response: join the "text" fields of its dict items.
                                    texts = [t.get("text", "") for t in resp if isinstance(t, dict) and "text" in t]
                                    result_data = parse_tool_response_content(" ".join(texts))
                                elif isinstance(resp, dict):
                                    result_data = resp
                                else:
                                    result_data = {"result": str(resp)}
                                tool_responses.append(
                                    _ToolResponse(
                                        name=tool_name,
                                        response=result_data,
                                        id=tool_call_id,
                                    )
                                )
                                # Ends the scan of this message's parts only;
                                # later messages are still examined.
                                break

    # Pass 2: tool calls embedded in assistant messages of the LLM spans.
    if llm_spans:
        for llm_span in llm_spans:
            messages_raw = llm_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES, "[]")
            messages = parse_json_attr(messages_raw, "gen_ai.output.messages")

            if not isinstance(messages, list):
                continue

            # Ignore replies that belong to earlier turns of a cumulative history.
            messages = _trim_cumulative_output(llm_span, messages)

            for msg in messages:
                if not isinstance(msg, dict):
                    continue
                if msg.get("role") not in ASSISTANT_ROLES:
                    continue
                for tc in extract_tool_calls_from_message(msg):
                    tc_id = tc["id"]
                    new_tc = _ToolCall(
                        name=tc["name"],
                        args=tc["arguments"],
                        id=tc_id,
                    )
                    if tc_id and tc_id in tool_calls_by_id:
                        # Prefer LLM message version if it has richer args
                        existing = tool_calls_by_id[tc_id]
                        if tc["arguments"] and not existing.args:
                            tool_calls_by_id[tc_id] = new_tc
                    elif tc_id:
                        tool_calls_by_id[tc_id] = new_tc
                    else:
                        # Without an id there is nothing to match on, so the
                        # call is always appended.
                        tool_calls_no_id.append(new_tc)

    tool_calls = list(tool_calls_by_id.values()) + tool_calls_no_id
    logger.debug(f"Extracted {len(tool_calls)} tool calls, {len(tool_responses)} responses")
    return tool_calls, tool_responses
508
+
509
+
510
# Single module-level GenAIExtractor instance; the span-lookup helpers in this
# module delegate to it.
_genai_extractor = GenAIExtractor()
511
+
512
+
513
def _is_broadcast_enriched(span: Span) -> bool:
    """Tell whether ``span`` carries a whole conversation in its input messages.

    Broadcast enrichment (WebSocket path) copies the full conversation history
    into every span, so a span ends up with several user messages. Per-span
    enrichment (OTLP path) gives each span only its own messages, so there is
    at most one user message.

    Returns:
        True when the input messages parse to a list holding more than one
        user-role message, False otherwise.
    """
    parsed = parse_json_attr(span.get_tag(OTEL_GENAI_INPUT_MESSAGES, "[]"), "gen_ai.input.messages")
    if isinstance(parsed, list):
        from_user = [entry for entry in parsed if isinstance(entry, dict) and entry.get("role") in USER_ROLES]
        return len(from_user) > 1
    return False
527
+
528
+
529
def _find_llm_spans(root: Span) -> list[Span]:
    """Return the LLM spans found under ``root`` by the shared GenAI extractor."""
    return _genai_extractor.find_llm_spans_in(root)
531
+
532
+
533
def _find_tool_spans(root: Span) -> list[Span]:
    """Return the tool spans found under ``root`` by the shared GenAI extractor."""
    return _genai_extractor.find_tool_spans_in(root)
535
+
536
+
537
def _has_llm_children(span: Span) -> bool:
    """Report whether ``span`` has LLM children, as judged by the shared extractor."""
    # NOTE(review): this reaches into a private method of GenAIExtractor.
    return _genai_extractor._has_llm_children(span)
@@ -0,0 +1,7 @@
1
+ """Trace loader implementations."""
2
+
3
+ from .base import TraceLoader
4
+ from .jaeger import JaegerJsonLoader
5
+ from .otlp import OtlpJsonLoader
6
+
7
+ __all__ = ["JaegerJsonLoader", "OtlpJsonLoader", "TraceLoader"]
@@ -0,0 +1,53 @@
1
+ """Abstract base class for trace loaders."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class Span:
    """Normalized representation of a trace span.

    Spans form a tree through ``children``; ``parent_span_id`` is None when no
    parent is recorded.
    """

    trace_id: str
    span_id: str
    parent_span_id: str | None
    operation_name: str
    start_time: int  # microseconds
    duration: int  # microseconds
    # Span attributes keyed by attribute name.
    tags: dict[str, Any] = field(default_factory=dict)
    # Child spans of this span.
    children: list[Span] = field(default_factory=list)

    def get_tag(self, key: str, default: Any = None) -> Any:
        """Return the tag stored under ``key``, or ``default`` when it is absent."""
        return self.tags.get(key, default)

    @property
    def end_time(self) -> int:
        """End of the span: ``start_time + duration``, in the same unit."""
        return self.start_time + self.duration
29
+
30
+
31
@dataclass
class Trace:
    """A trace: its id, its root spans, and a flat list of all its spans."""

    trace_id: str
    root_spans: list[Span] = field(default_factory=list)
    all_spans: list[Span] = field(default_factory=list)

    def find_spans_by_operation(self, operation_prefix: str) -> list[Span]:
        """Return spans from ``all_spans`` whose operation name starts with the prefix."""
        matches = []
        for span in self.all_spans:
            if span.operation_name.startswith(operation_prefix):
                matches.append(span)
        return matches

    def find_spans_by_tag(self, key: str, value: Any) -> list[Span]:
        """Return spans from ``all_spans`` whose tag ``key`` equals ``value``."""
        matches = []
        for span in self.all_spans:
            if span.get_tag(key) == value:
                matches.append(span)
        return matches
42
+
43
+
44
class TraceLoader(ABC):
    """Abstract interface for trace loaders.

    A concrete loader must implement both ``load`` and ``format_name``.
    """

    @abstractmethod
    def load(self, source: str) -> list[Trace]:
        """Load traces from a source (file path, URL, etc.)."""
        ...

    @abstractmethod
    def format_name(self) -> str:
        """Return the name of the trace format this loader handles."""
        ...