prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/instrumentation/anthropic.py
@@ -0,0 +1,1436 @@
+"""Instrumentation for Anthropic SDK (anthropic>=0.40.0).
+
+This module provides automatic tracing for Anthropic's Claude API, including:
+- Synchronous and asynchronous messages.create calls
+- Streaming responses (MessageStream and AsyncMessageStream)
+- Tool use detection and tracking
+- Extended thinking blocks (if enabled)
+- Comprehensive error handling
+
+Example:
+    ```python
+    from prela.instrumentation.anthropic import AnthropicInstrumentor
+    from prela.core.tracer import Tracer
+    import anthropic
+
+    tracer = Tracer()
+    instrumentor = AnthropicInstrumentor()
+    instrumentor.instrument(tracer)
+
+    # Now all Anthropic API calls will be automatically traced
+    client = anthropic.Anthropic()
+    response = client.messages.create(
+        model="claude-sonnet-4-20250514",
+        max_tokens=1024,
+        messages=[{"role": "user", "content": "Hello!"}]
+    )
+    ```
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from functools import wraps
+from typing import TYPE_CHECKING, Any, Callable
+
+from prela.core.clock import monotonic_ns, duration_ms
+from prela.core.span import SpanType, SpanStatus
+from prela.instrumentation.base import (
+    Instrumentor,
+    wrap_function,
+    unwrap_function,
+    _ORIGINALS_ATTR,
+)
+
+if TYPE_CHECKING:
+    from prela.core.tracer import Tracer
+
+logger = logging.getLogger(__name__)
+
+
+class AnthropicInstrumentor(Instrumentor):
+    """Instrumentor for the Anthropic SDK.
+
+    Patches the following methods:
+    - anthropic.Anthropic.messages.create (sync)
+    - anthropic.AsyncAnthropic.messages.create (async)
+    - anthropic.Anthropic.messages.stream (sync)
+    - anthropic.AsyncAnthropic.messages.stream (async)
+
+    Captures detailed information about requests, responses, tool usage,
+    and streaming events.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the Anthropic instrumentor."""
+        self._tracer: Tracer | None = None
+        self._anthropic_module: Any = None
+        self._messages_module: Any = None
+        self._async_messages_module: Any = None
+
+    def instrument(self, tracer: Tracer) -> None:
+        """Enable instrumentation for the Anthropic SDK.
+
+        Args:
+            tracer: The tracer to use for creating spans
+
+        Raises:
+            ImportError: If the anthropic package is not installed
+            RuntimeError: If instrumentation fails
+        """
+        if self.is_instrumented:
+            logger.debug("Anthropic SDK is already instrumented, skipping")
+            return
+
+        try:
+            import anthropic
+        except ImportError as e:
+            raise ImportError(
+                "anthropic package is not installed. "
+                "Install it with: pip install anthropic>=0.40.0"
+            ) from e
+
+        self._tracer = tracer
+        self._anthropic_module = anthropic
+
+        try:
+            # Get the messages modules for sync and async
+            if hasattr(anthropic, "Anthropic"):
+                client = anthropic.Anthropic.__new__(anthropic.Anthropic)
+                if hasattr(client, "messages"):
+                    self._messages_module = client.messages.__class__
+
+            if hasattr(anthropic, "AsyncAnthropic"):
+                async_client = anthropic.AsyncAnthropic.__new__(
+                    anthropic.AsyncAnthropic
+                )
+                if hasattr(async_client, "messages"):
+                    self._async_messages_module = async_client.messages.__class__
+
+            # Wrap sync messages.create
+            if self._messages_module is not None:
+                wrap_function(
+                    self._messages_module,
+                    "create",
+                    lambda orig: self._create_messages_wrapper(orig, is_async=False),
+                )
+                logger.debug("Wrapped anthropic.Anthropic.messages.create")
+
+            # Wrap async messages.create
+            if self._async_messages_module is not None:
+                wrap_function(
+                    self._async_messages_module,
+                    "create",
+                    lambda orig: self._create_messages_wrapper(orig, is_async=True),
+                )
+                logger.debug("Wrapped anthropic.AsyncAnthropic.messages.create")
+
+            # Wrap sync messages.stream
+            if self._messages_module is not None:
+                wrap_function(
+                    self._messages_module,
+                    "stream",
+                    lambda orig: self._create_stream_wrapper(orig, is_async=False),
+                )
+                logger.debug("Wrapped anthropic.Anthropic.messages.stream")
+
+            # Wrap async messages.stream
+            if self._async_messages_module is not None:
+                wrap_function(
+                    self._async_messages_module,
+                    "stream",
+                    lambda orig: self._create_stream_wrapper(orig, is_async=True),
+                )
+                logger.debug("Wrapped anthropic.AsyncAnthropic.messages.stream")
+
+            logger.info("Successfully instrumented Anthropic SDK")
+
+        except Exception as e:
+            self._tracer = None
+            self._anthropic_module = None
+            self._messages_module = None
+            self._async_messages_module = None
+            raise RuntimeError(f"Failed to instrument Anthropic SDK: {e}") from e
+
+    def uninstrument(self) -> None:
+        """Disable instrumentation and restore original functions."""
+        if not self.is_instrumented:
+            logger.debug("Anthropic SDK is not instrumented, skipping")
+            return
+
+        try:
+            # Unwrap sync methods
+            if self._messages_module is not None:
+                unwrap_function(self._messages_module, "create")
+                unwrap_function(self._messages_module, "stream")
+
+            # Unwrap async methods
+            if self._async_messages_module is not None:
+                unwrap_function(self._async_messages_module, "create")
+                unwrap_function(self._async_messages_module, "stream")
+
+            logger.info("Successfully uninstrumented Anthropic SDK")
+
+        finally:
+            self._tracer = None
+            self._anthropic_module = None
+            self._messages_module = None
+            self._async_messages_module = None
+
+    @property
+    def is_instrumented(self) -> bool:
+        """Check if the Anthropic SDK is currently instrumented."""
+        return (
+            self._tracer is not None
+            and self._messages_module is not None
+            and hasattr(self._messages_module, _ORIGINALS_ATTR)
+        )
+
+    def _create_messages_wrapper(
+        self, original_func: Callable[..., Any], is_async: bool
+    ) -> Callable[..., Any]:
+        """Create a wrapper for the messages.create method.
+
+        Args:
+            original_func: The original create function
+            is_async: Whether this is an async function
+
+        Returns:
+            Wrapped function that creates spans
+        """
+        if is_async:
+
+            @wraps(original_func)
+            async def async_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                # Pass is_async positionally: supplying it as a keyword before
+                # *args would raise a TypeError ("multiple values for argument
+                # 'is_async'") whenever positional arguments are forwarded.
+                return await self._trace_messages_create(
+                    original_func, self_obj, True, *args, **kwargs
+                )
+
+            return async_wrapper
+        else:
+
+            @wraps(original_func)
+            def sync_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                return self._trace_messages_create(
+                    original_func, self_obj, False, *args, **kwargs
+                )
+
+            return sync_wrapper
+
+    def _create_stream_wrapper(
+        self, original_func: Callable[..., Any], is_async: bool
+    ) -> Callable[..., Any]:
+        """Create a wrapper for the messages.stream method.
+
+        Args:
+            original_func: The original stream function
+            is_async: Whether this is an async function
+
+        Returns:
+            Wrapped function that creates spans and wraps streams
+        """
+        if is_async:
+
+            @wraps(original_func)
+            async def async_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                # is_async is passed positionally here for the same reason as
+                # in _create_messages_wrapper above.
+                return await self._trace_messages_stream(
+                    original_func, self_obj, True, *args, **kwargs
+                )
+
+            return async_wrapper
+        else:
+
+            @wraps(original_func)
+            def sync_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                return self._trace_messages_stream(
+                    original_func, self_obj, False, *args, **kwargs
+                )
+
+            return sync_wrapper
+
+    def _trace_messages_create(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        is_async: bool,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace a messages.create call (sync or async).
+
+        Args:
+            original_func: The original create function
+            self_obj: The messages object (self)
+            is_async: Whether this is an async call
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The response from the API call
+        """
+        if is_async:
+            return self._trace_messages_create_async(
+                original_func, self_obj, *args, **kwargs
+            )
+        else:
+            return self._trace_messages_create_sync(
+                original_func, self_obj, *args, **kwargs
+            )
+
+    def _trace_messages_create_sync(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace a synchronous messages.create call.
+
+        Args:
+            original_func: The original create function
+            self_obj: The messages object (self)
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The response from the API call
+        """
+        if self._tracer is None:
+            return original_func(self_obj, *args, **kwargs)
+
+        # Extract request parameters
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+        system = kwargs.get("system")
+        max_tokens = kwargs.get("max_tokens")
+        temperature = kwargs.get("temperature")
+
+        # Start timing
+        start_time = monotonic_ns()
+
+        # Create span
+        span = self._tracer.start_span(
+            name="anthropic.messages.create",
+            span_type=SpanType.LLM,
+        )
+
+        # Initialize replay capture if enabled
+        replay_capture = None
+        if self._tracer.capture_for_replay:
+            from prela.core.replay import ReplayCapture
+
+            replay_capture = ReplayCapture()
+            replay_capture.set_llm_request(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                system=system,  # Anthropic-specific
+                # Capture all other parameters
+                **{
+                    k: v
+                    for k, v in kwargs.items()
+                    if k not in ["model", "messages", "temperature", "max_tokens", "system"]
+                },
+            )
+
+        try:
+            # Set request attributes
+            span.set_attribute("llm.vendor", "anthropic")
+            span.set_attribute("llm.model", model)
+            span.set_attribute("llm.request.model", model)
+
+            if system:
+                span.set_attribute("llm.system", system)
+            if temperature is not None:
+                span.set_attribute("llm.temperature", temperature)
+            if max_tokens is not None:
+                span.set_attribute("llm.max_tokens", max_tokens)
+
+            # Add request event
+            span.add_event(
+                name="llm.request",
+                attributes={
+                    "messages": messages,
+                    **({"system": system} if system else {}),
+                },
+            )
+
+            # Make the API call
+            response = original_func(self_obj, *args, **kwargs)
+
+            # Calculate latency
+            end_time = monotonic_ns()
+            latency_ms = duration_ms(start_time, end_time)
+            span.set_attribute("llm.latency_ms", latency_ms)
+
+            # Extract response attributes
+            self._extract_response_attributes(span, response)
+
+            # Add response event (serialize content to avoid TextBlock
+            # serialization issues)
+            span.add_event(
+                name="llm.response",
+                attributes={"content": self._serialize_content(response.content)},
+            )
+
+            # Capture replay data if enabled
+            if replay_capture:
+                try:
+                    # Extract response text
+                    response_text = ""
+                    if hasattr(response, "content") and response.content:
+                        for block in response.content:
+                            if hasattr(block, "type") and block.type == "text":
+                                if hasattr(block, "text"):
+                                    response_text += block.text
+
+                    # Capture response
+                    replay_capture.set_llm_response(
+                        text=response_text,
+                        finish_reason=getattr(response, "stop_reason", None),
+                        model=getattr(response, "model", None),
+                        prompt_tokens=getattr(response.usage, "input_tokens", None)
+                        if hasattr(response, "usage")
+                        else None,
+                        completion_tokens=getattr(response.usage, "output_tokens", None)
+                        if hasattr(response, "usage")
+                        else None,
+                    )
+
+                    # Capture model info
+                    if hasattr(response, "id"):
+                        replay_capture.set_model_info(
+                            model=getattr(response, "model", None),
+                            id=response.id,
+                        )
+
+                    # Attach to span
+                    object.__setattr__(span, "replay_snapshot", replay_capture.build())
+
+                except Exception as e:
+                    logger.debug(f"Failed to capture replay data: {e}")
+
+            # Handle tool use if present
+            if hasattr(response, "stop_reason") and response.stop_reason == "tool_use":
+                self._handle_tool_use(span, response)
+
+            # Handle extended thinking if present
+            self._handle_thinking_blocks(span, response)
+
+            # Mark as successful
+            span.set_status(SpanStatus.SUCCESS)
+
+            return response
+
+        except Exception as e:
+            # Handle errors
+            self._handle_error(span, e)
+            raise
+
+        finally:
+            span.end()
+
+    async def _trace_messages_create_async(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace an asynchronous messages.create call.
+
+        Args:
+            original_func: The original create function
+            self_obj: The messages object (self)
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The response from the API call
+        """
+        if self._tracer is None:
+            return await original_func(self_obj, *args, **kwargs)
+
+        # Extract request parameters
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+        system = kwargs.get("system")
+        max_tokens = kwargs.get("max_tokens")
+        temperature = kwargs.get("temperature")
+
+        # Start timing
+        start_time = monotonic_ns()
+
+        # Create span
+        span = self._tracer.start_span(
+            name="anthropic.messages.create",
+            span_type=SpanType.LLM,
+        )
+
+        # Initialize replay capture if enabled
+        replay_capture = None
+        if self._tracer.capture_for_replay:
+            from prela.core.replay import ReplayCapture
+
+            replay_capture = ReplayCapture()
+            replay_capture.set_llm_request(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                system=system,  # Anthropic-specific
+                # Capture all other parameters
+                **{
+                    k: v
+                    for k, v in kwargs.items()
+                    if k not in ["model", "messages", "temperature", "max_tokens", "system"]
+                },
+            )
+
+        try:
+            # Set request attributes
+            span.set_attribute("llm.vendor", "anthropic")
+            span.set_attribute("llm.model", model)
+            span.set_attribute("llm.request.model", model)
+
+            if system:
+                span.set_attribute("llm.system", system)
+            if temperature is not None:
+                span.set_attribute("llm.temperature", temperature)
+            if max_tokens is not None:
+                span.set_attribute("llm.max_tokens", max_tokens)
+
+            # Add request event
+            span.add_event(
+                name="llm.request",
+                attributes={
+                    "messages": messages,
+                    **({"system": system} if system else {}),
+                },
+            )
+
+            # Make the API call
+            response = await original_func(self_obj, *args, **kwargs)
+
+            # Calculate latency
+            end_time = monotonic_ns()
+            latency_ms = duration_ms(start_time, end_time)
+            span.set_attribute("llm.latency_ms", latency_ms)
+
+            # Extract response attributes
+            self._extract_response_attributes(span, response)
+
+            # Add response event (serialize content to avoid TextBlock
+            # serialization issues)
+            span.add_event(
+                name="llm.response",
+                attributes={"content": self._serialize_content(response.content)},
+            )
+
+            # Capture replay data if enabled
+            if replay_capture:
+                try:
+                    # Extract response text
+                    response_text = ""
+                    if hasattr(response, "content") and response.content:
+                        for block in response.content:
+                            if hasattr(block, "type") and block.type == "text":
+                                if hasattr(block, "text"):
+                                    response_text += block.text
+
+                    # Capture response
+                    replay_capture.set_llm_response(
+                        text=response_text,
+                        finish_reason=getattr(response, "stop_reason", None),
+                        model=getattr(response, "model", None),
+                        prompt_tokens=getattr(response.usage, "input_tokens", None)
+                        if hasattr(response, "usage")
+                        else None,
+                        completion_tokens=getattr(response.usage, "output_tokens", None)
+                        if hasattr(response, "usage")
+                        else None,
+                    )
+
+                    # Capture model info
+                    if hasattr(response, "id"):
+                        replay_capture.set_model_info(
+                            model=getattr(response, "model", None),
+                            id=response.id,
+                        )
+
+                    # Attach to span
+                    object.__setattr__(span, "replay_snapshot", replay_capture.build())
+
+                except Exception as e:
+                    logger.debug(f"Failed to capture replay data: {e}")
+
+            # Handle tool use if present
+            if hasattr(response, "stop_reason") and response.stop_reason == "tool_use":
+                self._handle_tool_use(span, response)
+
+            # Handle extended thinking if present
+            self._handle_thinking_blocks(span, response)
+
+            # Mark as successful
+            span.set_status(SpanStatus.SUCCESS)
+
+            return response
+
+        except Exception as e:
+            # Handle errors
+            self._handle_error(span, e)
+            raise
+
+        finally:
+            span.end()
+
+    def _trace_messages_stream(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        is_async: bool,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace a messages.stream call (sync or async).
+
+        Args:
+            original_func: The original stream function
+            self_obj: The messages object (self)
+            is_async: Whether this is an async stream
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The wrapped stream object
+        """
+        if is_async:
+            return self._trace_messages_stream_async(
+                original_func, self_obj, *args, **kwargs
+            )
+        else:
+            return self._trace_messages_stream_sync(
+                original_func, self_obj, *args, **kwargs
+            )
+
+    def _trace_messages_stream_sync(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace a synchronous messages.stream call.
+
+        Args:
+            original_func: The original stream function
+            self_obj: The messages object (self)
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The wrapped stream object
+        """
+        if self._tracer is None:
+            return original_func(self_obj, *args, **kwargs)
+
+        # Extract request parameters
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+        system = kwargs.get("system")
+        max_tokens = kwargs.get("max_tokens")
+        temperature = kwargs.get("temperature")
+
+        # Start timing
+        start_time = monotonic_ns()
+
+        # Create span
+        span = self._tracer.start_span(
+            name="anthropic.messages.stream",
+            span_type=SpanType.LLM,
+        )
+
+        # Initialize replay capture if enabled
+        replay_capture = None
+        if self._tracer.capture_for_replay:
+            from prela.core.replay import ReplayCapture
+
+            replay_capture = ReplayCapture()
+            replay_capture.set_llm_request(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                system=system,  # Anthropic-specific
+                # Capture all other parameters
+                **{
+                    k: v
+                    for k, v in kwargs.items()
+                    if k not in ["model", "messages", "temperature", "max_tokens", "system"]
+                },
+            )
+
+        # Set request attributes
+        span.set_attribute("llm.vendor", "anthropic")
+        span.set_attribute("llm.model", model)
+        span.set_attribute("llm.request.model", model)
+        span.set_attribute("llm.stream", True)
+
+        if system:
+            span.set_attribute("llm.system", system)
+        if temperature is not None:
+            span.set_attribute("llm.temperature", temperature)
+        if max_tokens is not None:
+            span.set_attribute("llm.max_tokens", max_tokens)
+
+        # Add request event
+        span.add_event(
+            name="llm.request",
+            attributes={
+                "messages": messages,
+                **({"system": system} if system else {}),
+            },
+        )
+
+        try:
+            # Create the stream
+            stream = original_func(self_obj, *args, **kwargs)
+
+            # Wrap the stream to capture events
+            return TracedMessageStream(
+                stream=stream,
+                span=span,
+                tracer=self._tracer,
+                start_time=start_time,
+                replay_capture=replay_capture,
+            )
+
+        except Exception as e:
+            # Handle errors during stream creation
+            self._handle_error(span, e)
+            span.end()
+            raise
+
+    async def _trace_messages_stream_async(
+        self,
+        original_func: Callable[..., Any],
+        self_obj: Any,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """Trace an asynchronous messages.stream call.
+
+        Args:
+            original_func: The original stream function
+            self_obj: The messages object (self)
+            *args: Positional arguments
+            **kwargs: Keyword arguments
+
+        Returns:
+            The wrapped async stream object
+        """
+        if self._tracer is None:
+            return await original_func(self_obj, *args, **kwargs)
+
+        # Extract request parameters
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+        system = kwargs.get("system")
+        max_tokens = kwargs.get("max_tokens")
+        temperature = kwargs.get("temperature")
+
+        # Start timing
+        start_time = monotonic_ns()
+
+        # Create span
+        span = self._tracer.start_span(
+            name="anthropic.messages.stream",
+            span_type=SpanType.LLM,
+        )
+
+        # Initialize replay capture if enabled
+        replay_capture = None
+        if self._tracer.capture_for_replay:
+            from prela.core.replay import ReplayCapture
+
+            replay_capture = ReplayCapture()
+            replay_capture.set_llm_request(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                system=system,  # Anthropic-specific
+                # Capture all other parameters
+                **{
+                    k: v
+                    for k, v in kwargs.items()
+                    if k not in ["model", "messages", "temperature", "max_tokens", "system"]
+                },
+            )
+
+        # Set request attributes
+        span.set_attribute("llm.vendor", "anthropic")
+        span.set_attribute("llm.model", model)
+        span.set_attribute("llm.request.model", model)
+        span.set_attribute("llm.stream", True)
+
+        if system:
+            span.set_attribute("llm.system", system)
+        if temperature is not None:
+            span.set_attribute("llm.temperature", temperature)
+        if max_tokens is not None:
+            span.set_attribute("llm.max_tokens", max_tokens)
+
+        # Add request event
+        span.add_event(
+            name="llm.request",
+            attributes={
+                "messages": messages,
+                **({"system": system} if system else {}),
+            },
+        )
+
+        try:
+            # Create the async stream
+            stream = await original_func(self_obj, *args, **kwargs)
+
+            # Wrap the stream to capture events
+            return TracedAsyncMessageStream(
+                stream=stream,
+                span=span,
+                tracer=self._tracer,
+                start_time=start_time,
+                replay_capture=replay_capture,
+            )
+
+        except Exception as e:
+            # Handle errors during stream creation
+            self._handle_error(span, e)
+            span.end()
+            raise
+
+    def _serialize_content(self, content: Any) -> list[dict[str, Any]]:
+        """Serialize response content blocks to a JSON-serializable format.
+
+        Args:
+            content: The content blocks from the API response
+
+        Returns:
+            List of serialized content blocks as dicts
+        """
+        serialized = []
+        try:
+            for block in content:
+                # Try model_dump() first (Pydantic v2)
+                if hasattr(block, "model_dump"):
+                    serialized.append(block.model_dump())
+                # Fall back to dict() (Pydantic v1)
+                elif hasattr(block, "dict"):
+                    serialized.append(block.dict())
+                # Manual extraction as a last resort
+                else:
+                    block_dict = {"type": getattr(block, "type", "unknown")}
+                    if hasattr(block, "text"):
+                        block_dict["text"] = block.text
+                    if hasattr(block, "id"):
+                        block_dict["id"] = block.id
+                    if hasattr(block, "name"):
+                        block_dict["name"] = block.name
+                    if hasattr(block, "input"):
+                        block_dict["input"] = block.input
+                    serialized.append(block_dict)
+        except Exception as e:
+            logger.debug(f"Failed to serialize content blocks: {e}")
+            # Return an empty list on failure
+            return []
+
+        return serialized
+
+    def _extract_response_attributes(self, span: Any, response: Any) -> None:
+        """Extract attributes from a response object.
+
+        Args:
+            span: The span to add attributes to
+            response: The response object from the API
+        """
+        try:
+            # Model (actual model used)
+            if hasattr(response, "model"):
+                span.set_attribute("llm.response.model", response.model)
+
+            # Response ID
+            if hasattr(response, "id"):
+                span.set_attribute("llm.response.id", response.id)
+
+            # Usage statistics
+            if hasattr(response, "usage"):
+                usage = response.usage
+                if hasattr(usage, "input_tokens"):
+                    span.set_attribute("llm.input_tokens", usage.input_tokens)
+                if hasattr(usage, "output_tokens"):
+                    span.set_attribute("llm.output_tokens", usage.output_tokens)
+
+            # Stop reason
+            if hasattr(response, "stop_reason"):
+                span.set_attribute("llm.stop_reason", response.stop_reason)
+
+        except Exception as e:
+            # Don't let attribute extraction failures break the instrumentation
+            logger.debug(f"Failed to extract response attributes: {e}")
+
+    def _handle_tool_use(self, span: Any, response: Any) -> None:
+        """Handle tool use in the response.
+
+        Args:
+            span: The span to add tool use information to
+            response: The response object containing tool use
+        """
+        try:
+            if not hasattr(response, "content"):
+                return
+
+            tool_calls = []
+            for block in response.content:
+                if hasattr(block, "type") and block.type == "tool_use":
+                    tool_call = {
+                        "id": getattr(block, "id", None),
+                        "name": getattr(block, "name", None),
+                        "input": getattr(block, "input", None),
+                    }
+                    tool_calls.append(tool_call)
+
+            if tool_calls:
+                span.add_event(
+                    name="llm.tool_use",
+                    attributes={"tool_calls": tool_calls},
+                )
+
+        except Exception as e:
+            # Don't let tool use handling failures break the instrumentation
+            logger.debug(f"Failed to handle tool use: {e}")
+
+    def _handle_thinking_blocks(self, span: Any, response: Any) -> None:
+        """Handle extended thinking blocks in the response.
+
+        Args:
+            span: The span to add thinking information to
+            response: The response object that may contain thinking blocks
+        """
+        try:
+            if not hasattr(response, "content"):
+                return
+
+            thinking_content = []
+            for block in response.content:
+                if hasattr(block, "type") and block.type == "thinking":
+                    if hasattr(block, "thinking"):
+                        thinking_content.append(block.thinking)
+
+            if thinking_content:
+                span.add_event(
+                    name="llm.thinking",
+                    attributes={"thinking": thinking_content},
+                )
+
+        except Exception as e:
+            # Don't let thinking block handling failures break the instrumentation
+            logger.debug(f"Failed to handle thinking blocks: {e}")
+
+    def _handle_error(self, span: Any, error: Exception) -> None:
+        """Handle an error during an API call.
+
+        Args:
+            span: The span to record the error on
+            error: The exception that was raised
+        """
+        try:
+            # Set error status
+            span.set_status(SpanStatus.ERROR, str(error))
+
+            # Extract error details
+            error_attrs: dict[str, Any] = {
+                "error.type": type(error).__name__,
+                "error.message": str(error),
+            }
+
+            # Handle anthropic-specific errors
+            if hasattr(error, "status_code"):
+                error_attrs["error.status_code"] = error.status_code
+
+            span.add_event(name="error", attributes=error_attrs)
+
+        except Exception as e:
+            # Don't let error handling failures break the instrumentation
+            logger.debug(f"Failed to handle error: {e}")
+
+
+class TracedMessageStream:
+    """Wrapper for MessageStream that captures streaming events."""
+
+    def __init__(
+        self,
+        stream: Any,
+        span: Any,
+        tracer: Tracer,
+        start_time: int,
+        replay_capture: Any = None,
+    ) -> None:
+        """Initialize the traced stream.
+
+        Args:
+            stream: The original MessageStream
+            span: The span to record events on
+            tracer: The tracer instance
+            start_time: Start time in nanoseconds
+            replay_capture: Optional ReplayCapture instance for replay data
+        """
+        self._stream = stream
+        self._span = span
+        self._tracer = tracer
+        self._start_time = start_time
+        self._replay_capture = replay_capture
+        self._first_token_time: int | None = None
+        self._text_content: list[str] = []
+        self._tool_calls: list[dict[str, Any]] = []
+        self._thinking_content: list[str] = []
+        self._streaming_chunks: list[dict[str, Any]] = []
+
+    def __enter__(self) -> TracedMessageStream:
+        """Enter the context manager."""
+        self._message_stream = self._stream.__enter__()
+        return self
+
+    @property
+    def text_stream(self):
+        """Expose the underlying MessageStream's text_stream iterator.
+
+        This allows users to iterate over text deltas:
+
+        ```python
+        with client.messages.stream(...) as stream:
+            for text in stream.text_stream:
+                print(text, end="", flush=True)
+        ```
+        """
+        return self._message_stream.text_stream
+
+    def get_final_message(self):
+        """Get the final message from the stream.
+
+        Returns:
+            The final Message object with complete content and usage data
+        """
+        return self._message_stream.get_final_message()
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Exit the context manager and finalize the span."""
+        try:
+            self._stream.__exit__(exc_type, exc_val, exc_tb)
+
+            if exc_type is None:
+                # Success - record final attributes
+                self._finalize_span()
+            else:
+                # Error during streaming
+                self._span.set_status(SpanStatus.ERROR, str(exc_val))
+
+        finally:
+            self._span.end()
+
+    def __iter__(self) -> Any:
+        """Iterate over stream events."""
+        try:
+            for event in self._stream:
+                self._process_event(event)
+                yield event
+
+        except Exception as e:
+            self._span.set_status(SpanStatus.ERROR, str(e))
+            raise
+
+    def _process_event(self, event: Any) -> None:
+        """Process a streaming event.
+
+        Args:
+            event: The streaming event
+        """
+        try:
+            event_type = getattr(event, "type", None)
+
+            # Capture raw event for replay if enabled
+            if self._replay_capture:
+                try:
+                    chunk_data = {
+                        "type": event_type,
+                    }
+
+                    # Capture event-specific data
+                    if event_type == "content_block_delta" and hasattr(event, "delta"):
+                        delta = event.delta
+                        chunk_data["delta"] = {
+                            "type": getattr(delta, "type", None),
+                            "text": getattr(delta, "text", None),
+                        }
+                    elif event_type == "content_block_start" and hasattr(
+                        event, "content_block"
+                    ):
+                        block = event.content_block
+                        chunk_data["content_block"] = {
+                            "type": getattr(block, "type", None),
+                            "id": getattr(block, "id", None),
+                            "name": getattr(block, "name", None),
+                        }
+                    elif event_type == "message_delta":
+                        if hasattr(event, "usage"):
+                            usage = event.usage
+                            chunk_data["usage"] = {
+                                "output_tokens": getattr(usage, "output_tokens", None),
+                            }
+                        if hasattr(event, "delta"):
+                            delta = event.delta
+                            chunk_data["delta"] = {
+                                "stop_reason": getattr(delta, "stop_reason", None),
+                            }
+
+                    self._streaming_chunks.append(chunk_data)
+                except Exception as e:
+                    logger.debug(f"Failed to capture streaming chunk: {e}")
+
+            if event_type == "content_block_delta":
+                # Capture first token time
+                if self._first_token_time is None:
+                    self._first_token_time = monotonic_ns()
+
+                # Aggregate text content
+                if hasattr(event, "delta"):
+                    delta = event.delta
+                    if hasattr(delta, "type") and delta.type == "text_delta":
+                        if hasattr(delta, "text"):
+                            self._text_content.append(delta.text)
+
+            elif event_type == "content_block_start":
+                # Detect tool use or thinking blocks
+                if hasattr(event, "content_block"):
+                    block = event.content_block
+                    if hasattr(block, "type"):
+                        if block.type == "tool_use":
+                            self._tool_calls.append(
+                                {
+                                    "id": getattr(block, "id", None),
+                                    "name": getattr(block, "name", None),
+                                }
+                            )
+                        elif block.type == "thinking":
+                            # Mark that we have thinking content
+                            pass
+
+            elif event_type == "message_delta":
+                # Extract final usage stats
+                if hasattr(event, "usage"):
+                    usage = event.usage
+                    if hasattr(usage, "output_tokens"):
+                        self._span.set_attribute(
+                            "llm.output_tokens", usage.output_tokens
+                        )
+
+                # Extract stop reason
+                if hasattr(event, "delta"):
+                    delta = event.delta
+                    if hasattr(delta, "stop_reason"):
+                        self._span.set_attribute("llm.stop_reason", delta.stop_reason)
+
+        except Exception as e:
+            # Don't let event processing failures break the stream
+            logger.debug(f"Failed to process streaming event: {e}")
+
+    def _finalize_span(self) -> None:
+        """Finalize the span with aggregated data."""
+        try:
+            # Calculate latency
+            end_time = monotonic_ns()
+            latency_ms = duration_ms(self._start_time, end_time)
+            self._span.set_attribute("llm.latency_ms", latency_ms)
+
+            # Time to first token
+            if self._first_token_time is not None:
+                ttft_ms = duration_ms(self._start_time, self._first_token_time)
+                self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)
+
+            # Aggregated text content
+            if self._text_content:
+                full_text = "".join(self._text_content)
+                self._span.add_event(
+                    name="llm.response",
+                    attributes={"content": [{"type": "text", "text": full_text}]},
+                )
+
+            # Tool calls
+            if self._tool_calls:
+                self._span.add_event(
+                    name="llm.tool_use",
+                    attributes={"tool_calls": self._tool_calls},
+                )
+
+            # Finalize replay capture if enabled
+            if self._replay_capture:
+                try:
+                    # Capture aggregated response text
+                    full_text = "".join(self._text_content)
+                    self._replay_capture.set_llm_response(
+                        text=full_text,
+                        finish_reason=self._span.attributes.get("llm.stop_reason"),
+                    )
+
+                    # Add streaming chunks
+                    if self._streaming_chunks:
+                        for chunk in self._streaming_chunks:
+                            self._replay_capture.add_streaming_chunk(chunk)
+
+                    # Attach to span
+                    object.__setattr__(
+                        self._span, "replay_snapshot", self._replay_capture.build()
+                    )
+
+                except Exception as e:
+                    logger.debug(f"Failed to finalize replay capture: {e}")
+
+            # Mark as successful
+            self._span.set_status(SpanStatus.SUCCESS)
+
+        except Exception as e:
+            # Don't let finalization failures break the instrumentation
+            logger.debug(f"Failed to finalize span: {e}")
+
+
+class TracedAsyncMessageStream:
+    """Wrapper for AsyncMessageStream that captures streaming events."""
+
+    def __init__(
+        self,
+        stream: Any,
+        span: Any,
+        tracer: Tracer,
+        start_time: int,
+        replay_capture: Any = None,
+    ) -> None:
+        """Initialize the traced async stream.
+
+        Args:
+            stream: The original AsyncMessageStream
+            span: The span to record events on
+            tracer: The tracer instance
+            start_time: Start time in nanoseconds
+            replay_capture: Optional ReplayCapture instance for replay data
+        """
+        self._stream = stream
+        self._span = span
+        self._tracer = tracer
+        self._start_time = start_time
+        self._replay_capture = replay_capture
+        self._first_token_time: int | None = None
+        self._text_content: list[str] = []
+        self._tool_calls: list[dict[str, Any]] = []
+        self._thinking_content: list[str] = []
+        self._streaming_chunks: list[dict[str, Any]] = []
+
+    async def __aenter__(self) -> TracedAsyncMessageStream:
+        """Enter the async context manager."""
+        self._message_stream = await self._stream.__aenter__()
+        return self
+
+    @property
+    def text_stream(self):
+        """Expose the underlying AsyncMessageStream's text_stream async iterator.
+
+        This allows users to iterate over text deltas:
+
+        ```python
+        async with client.messages.stream(...) as stream:
+            async for text in stream.text_stream:
+                print(text, end="", flush=True)
+        ```
+        """
+        return self._message_stream.text_stream
+
+    async def get_final_message(self):
+        """Get the final message from the async stream.
+
+        Returns:
+            The final Message object with complete content and usage data
+        """
+        return await self._message_stream.get_final_message()
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Exit the async context manager and finalize the span."""
+        try:
+            await self._stream.__aexit__(exc_type, exc_val, exc_tb)
+
+            if exc_type is None:
+                # Success - record final attributes
+                self._finalize_span()
+            else:
+                # Error during streaming
+                self._span.set_status(SpanStatus.ERROR, str(exc_val))
+
+        finally:
+            self._span.end()
+
+    async def __aiter__(self) -> Any:
+        """Asynchronously iterate over stream events."""
+        try:
+            async for event in self._stream:
+                self._process_event(event)
+                yield event
+
+        except Exception as e:
+            self._span.set_status(SpanStatus.ERROR, str(e))
+            raise
+
+    def _process_event(self, event: Any) -> None:
+        """Process a streaming event.
+
+        Args:
+            event: The streaming event
+        """
+        try:
+            event_type = getattr(event, "type", None)
+
+            # Capture raw event for replay if enabled
+            if self._replay_capture:
+                try:
+                    chunk_data = {
+                        "type": event_type,
+                    }
+
+                    # Capture event-specific data
+                    if event_type == "content_block_delta" and hasattr(event, "delta"):
+                        delta = event.delta
+                        chunk_data["delta"] = {
+                            "type": getattr(delta, "type", None),
+                            "text": getattr(delta, "text", None),
+                        }
+                    elif event_type == "content_block_start" and hasattr(
+                        event, "content_block"
+                    ):
+                        block = event.content_block
+                        chunk_data["content_block"] = {
+                            "type": getattr(block, "type", None),
+                            "id": getattr(block, "id", None),
+                            "name": getattr(block, "name", None),
+                        }
+                    elif event_type == "message_delta":
+                        if hasattr(event, "usage"):
+                            usage = event.usage
+                            chunk_data["usage"] = {
+                                "output_tokens": getattr(usage, "output_tokens", None),
+                            }
+                        if hasattr(event, "delta"):
+                            delta = event.delta
+                            chunk_data["delta"] = {
+                                "stop_reason": getattr(delta, "stop_reason", None),
+                            }
+
+                    self._streaming_chunks.append(chunk_data)
+                except Exception as e:
+                    logger.debug(f"Failed to capture streaming chunk: {e}")
+
+            if event_type == "content_block_delta":
+                # Capture first token time
+                if self._first_token_time is None:
+                    self._first_token_time = monotonic_ns()
+
+                # Aggregate text content
+                if hasattr(event, "delta"):
+                    delta = event.delta
+                    if hasattr(delta, "type") and delta.type == "text_delta":
+                        if hasattr(delta, "text"):
+                            self._text_content.append(delta.text)
+
+            elif event_type == "content_block_start":
+                # Detect tool use or thinking blocks
+                if hasattr(event, "content_block"):
+                    block = event.content_block
+                    if hasattr(block, "type"):
+                        if block.type == "tool_use":
+                            self._tool_calls.append(
+                                {
+                                    "id": getattr(block, "id", None),
+                                    "name": getattr(block, "name", None),
+                                }
+                            )
+                        elif block.type == "thinking":
+                            # Mark that we have thinking content
+                            pass
+
+            elif event_type == "message_delta":
+                # Extract final usage stats
+                if hasattr(event, "usage"):
+                    usage = event.usage
+                    if hasattr(usage, "output_tokens"):
+                        self._span.set_attribute(
+                            "llm.output_tokens", usage.output_tokens
+                        )
+
+                # Extract stop reason
+                if hasattr(event, "delta"):
+                    delta = event.delta
+                    if hasattr(delta, "stop_reason"):
+                        self._span.set_attribute("llm.stop_reason", delta.stop_reason)
+
+        except Exception as e:
+            # Don't let event processing failures break the stream
+            logger.debug(f"Failed to process streaming event: {e}")
+
+    def _finalize_span(self) -> None:
+        """Finalize the span with aggregated data."""
+        try:
+            # Calculate latency
+            end_time = monotonic_ns()
+            latency_ms = duration_ms(self._start_time, end_time)
+            self._span.set_attribute("llm.latency_ms", latency_ms)
+
+            # Time to first token
+            if self._first_token_time is not None:
+                ttft_ms = duration_ms(self._start_time, self._first_token_time)
+                self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)
+
+            # Aggregated text content
+            if self._text_content:
+                full_text = "".join(self._text_content)
+                self._span.add_event(
+                    name="llm.response",
+                    attributes={"content": [{"type": "text", "text": full_text}]},
+                )
+
+            # Tool calls
+            if self._tool_calls:
+                self._span.add_event(
+                    name="llm.tool_use",
+                    attributes={"tool_calls": self._tool_calls},
+                )
+
+            # Finalize replay capture if enabled
+            if self._replay_capture:
+                try:
+                    # Capture aggregated response text
+                    full_text = "".join(self._text_content)
+                    self._replay_capture.set_llm_response(
+                        text=full_text,
+                        finish_reason=self._span.attributes.get("llm.stop_reason"),
+                    )
+
+                    # Add streaming chunks
+                    if self._streaming_chunks:
+                        for chunk in self._streaming_chunks:
+                            self._replay_capture.add_streaming_chunk(chunk)
+
+                    # Attach to span
+                    object.__setattr__(
+                        self._span, "replay_snapshot", self._replay_capture.build()
+                    )
+
+                except Exception as e:
+                    logger.debug(f"Failed to finalize replay capture: {e}")
+
+            # Mark as successful
+            self._span.set_status(SpanStatus.SUCCESS)
+
+        except Exception as e:
+            # Don't let finalization failures break the instrumentation
+            logger.debug(f"Failed to finalize span: {e}")