prela 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
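
The hunk below shows the full contents of `prela/instrumentation/openai.py` (entry 60 in the list above; its `+1198` line count matches the `@@ -0,0 +1,1198 @@` header of this new file).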
@@ -0,0 +1,1198 @@
+ """Instrumentation for OpenAI SDK (openai>=1.0.0).
+
+ This module provides automatic tracing for OpenAI's API, including:
+ - Synchronous and asynchronous chat completions
+ - Legacy completions API
+ - Embeddings API
+ - Streaming responses
+ - Function/tool calling
+
+ Example:
+     ```python
+     from prela.instrumentation.openai import OpenAIInstrumentor
+     from prela.core.tracer import Tracer
+     import openai
+
+     tracer = Tracer()
+     instrumentor = OpenAIInstrumentor()
+     instrumentor.instrument(tracer)
+
+     # Now all OpenAI API calls will be automatically traced
+     client = openai.OpenAI()
+     response = client.chat.completions.create(
+         model="gpt-4",
+         messages=[{"role": "user", "content": "Hello!"}]
+     )
+     ```
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any, Callable
+
+ from prela.core.clock import monotonic_ns, duration_ms
+ from prela.core.span import SpanType, SpanStatus
+ from prela.instrumentation.base import (
+     Instrumentor,
+     wrap_function,
+     unwrap_function,
+     _ORIGINALS_ATTR,
+ )
+
+ if TYPE_CHECKING:
+     from prela.core.tracer import Tracer
+
+ logger = logging.getLogger(__name__)
+
+
+ class OpenAIInstrumentor(Instrumentor):
+     """Instrumentor for OpenAI SDK.
+
+     Patches the following methods:
+     - openai.OpenAI.chat.completions.create (sync)
+     - openai.AsyncOpenAI.chat.completions.create (async)
+     - openai.OpenAI.completions.create (sync, legacy)
+     - openai.OpenAI.embeddings.create (sync)
+
+     Captures detailed information about requests, responses, tool usage,
+     and streaming events.
+     """
+
+     def __init__(self) -> None:
+         """Initialize the OpenAI instrumentor."""
+         self._tracer: Tracer | None = None
+         self._openai_module: Any = None
+         self._chat_completions_module: Any = None
+         self._async_chat_completions_module: Any = None
+         self._completions_module: Any = None
+         self._embeddings_module: Any = None
+
+     def instrument(self, tracer: Tracer) -> None:
+         """Enable instrumentation for OpenAI SDK.
+
+         Args:
+             tracer: The tracer to use for creating spans
+
+         Raises:
+             ImportError: If openai package is not installed
+             RuntimeError: If instrumentation fails
+         """
+         if self.is_instrumented:
+             logger.debug("OpenAI SDK is already instrumented, skipping")
+             return
+
+         try:
+             import openai
+         except ImportError as e:
+             raise ImportError(
+                 "openai package is not installed. "
+                 "Install it with: pip install openai>=1.0.0"
+             ) from e
+
+         self._tracer = tracer
+         self._openai_module = openai
+
+         try:
+             # Get the completions modules for sync and async
+             if hasattr(openai, "OpenAI"):
+                 client = openai.OpenAI.__new__(openai.OpenAI)
+                 if hasattr(client, "chat") and hasattr(client.chat, "completions"):
+                     self._chat_completions_module = client.chat.completions.__class__
+                 if hasattr(client, "completions"):
+                     self._completions_module = client.completions.__class__
+                 if hasattr(client, "embeddings"):
+                     self._embeddings_module = client.embeddings.__class__
+
+             if hasattr(openai, "AsyncOpenAI"):
+                 async_client = openai.AsyncOpenAI.__new__(openai.AsyncOpenAI)
+                 if hasattr(async_client, "chat") and hasattr(
+                     async_client.chat, "completions"
+                 ):
+                     self._async_chat_completions_module = (
+                         async_client.chat.completions.__class__
+                     )
+
+             # Wrap sync chat completions
+             if self._chat_completions_module is not None:
+                 wrap_function(
+                     self._chat_completions_module,
+                     "create",
+                     lambda orig: self._create_chat_completions_wrapper(
+                         orig, is_async=False
+                     ),
+                 )
+                 logger.debug("Wrapped openai.OpenAI.chat.completions.create")
+
+             # Wrap async chat completions
+             if self._async_chat_completions_module is not None:
+                 wrap_function(
+                     self._async_chat_completions_module,
+                     "create",
+                     lambda orig: self._create_chat_completions_wrapper(
+                         orig, is_async=True
+                     ),
+                 )
+                 logger.debug("Wrapped openai.AsyncOpenAI.chat.completions.create")
+
+             # Wrap legacy completions
+             if self._completions_module is not None:
+                 wrap_function(
+                     self._completions_module,
+                     "create",
+                     lambda orig: self._create_completions_wrapper(orig),
+                 )
+                 logger.debug("Wrapped openai.OpenAI.completions.create")
+
+             # Wrap embeddings
+             if self._embeddings_module is not None:
+                 wrap_function(
+                     self._embeddings_module,
+                     "create",
+                     lambda orig: self._create_embeddings_wrapper(orig),
+                 )
+                 logger.debug("Wrapped openai.OpenAI.embeddings.create")
+
+             logger.info("Successfully instrumented OpenAI SDK")
+
+         except Exception as e:
+             self._tracer = None
+             self._openai_module = None
+             self._chat_completions_module = None
+             self._async_chat_completions_module = None
+             self._completions_module = None
+             self._embeddings_module = None
+             raise RuntimeError(f"Failed to instrument OpenAI SDK: {e}") from e
+
+     def uninstrument(self) -> None:
+         """Disable instrumentation and restore original functions."""
+         if not self.is_instrumented:
+             logger.debug("OpenAI SDK is not instrumented, skipping")
+             return
+
+         try:
+             # Unwrap chat completions
+             if self._chat_completions_module is not None:
+                 unwrap_function(self._chat_completions_module, "create")
+
+             if self._async_chat_completions_module is not None:
+                 unwrap_function(self._async_chat_completions_module, "create")
+
+             # Unwrap legacy completions
+             if self._completions_module is not None:
+                 unwrap_function(self._completions_module, "create")
+
+             # Unwrap embeddings
+             if self._embeddings_module is not None:
+                 unwrap_function(self._embeddings_module, "create")
+
+             logger.info("Successfully uninstrumented OpenAI SDK")
+
+         finally:
+             self._tracer = None
+             self._openai_module = None
+             self._chat_completions_module = None
+             self._async_chat_completions_module = None
+             self._completions_module = None
+             self._embeddings_module = None
+
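
Because `wrap_function` patches the shared resource class (`client.chat.completions.__class__`) rather than any one client instance, instrumentation is process-wide: clients created before or after `instrument()` both route through the traced `create`, and `uninstrument()` restores the original method for all of them. A small sketch of that consequence, using only the API shown above (calls require a valid API key):

```python
import openai

from prela.core.tracer import Tracer
from prela.instrumentation.openai import OpenAIInstrumentor

early_client = openai.OpenAI()  # constructed before instrumentation

instrumentor = OpenAIInstrumentor()
instrumentor.instrument(Tracer())

late_client = openai.OpenAI()  # constructed after instrumentation

# Both calls are traced: the patch lives on the Completions class they share.
early_client.chat.completions.create(
    model="gpt-4", messages=[{"role": "user", "content": "hi"}]
)
late_client.chat.completions.create(
    model="gpt-4", messages=[{"role": "user", "content": "hi"}]
)
```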
+     @property
+     def is_instrumented(self) -> bool:
+         """Check if OpenAI SDK is currently instrumented."""
+         return (
+             self._tracer is not None
+             and self._chat_completions_module is not None
+             and hasattr(self._chat_completions_module, _ORIGINALS_ATTR)
+         )
+
+     def _create_chat_completions_wrapper(
+         self, original_func: Callable[..., Any], is_async: bool
+     ) -> Callable[..., Any]:
+         """Create a wrapper for chat.completions.create method.
+
+         Args:
+             original_func: The original create function
+             is_async: Whether this is an async function
+
+         Returns:
+             Wrapped function that creates spans
+         """
+         if is_async:
+
+             @wraps(original_func)
+             async def async_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                 # Pass is_async positionally: a keyword argument placed before
+                 # *args would collide with any positional arguments at call time.
+                 return await self._trace_chat_completions(
+                     original_func, self_obj, True, *args, **kwargs
+                 )
+
+             return async_wrapper
+         else:
+
+             @wraps(original_func)
+             def sync_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+                 return self._trace_chat_completions(
+                     original_func, self_obj, False, *args, **kwargs
+                 )
+
+             return sync_wrapper
+
+     def _create_completions_wrapper(
+         self, original_func: Callable[..., Any]
+     ) -> Callable[..., Any]:
+         """Create a wrapper for legacy completions.create method.
+
+         Args:
+             original_func: The original create function
+
+         Returns:
+             Wrapped function that creates spans
+         """
+
+         @wraps(original_func)
+         def wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+             return self._trace_completions(original_func, self_obj, *args, **kwargs)
+
+         return wrapper
+
+     def _create_embeddings_wrapper(
+         self, original_func: Callable[..., Any]
+     ) -> Callable[..., Any]:
+         """Create a wrapper for embeddings.create method.
+
+         Args:
+             original_func: The original create function
+
+         Returns:
+             Wrapped function that creates spans
+         """
+
+         @wraps(original_func)
+         def wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
+             return self._trace_embeddings(original_func, self_obj, *args, **kwargs)
+
+         return wrapper
+
+     def _trace_chat_completions(
+         self,
+         original_func: Callable[..., Any],
+         self_obj: Any,
+         is_async: bool,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Any:
+         """Trace a chat.completions.create call (sync or async).
+
+         Args:
+             original_func: The original create function
+             self_obj: The completions object (self)
+             is_async: Whether this is an async call
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             The response from the API call
+         """
+         if is_async:
+             return self._trace_chat_completions_async(
+                 original_func, self_obj, *args, **kwargs
+             )
+         else:
+             return self._trace_chat_completions_sync(
+                 original_func, self_obj, *args, **kwargs
+             )
+
+     def _trace_chat_completions_sync(
+         self,
+         original_func: Callable[..., Any],
+         self_obj: Any,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Any:
+         """Trace a synchronous chat.completions.create call.
+
+         Args:
+             original_func: The original create function
+             self_obj: The completions object (self)
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             The response from the API call
+         """
+         if self._tracer is None:
+             return original_func(self_obj, *args, **kwargs)
+
+         # Extract request parameters
+         model = kwargs.get("model", "unknown")
+         messages = kwargs.get("messages", [])
+         temperature = kwargs.get("temperature")
+         max_tokens = kwargs.get("max_tokens")
+         stream = kwargs.get("stream", False)
+
+         # Start timing
+         start_time = monotonic_ns()
+
+         # Initialize replay capture if enabled
+         replay_capture = None
+         if self._tracer.capture_for_replay:
+             from prela.core.replay import ReplayCapture
+
+             replay_capture = ReplayCapture()
+             replay_capture.set_llm_request(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **{
+                     k: v
+                     for k, v in kwargs.items()
+                     if k not in ("model", "messages", "temperature", "max_tokens", "stream")
+                 },
+             )
+
+         # Create span
+         span = self._tracer.start_span(
+             name="openai.chat.completions.create",
+             span_type=SpanType.LLM,
+         )
+
+         stream_owns_span = False
+         try:
+             # Set request attributes
+             span.set_attribute("llm.vendor", "openai")
+             span.set_attribute("llm.model", model)
+             span.set_attribute("llm.request.model", model)
+
+             if temperature is not None:
+                 span.set_attribute("llm.temperature", temperature)
+             if max_tokens is not None:
+                 span.set_attribute("llm.max_tokens", max_tokens)
+             if stream:
+                 span.set_attribute("llm.stream", True)
+
+             # Add request event
+             span.add_event(
+                 name="llm.request",
+                 attributes={"messages": messages},
+             )
+
+             # Make the API call
+             response = original_func(self_obj, *args, **kwargs)
+
+             # Handle streaming response: the wrapper takes ownership of the
+             # span and ends it once the stream has been consumed, so skip
+             # the end() in the finally block below.
+             if stream:
+                 stream_owns_span = True
+                 return TracedChatCompletionStream(
+                     stream=response,
+                     span=span,
+                     tracer=self._tracer,
+                     start_time=start_time,
+                     replay_capture=replay_capture,
+                 )
+
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(start_time, end_time)
+             span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Extract response attributes
+             self._extract_chat_completion_attributes(span, response)
+
+             # Add response event
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "message"):
+                     span.add_event(
+                         name="llm.response",
+                         attributes={"content": first_choice.message.content},
+                     )
+
+             # Handle tool calls
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "message") and hasattr(
+                     first_choice.message, "tool_calls"
+                 ):
+                     if first_choice.message.tool_calls:
+                         self._handle_tool_calls(span, first_choice.message.tool_calls)
+
+             # Finalize replay capture
+             if replay_capture:
+                 try:
+                     # Extract response text
+                     response_text = ""
+                     if hasattr(response, "choices") and response.choices:
+                         first_choice = response.choices[0]
+                         if hasattr(first_choice, "message") and hasattr(
+                             first_choice.message, "content"
+                         ):
+                             response_text = first_choice.message.content or ""
+
+                     # Extract usage
+                     prompt_tokens = None
+                     completion_tokens = None
+                     if hasattr(response, "usage"):
+                         prompt_tokens = getattr(response.usage, "prompt_tokens", None)
+                         completion_tokens = getattr(
+                             response.usage, "completion_tokens", None
+                         )
+
+                     # Extract finish reason
+                     finish_reason = None
+                     if hasattr(response, "choices") and response.choices:
+                         first_choice = response.choices[0]
+                         finish_reason = getattr(first_choice, "finish_reason", None)
+
+                     replay_capture.set_llm_response(
+                         text=response_text,
+                         finish_reason=finish_reason,
+                         model=getattr(response, "model", model),
+                         prompt_tokens=prompt_tokens,
+                         completion_tokens=completion_tokens,
+                     )
+
+                     replay_capture.set_model_info(
+                         model=getattr(response, "model", model),
+                         created=getattr(response, "created", None),
+                         id=getattr(response, "id", None),
+                     )
+
+                     # Attach replay snapshot to span
+                     object.__setattr__(span, "replay_snapshot", replay_capture.build())
+                 except Exception as e:
+                     logger.debug(f"Failed to capture replay data: {e}")
+
+             # Mark as successful
+             span.set_status(SpanStatus.SUCCESS)
+
+             return response
+
+         except Exception as e:
+             # Handle errors
+             self._handle_error(span, e)
+             raise
+
+         finally:
+             if not stream_owns_span:
+                 span.end()
+
+     async def _trace_chat_completions_async(
+         self,
+         original_func: Callable[..., Any],
+         self_obj: Any,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Any:
+         """Trace an asynchronous chat.completions.create call.
+
+         Args:
+             original_func: The original create function
+             self_obj: The completions object (self)
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             The response from the API call
+         """
+         if self._tracer is None:
+             return await original_func(self_obj, *args, **kwargs)
+
+         # Extract request parameters
+         model = kwargs.get("model", "unknown")
+         messages = kwargs.get("messages", [])
+         temperature = kwargs.get("temperature")
+         max_tokens = kwargs.get("max_tokens")
+         stream = kwargs.get("stream", False)
+
+         # Start timing
+         start_time = monotonic_ns()
+
+         # Initialize replay capture if enabled
+         replay_capture = None
+         if self._tracer.capture_for_replay:
+             from prela.core.replay import ReplayCapture
+
+             replay_capture = ReplayCapture()
+             replay_capture.set_llm_request(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **{
+                     k: v
+                     for k, v in kwargs.items()
+                     if k not in ("model", "messages", "temperature", "max_tokens", "stream")
+                 },
+             )
+
+         # Create span
+         span = self._tracer.start_span(
+             name="openai.chat.completions.create",
+             span_type=SpanType.LLM,
+         )
+
+         stream_owns_span = False
+         try:
+             # Set request attributes
+             span.set_attribute("llm.vendor", "openai")
+             span.set_attribute("llm.model", model)
+             span.set_attribute("llm.request.model", model)
+
+             if temperature is not None:
+                 span.set_attribute("llm.temperature", temperature)
+             if max_tokens is not None:
+                 span.set_attribute("llm.max_tokens", max_tokens)
+             if stream:
+                 span.set_attribute("llm.stream", True)
+
+             # Add request event
+             span.add_event(
+                 name="llm.request",
+                 attributes={"messages": messages},
+             )
+
+             # Make the API call
+             response = await original_func(self_obj, *args, **kwargs)
+
+             # Handle streaming response: the wrapper takes ownership of the
+             # span and ends it once the stream has been consumed, so skip
+             # the end() in the finally block below.
+             if stream:
+                 stream_owns_span = True
+                 return TracedAsyncChatCompletionStream(
+                     stream=response,
+                     span=span,
+                     tracer=self._tracer,
+                     start_time=start_time,
+                     replay_capture=replay_capture,
+                 )
+
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(start_time, end_time)
+             span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Extract response attributes
+             self._extract_chat_completion_attributes(span, response)
+
+             # Add response event
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "message"):
+                     span.add_event(
+                         name="llm.response",
+                         attributes={"content": first_choice.message.content},
+                     )
+
+             # Handle tool calls
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "message") and hasattr(
+                     first_choice.message, "tool_calls"
+                 ):
+                     if first_choice.message.tool_calls:
+                         self._handle_tool_calls(span, first_choice.message.tool_calls)
+
+             # Finalize replay capture
+             if replay_capture:
+                 try:
+                     # Extract response text
+                     response_text = ""
+                     if hasattr(response, "choices") and response.choices:
+                         first_choice = response.choices[0]
+                         if hasattr(first_choice, "message") and hasattr(
+                             first_choice.message, "content"
+                         ):
+                             response_text = first_choice.message.content or ""
+
+                     # Extract usage
+                     prompt_tokens = None
+                     completion_tokens = None
+                     if hasattr(response, "usage"):
+                         prompt_tokens = getattr(response.usage, "prompt_tokens", None)
+                         completion_tokens = getattr(
+                             response.usage, "completion_tokens", None
+                         )
+
+                     # Extract finish reason
+                     finish_reason = None
+                     if hasattr(response, "choices") and response.choices:
+                         first_choice = response.choices[0]
+                         finish_reason = getattr(first_choice, "finish_reason", None)
+
+                     replay_capture.set_llm_response(
+                         text=response_text,
+                         finish_reason=finish_reason,
+                         model=getattr(response, "model", model),
+                         prompt_tokens=prompt_tokens,
+                         completion_tokens=completion_tokens,
+                     )
+
+                     replay_capture.set_model_info(
+                         model=getattr(response, "model", model),
+                         created=getattr(response, "created", None),
+                         id=getattr(response, "id", None),
+                     )
+
+                     # Attach replay snapshot to span
+                     object.__setattr__(span, "replay_snapshot", replay_capture.build())
+                 except Exception as e:
+                     logger.debug(f"Failed to capture replay data: {e}")
+
+             # Mark as successful
+             span.set_status(SpanStatus.SUCCESS)
+
+             return response
+
+         except Exception as e:
+             # Handle errors
+             self._handle_error(span, e)
+             raise
+
+         finally:
+             if not stream_owns_span:
+                 span.end()
+
+     def _trace_completions(
+         self,
+         original_func: Callable[..., Any],
+         self_obj: Any,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Any:
+         """Trace a legacy completions.create call.
+
+         Args:
+             original_func: The original create function
+             self_obj: The completions object (self)
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             The response from the API call
+         """
+         if self._tracer is None:
+             return original_func(self_obj, *args, **kwargs)
+
+         # Extract request parameters
+         model = kwargs.get("model", "unknown")
+         prompt = kwargs.get("prompt", "")
+         temperature = kwargs.get("temperature")
+         max_tokens = kwargs.get("max_tokens")
+
+         # Start timing
+         start_time = monotonic_ns()
+
+         # Create span
+         span = self._tracer.start_span(
+             name="openai.completions.create",
+             span_type=SpanType.LLM,
+         )
+
+         try:
+             # Set request attributes
+             span.set_attribute("llm.vendor", "openai")
+             span.set_attribute("llm.model", model)
+             span.set_attribute("llm.request.model", model)
+
+             if temperature is not None:
+                 span.set_attribute("llm.temperature", temperature)
+             if max_tokens is not None:
+                 span.set_attribute("llm.max_tokens", max_tokens)
+
+             # Add request event
+             span.add_event(
+                 name="llm.request",
+                 attributes={"prompt": prompt},
+             )
+
+             # Make the API call
+             response = original_func(self_obj, *args, **kwargs)
+
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(start_time, end_time)
+             span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Extract response attributes
+             self._extract_completion_attributes(span, response)
+
+             # Add response event
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "text"):
+                     span.add_event(
+                         name="llm.response",
+                         attributes={"text": first_choice.text},
+                     )
+
+             # Mark as successful
+             span.set_status(SpanStatus.SUCCESS)
+
+             return response
+
+         except Exception as e:
+             # Handle errors
+             self._handle_error(span, e)
+             raise
+
+         finally:
+             span.end()
+
+     def _trace_embeddings(
+         self,
+         original_func: Callable[..., Any],
+         self_obj: Any,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Any:
+         """Trace an embeddings.create call.
+
+         Args:
+             original_func: The original create function
+             self_obj: The embeddings object (self)
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             The response from the API call
+         """
+         if self._tracer is None:
+             return original_func(self_obj, *args, **kwargs)
+
+         # Extract request parameters
+         model = kwargs.get("model", "unknown")
+         input_data = kwargs.get("input", [])
+
+         # Start timing
+         start_time = monotonic_ns()
+
+         # Create span
+         span = self._tracer.start_span(
+             name="openai.embeddings.create",
+             span_type=SpanType.EMBEDDING,
+         )
+
+         try:
+             # Set request attributes
+             span.set_attribute("llm.vendor", "openai")
+             span.set_attribute("llm.model", model)
+             span.set_attribute("llm.request.model", model)
+
+             # Count inputs
+             if isinstance(input_data, list):
+                 span.set_attribute("embedding.input_count", len(input_data))
+             else:
+                 span.set_attribute("embedding.input_count", 1)
+
+             # Make the API call
+             response = original_func(self_obj, *args, **kwargs)
+
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(start_time, end_time)
+             span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Extract response attributes
+             self._extract_embedding_attributes(span, response)
+
+             # Mark as successful
+             span.set_status(SpanStatus.SUCCESS)
+
+             return response
+
+         except Exception as e:
+             # Handle errors
+             self._handle_error(span, e)
+             raise
+
+         finally:
+             span.end()
+
+     def _extract_chat_completion_attributes(self, span: Any, response: Any) -> None:
+         """Extract attributes from a chat completion response.
+
+         Args:
+             span: The span to add attributes to
+             response: The response object from the API
+         """
+         try:
+             # Model (actual model used)
+             if hasattr(response, "model"):
+                 span.set_attribute("llm.response.model", response.model)
+
+             # Response ID
+             if hasattr(response, "id"):
+                 span.set_attribute("llm.response.id", response.id)
+
+             # Usage statistics
+             if hasattr(response, "usage"):
+                 usage = response.usage
+                 if hasattr(usage, "prompt_tokens"):
+                     span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
+                 if hasattr(usage, "completion_tokens"):
+                     span.set_attribute("llm.completion_tokens", usage.completion_tokens)
+                 if hasattr(usage, "total_tokens"):
+                     span.set_attribute("llm.total_tokens", usage.total_tokens)
+
+             # Finish reason
+             if hasattr(response, "choices") and response.choices:
+                 first_choice = response.choices[0]
+                 if hasattr(first_choice, "finish_reason"):
+                     span.set_attribute("llm.finish_reason", first_choice.finish_reason)
+
+         except Exception as e:
+             # Don't let attribute extraction failures break the instrumentation
+             logger.debug(f"Failed to extract chat completion attributes: {e}")
+
+     def _extract_completion_attributes(self, span: Any, response: Any) -> None:
+         """Extract attributes from a legacy completion response.
+
+         Args:
+             span: The span to add attributes to
+             response: The response object from the API
+         """
+         try:
+             # Model
+             if hasattr(response, "model"):
+                 span.set_attribute("llm.response.model", response.model)
+
+             # Response ID
+             if hasattr(response, "id"):
+                 span.set_attribute("llm.response.id", response.id)
+
+             # Usage statistics
+             if hasattr(response, "usage"):
+                 usage = response.usage
+                 if hasattr(usage, "prompt_tokens"):
+                     span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
+                 if hasattr(usage, "completion_tokens"):
+                     span.set_attribute("llm.completion_tokens", usage.completion_tokens)
+                 if hasattr(usage, "total_tokens"):
+                     span.set_attribute("llm.total_tokens", usage.total_tokens)
+
+         except Exception as e:
+             logger.debug(f"Failed to extract completion attributes: {e}")
+
+     def _extract_embedding_attributes(self, span: Any, response: Any) -> None:
+         """Extract attributes from an embedding response.
+
+         Args:
+             span: The span to add attributes to
+             response: The response object from the API
+         """
+         try:
+             # Model
+             if hasattr(response, "model"):
+                 span.set_attribute("llm.response.model", response.model)
+
+             # Usage statistics
+             if hasattr(response, "usage"):
+                 usage = response.usage
+                 if hasattr(usage, "prompt_tokens"):
+                     span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
+                 if hasattr(usage, "total_tokens"):
+                     span.set_attribute("llm.total_tokens", usage.total_tokens)
+
+             # Embedding count and dimensions
+             if hasattr(response, "data") and response.data:
+                 span.set_attribute("embedding.count", len(response.data))
+                 if hasattr(response.data[0], "embedding"):
+                     span.set_attribute(
+                         "embedding.dimensions", len(response.data[0].embedding)
+                     )
+
+         except Exception as e:
+             logger.debug(f"Failed to extract embedding attributes: {e}")
+
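
For a successful non-streaming chat completion, the extraction helpers above leave the span with attributes along these lines (the keys are the ones set in this file; the values are purely illustrative):

```python
# Sketch of the span attributes after a traced chat.completions.create call.
{
    "llm.vendor": "openai",
    "llm.model": "gpt-4",                  # model requested
    "llm.request.model": "gpt-4",
    "llm.temperature": 0.2,                # present only if passed
    "llm.latency_ms": 842.3,
    "llm.response.model": "gpt-4-0613",    # model actually served (illustrative)
    "llm.response.id": "chatcmpl-abc123",  # illustrative
    "llm.prompt_tokens": 12,
    "llm.completion_tokens": 34,
    "llm.total_tokens": 46,
    "llm.finish_reason": "stop",
}
```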
+     def _handle_tool_calls(self, span: Any, tool_calls: Any) -> None:
+         """Handle tool calls in the response.
+
+         Args:
+             span: The span to add tool call information to
+             tool_calls: The tool calls from the response
+         """
+         try:
+             calls = []
+             for tool_call in tool_calls:
+                 call_info = {
+                     "id": getattr(tool_call, "id", None),
+                     "type": getattr(tool_call, "type", None),
+                 }
+
+                 if hasattr(tool_call, "function"):
+                     function = tool_call.function
+                     call_info["function"] = {
+                         "name": getattr(function, "name", None),
+                         "arguments": getattr(function, "arguments", None),
+                     }
+
+                 calls.append(call_info)
+
+             if calls:
+                 span.add_event(
+                     name="llm.tool_calls",
+                     attributes={"tool_calls": calls},
+                 )
+
+         except Exception as e:
+             logger.debug(f"Failed to handle tool calls: {e}")
+
+     def _handle_error(self, span: Any, error: Exception) -> None:
+         """Handle an error during API call.
+
+         Args:
+             span: The span to record the error on
+             error: The exception that was raised
+         """
+         try:
+             # Set error status
+             span.set_status(SpanStatus.ERROR, str(error))
+
+             # Extract error details
+             error_attrs: dict[str, Any] = {
+                 "error.type": type(error).__name__,
+                 "error.message": str(error),
+             }
+
+             # Handle openai-specific errors
+             if hasattr(error, "status_code"):
+                 error_attrs["error.status_code"] = error.status_code
+
+             span.add_event(name="error", attributes=error_attrs)
+
+         except Exception as e:
+             logger.debug(f"Failed to handle error: {e}")
+
+
+ class TracedChatCompletionStream:
+     """Wrapper for streaming chat completion responses."""
+
+     def __init__(
+         self,
+         stream: Any,
+         span: Any,
+         tracer: Tracer,
+         start_time: int,
+         replay_capture: Any = None,
+     ) -> None:
+         """Initialize the traced stream.
+
+         Args:
+             stream: The original stream
+             span: The span to record events on
+             tracer: The tracer instance
+             start_time: Start time in nanoseconds
+             replay_capture: Optional ReplayCapture instance
+         """
+         self._stream = stream
+         self._span = span
+         self._tracer = tracer
+         self._start_time = start_time
+         self._first_token_time: int | None = None
+         self._content_chunks: list[str] = []
+         self._finish_reason: str | None = None
+         self._replay_capture = replay_capture
+         # Guards against finalizing the span twice when the stream is
+         # consumed through both iteration and the context manager protocol.
+         self._finalized = False
+
+     def __enter__(self) -> TracedChatCompletionStream:
+         """Enter context manager."""
+         return self
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Exit context manager and finalize span."""
+         if self._finalized:
+             return
+         self._finalized = True
+         try:
+             if exc_type is None:
+                 self._finalize_span()
+             else:
+                 self._span.set_status(SpanStatus.ERROR, str(exc_val))
+
+         finally:
+             self._span.end()
+
+     def __iter__(self) -> Any:
+         """Iterate over stream chunks."""
+         try:
+             for chunk in self._stream:
+                 self._process_chunk(chunk)
+                 yield chunk
+
+         except Exception as e:
+             self._span.set_status(SpanStatus.ERROR, str(e))
+             if not self._finalized:
+                 self._finalized = True
+                 self._span.end()
+             raise
+
+         finally:
+             # Plain iteration (without a with-block) is the common pattern,
+             # so finalize here as well once the stream is exhausted or the
+             # generator is closed; the flag prevents a second finalize.
+             if not self._finalized:
+                 self._finalized = True
+                 self._finalize_span()
+                 self._span.end()
+
+     def _process_chunk(self, chunk: Any) -> None:
+         """Process a streaming chunk.
+
+         Args:
+             chunk: The streaming chunk
+         """
+         try:
+             # Capture first token time
+             if self._first_token_time is None:
+                 self._first_token_time = monotonic_ns()
+
+             # Extract content from chunk
+             if hasattr(chunk, "choices") and chunk.choices:
+                 first_choice = chunk.choices[0]
+
+                 # Get content delta
+                 if hasattr(first_choice, "delta"):
+                     delta = first_choice.delta
+                     if hasattr(delta, "content") and delta.content:
+                         self._content_chunks.append(delta.content)
+
+                 # Get finish reason
+                 if hasattr(first_choice, "finish_reason") and first_choice.finish_reason:
+                     self._finish_reason = first_choice.finish_reason
+
+         except Exception as e:
+             logger.debug(f"Failed to process chunk: {e}")
+
+     def _finalize_span(self) -> None:
+         """Finalize the span with aggregated data."""
+         try:
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(self._start_time, end_time)
+             self._span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Time to first token
+             if self._first_token_time is not None:
+                 ttft_ms = duration_ms(self._start_time, self._first_token_time)
+                 self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)
+
+             # Aggregated content
+             if self._content_chunks:
+                 full_content = "".join(self._content_chunks)
+                 self._span.add_event(
+                     name="llm.response",
+                     attributes={"content": full_content},
+                 )
+
+             # Finish reason
+             if self._finish_reason:
+                 self._span.set_attribute("llm.finish_reason", self._finish_reason)
+
+             # Finalize replay capture for streaming
+             if self._replay_capture:
+                 try:
+                     full_content = "".join(self._content_chunks)
+                     self._replay_capture.set_llm_response(
+                         text=full_content,
+                         finish_reason=self._finish_reason,
+                     )
+                     # Attach replay snapshot to span
+                     object.__setattr__(
+                         self._span, "replay_snapshot", self._replay_capture.build()
+                     )
+                 except Exception as e:
+                     logger.debug(f"Failed to capture streaming replay data: {e}")
+
+             # Mark as successful
+             self._span.set_status(SpanStatus.SUCCESS)
+
+         except Exception as e:
+             logger.debug(f"Failed to finalize span: {e}")
+
+
+ class TracedAsyncChatCompletionStream:
+     """Wrapper for async streaming chat completion responses."""
+
+     def __init__(
+         self,
+         stream: Any,
+         span: Any,
+         tracer: Tracer,
+         start_time: int,
+         replay_capture: Any = None,
+     ) -> None:
+         """Initialize the traced async stream.
+
+         Args:
+             stream: The original async stream
+             span: The span to record events on
+             tracer: The tracer instance
+             start_time: Start time in nanoseconds
+             replay_capture: Optional ReplayCapture instance
+         """
+         self._stream = stream
+         self._span = span
+         self._tracer = tracer
+         self._start_time = start_time
+         self._first_token_time: int | None = None
+         self._content_chunks: list[str] = []
+         self._finish_reason: str | None = None
+         self._replay_capture = replay_capture
+         # Guards against finalizing the span twice when the stream is
+         # consumed through both iteration and the context manager protocol.
+         self._finalized = False
+
+     async def __aenter__(self) -> TracedAsyncChatCompletionStream:
+         """Enter async context manager."""
+         return self
+
+     async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Exit async context manager and finalize span."""
+         if self._finalized:
+             return
+         self._finalized = True
+         try:
+             if exc_type is None:
+                 self._finalize_span()
+             else:
+                 self._span.set_status(SpanStatus.ERROR, str(exc_val))
+
+         finally:
+             self._span.end()
+
+     async def __aiter__(self) -> Any:
+         """Async iterate over stream chunks."""
+         try:
+             async for chunk in self._stream:
+                 self._process_chunk(chunk)
+                 yield chunk
+
+         except Exception as e:
+             self._span.set_status(SpanStatus.ERROR, str(e))
+             if not self._finalized:
+                 self._finalized = True
+                 self._span.end()
+             raise
+
+         finally:
+             # Plain `async for` (without an async with-block) is the common
+             # pattern, so finalize here as well once the stream is exhausted
+             # or the generator is closed; the flag prevents a second finalize.
+             if not self._finalized:
+                 self._finalized = True
+                 self._finalize_span()
+                 self._span.end()
+
+     def _process_chunk(self, chunk: Any) -> None:
+         """Process a streaming chunk.
+
+         Args:
+             chunk: The streaming chunk
+         """
+         try:
+             # Capture first token time
+             if self._first_token_time is None:
+                 self._first_token_time = monotonic_ns()
+
+             # Extract content from chunk
+             if hasattr(chunk, "choices") and chunk.choices:
+                 first_choice = chunk.choices[0]
+
+                 # Get content delta
+                 if hasattr(first_choice, "delta"):
+                     delta = first_choice.delta
+                     if hasattr(delta, "content") and delta.content:
+                         self._content_chunks.append(delta.content)
+
+                 # Get finish reason
+                 if hasattr(first_choice, "finish_reason") and first_choice.finish_reason:
+                     self._finish_reason = first_choice.finish_reason
+
+         except Exception as e:
+             logger.debug(f"Failed to process chunk: {e}")
+
+     def _finalize_span(self) -> None:
+         """Finalize the span with aggregated data."""
+         try:
+             # Calculate latency
+             end_time = monotonic_ns()
+             latency_ms = duration_ms(self._start_time, end_time)
+             self._span.set_attribute("llm.latency_ms", latency_ms)
+
+             # Time to first token
+             if self._first_token_time is not None:
+                 ttft_ms = duration_ms(self._start_time, self._first_token_time)
+                 self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)
+
+             # Aggregated content
+             if self._content_chunks:
+                 full_content = "".join(self._content_chunks)
+                 self._span.add_event(
+                     name="llm.response",
+                     attributes={"content": full_content},
+                 )
+
+             # Finish reason
+             if self._finish_reason:
+                 self._span.set_attribute("llm.finish_reason", self._finish_reason)
+
+             # Finalize replay capture for streaming
+             if self._replay_capture:
+                 try:
+                     full_content = "".join(self._content_chunks)
+                     self._replay_capture.set_llm_response(
+                         text=full_content,
+                         finish_reason=self._finish_reason,
+                     )
+                     # Attach replay snapshot to span
+                     object.__setattr__(
+                         self._span, "replay_snapshot", self._replay_capture.build()
+                     )
+                 except Exception as e:
+                     logger.debug(f"Failed to capture streaming replay data: {e}")
+
+             # Mark as successful
+             self._span.set_status(SpanStatus.SUCCESS)
+
+         except Exception as e:
+             logger.debug(f"Failed to finalize span: {e}")