prela-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/instrumentation/openai.py
@@ -0,0 +1,1198 @@
"""Instrumentation for OpenAI SDK (openai>=1.0.0).

This module provides automatic tracing for OpenAI's API, including:
- Synchronous and asynchronous chat completions
- Legacy completions API
- Embeddings API
- Streaming responses
- Function/tool calling

Example:
    ```python
    from prela.instrumentation.openai import OpenAIInstrumentor
    from prela.core.tracer import Tracer
    import openai

    tracer = Tracer()
    instrumentor = OpenAIInstrumentor()
    instrumentor.instrument(tracer)

    # Now all OpenAI API calls will be automatically traced
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```
"""

from __future__ import annotations

import logging
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable

from prela.core.clock import monotonic_ns, duration_ms
from prela.core.span import SpanType, SpanStatus
from prela.instrumentation.base import (
    Instrumentor,
    wrap_function,
    unwrap_function,
    _ORIGINALS_ATTR,
)

if TYPE_CHECKING:
    from prela.core.tracer import Tracer

logger = logging.getLogger(__name__)


class OpenAIInstrumentor(Instrumentor):
    """Instrumentor for OpenAI SDK.

    Patches the following methods:
    - openai.OpenAI.chat.completions.create (sync)
    - openai.AsyncOpenAI.chat.completions.create (async)
    - openai.OpenAI.completions.create (sync, legacy)
    - openai.OpenAI.embeddings.create (sync)

    Captures detailed information about requests, responses, tool usage,
    and streaming events.
    """

    def __init__(self) -> None:
        """Initialize the OpenAI instrumentor."""
        self._tracer: Tracer | None = None
        self._openai_module: Any = None
        self._chat_completions_module: Any = None
        self._async_chat_completions_module: Any = None
        self._completions_module: Any = None
        self._embeddings_module: Any = None

    def instrument(self, tracer: Tracer) -> None:
        """Enable instrumentation for OpenAI SDK.

        Args:
            tracer: The tracer to use for creating spans

        Raises:
            ImportError: If openai package is not installed
            RuntimeError: If instrumentation fails
        """
        if self.is_instrumented:
            logger.debug("OpenAI SDK is already instrumented, skipping")
            return

        try:
            import openai
        except ImportError as e:
            raise ImportError(
                "openai package is not installed. "
                "Install it with: pip install openai>=1.0.0"
            ) from e

        self._tracer = tracer
        self._openai_module = openai

        try:
            # Get the completions modules for sync and async
            if hasattr(openai, "OpenAI"):
                client = openai.OpenAI.__new__(openai.OpenAI)
                if hasattr(client, "chat") and hasattr(client.chat, "completions"):
                    self._chat_completions_module = client.chat.completions.__class__
                if hasattr(client, "completions"):
                    self._completions_module = client.completions.__class__
                if hasattr(client, "embeddings"):
                    self._embeddings_module = client.embeddings.__class__

            if hasattr(openai, "AsyncOpenAI"):
                async_client = openai.AsyncOpenAI.__new__(openai.AsyncOpenAI)
                if hasattr(async_client, "chat") and hasattr(
                    async_client.chat, "completions"
                ):
                    self._async_chat_completions_module = (
                        async_client.chat.completions.__class__
                    )

            # Wrap sync chat completions
            if self._chat_completions_module is not None:
                wrap_function(
                    self._chat_completions_module,
                    "create",
                    lambda orig: self._create_chat_completions_wrapper(
                        orig, is_async=False
                    ),
                )
                logger.debug("Wrapped openai.OpenAI.chat.completions.create")

            # Wrap async chat completions
            if self._async_chat_completions_module is not None:
                wrap_function(
                    self._async_chat_completions_module,
                    "create",
                    lambda orig: self._create_chat_completions_wrapper(
                        orig, is_async=True
                    ),
                )
                logger.debug("Wrapped openai.AsyncOpenAI.chat.completions.create")

            # Wrap legacy completions
            if self._completions_module is not None:
                wrap_function(
                    self._completions_module,
                    "create",
                    lambda orig: self._create_completions_wrapper(orig),
                )
                logger.debug("Wrapped openai.OpenAI.completions.create")

            # Wrap embeddings
            if self._embeddings_module is not None:
                wrap_function(
                    self._embeddings_module,
                    "create",
                    lambda orig: self._create_embeddings_wrapper(orig),
                )
                logger.debug("Wrapped openai.OpenAI.embeddings.create")

            logger.info("Successfully instrumented OpenAI SDK")

        except Exception as e:
            self._tracer = None
            self._openai_module = None
            self._chat_completions_module = None
            self._async_chat_completions_module = None
            self._completions_module = None
            self._embeddings_module = None
            raise RuntimeError(f"Failed to instrument OpenAI SDK: {e}") from e

    def uninstrument(self) -> None:
        """Disable instrumentation and restore original functions."""
        if not self.is_instrumented:
            logger.debug("OpenAI SDK is not instrumented, skipping")
            return

        try:
            # Unwrap chat completions
            if self._chat_completions_module is not None:
                unwrap_function(self._chat_completions_module, "create")

            if self._async_chat_completions_module is not None:
                unwrap_function(self._async_chat_completions_module, "create")

            # Unwrap legacy completions
            if self._completions_module is not None:
                unwrap_function(self._completions_module, "create")

            # Unwrap embeddings
            if self._embeddings_module is not None:
                unwrap_function(self._embeddings_module, "create")

            logger.info("Successfully uninstrumented OpenAI SDK")

        finally:
            self._tracer = None
            self._openai_module = None
            self._chat_completions_module = None
            self._async_chat_completions_module = None
            self._completions_module = None
            self._embeddings_module = None

    @property
    def is_instrumented(self) -> bool:
        """Check if OpenAI SDK is currently instrumented."""
        return (
            self._tracer is not None
            and self._chat_completions_module is not None
            and hasattr(self._chat_completions_module, _ORIGINALS_ATTR)
        )

    def _create_chat_completions_wrapper(
        self, original_func: Callable[..., Any], is_async: bool
    ) -> Callable[..., Any]:
        """Create a wrapper for chat.completions.create method.

        Args:
            original_func: The original create function
            is_async: Whether this is an async function

        Returns:
            Wrapped function that creates spans
        """
        if is_async:

            @wraps(original_func)
            async def async_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
                return await self._trace_chat_completions(
                    original_func, self_obj, is_async=True, *args, **kwargs
                )

            return async_wrapper
        else:

            @wraps(original_func)
            def sync_wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
                return self._trace_chat_completions(
                    original_func, self_obj, is_async=False, *args, **kwargs
                )

            return sync_wrapper

    def _create_completions_wrapper(
        self, original_func: Callable[..., Any]
    ) -> Callable[..., Any]:
        """Create a wrapper for legacy completions.create method.

        Args:
            original_func: The original create function

        Returns:
            Wrapped function that creates spans
        """

        @wraps(original_func)
        def wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
            return self._trace_completions(original_func, self_obj, *args, **kwargs)

        return wrapper

    def _create_embeddings_wrapper(
        self, original_func: Callable[..., Any]
    ) -> Callable[..., Any]:
        """Create a wrapper for embeddings.create method.

        Args:
            original_func: The original create function

        Returns:
            Wrapped function that creates spans
        """

        @wraps(original_func)
        def wrapper(self_obj: Any, *args: Any, **kwargs: Any) -> Any:
            return self._trace_embeddings(original_func, self_obj, *args, **kwargs)

        return wrapper

    def _trace_chat_completions(
        self,
        original_func: Callable[..., Any],
        self_obj: Any,
        is_async: bool,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Trace a chat.completions.create call (sync or async).

        Args:
            original_func: The original create function
            self_obj: The completions object (self)
            is_async: Whether this is an async call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The response from the API call
        """
        if is_async:
            return self._trace_chat_completions_async(
                original_func, self_obj, *args, **kwargs
            )
        else:
            return self._trace_chat_completions_sync(
                original_func, self_obj, *args, **kwargs
            )

    def _trace_chat_completions_sync(
        self,
        original_func: Callable[..., Any],
        self_obj: Any,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Trace a synchronous chat.completions.create call.

        Args:
            original_func: The original create function
            self_obj: The completions object (self)
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The response from the API call
        """
        if self._tracer is None:
            return original_func(self_obj, *args, **kwargs)

        # Extract request parameters
        model = kwargs.get("model", "unknown")
        messages = kwargs.get("messages", [])
        temperature = kwargs.get("temperature")
        max_tokens = kwargs.get("max_tokens")
        stream = kwargs.get("stream", False)

        # Start timing
        start_time = monotonic_ns()

        # Initialize replay capture if enabled
        replay_capture = None
        if self._tracer.capture_for_replay:
            from prela.core.replay import ReplayCapture

            replay_capture = ReplayCapture()
            replay_capture.set_llm_request(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **{k: v for k, v in kwargs.items() if k not in ("model", "messages", "temperature", "max_tokens", "stream")}
            )

        # Create span
        span = self._tracer.start_span(
            name="openai.chat.completions.create",
            span_type=SpanType.LLM,
        )

        try:
            # Set request attributes
            span.set_attribute("llm.vendor", "openai")
            span.set_attribute("llm.model", model)
            span.set_attribute("llm.request.model", model)

            if temperature is not None:
                span.set_attribute("llm.temperature", temperature)
            if max_tokens is not None:
                span.set_attribute("llm.max_tokens", max_tokens)
            if stream:
                span.set_attribute("llm.stream", True)

            # Add request event
            span.add_event(
                name="llm.request",
                attributes={"messages": messages},
            )

            # Make the API call
            response = original_func(self_obj, *args, **kwargs)

            # Handle streaming response
            if stream:
                return TracedChatCompletionStream(
                    stream=response,
                    span=span,
                    tracer=self._tracer,
                    start_time=start_time,
                    replay_capture=replay_capture,
                )

            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(start_time, end_time)
            span.set_attribute("llm.latency_ms", latency_ms)

            # Extract response attributes
            self._extract_chat_completion_attributes(span, response)

            # Add response event
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "message"):
                    span.add_event(
                        name="llm.response",
                        attributes={"content": first_choice.message.content},
                    )

            # Handle tool calls
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "message") and hasattr(
                    first_choice.message, "tool_calls"
                ):
                    if first_choice.message.tool_calls:
                        self._handle_tool_calls(span, first_choice.message.tool_calls)

            # Finalize replay capture
            if replay_capture:
                try:
                    # Extract response text
                    response_text = ""
                    if hasattr(response, "choices") and response.choices:
                        first_choice = response.choices[0]
                        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
                            response_text = first_choice.message.content or ""

                    # Extract usage
                    prompt_tokens = None
                    completion_tokens = None
                    if hasattr(response, "usage"):
                        prompt_tokens = getattr(response.usage, "prompt_tokens", None)
                        completion_tokens = getattr(response.usage, "completion_tokens", None)

                    # Extract finish reason
                    finish_reason = None
                    if hasattr(response, "choices") and response.choices:
                        first_choice = response.choices[0]
                        finish_reason = getattr(first_choice, "finish_reason", None)

                    replay_capture.set_llm_response(
                        text=response_text,
                        finish_reason=finish_reason,
                        model=getattr(response, "model", model),
                        prompt_tokens=prompt_tokens,
                        completion_tokens=completion_tokens,
                    )

                    replay_capture.set_model_info(
                        model=getattr(response, "model", model),
                        created=getattr(response, "created", None),
                        id=getattr(response, "id", None),
                    )

                    # Attach replay snapshot to span
                    object.__setattr__(span, "replay_snapshot", replay_capture.build())
                except Exception as e:
                    logger.debug(f"Failed to capture replay data: {e}")

            # Mark as successful
            span.set_status(SpanStatus.SUCCESS)

            return response

        except Exception as e:
            # Handle errors
            self._handle_error(span, e)
            raise

        finally:
            span.end()

    async def _trace_chat_completions_async(
        self,
        original_func: Callable[..., Any],
        self_obj: Any,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Trace an asynchronous chat.completions.create call.

        Args:
            original_func: The original create function
            self_obj: The completions object (self)
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The response from the API call
        """
        if self._tracer is None:
            return await original_func(self_obj, *args, **kwargs)

        # Extract request parameters
        model = kwargs.get("model", "unknown")
        messages = kwargs.get("messages", [])
        temperature = kwargs.get("temperature")
        max_tokens = kwargs.get("max_tokens")
        stream = kwargs.get("stream", False)

        # Start timing
        start_time = monotonic_ns()

        # Initialize replay capture if enabled
        replay_capture = None
        if self._tracer.capture_for_replay:
            from prela.core.replay import ReplayCapture

            replay_capture = ReplayCapture()
            replay_capture.set_llm_request(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **{k: v for k, v in kwargs.items() if k not in ("model", "messages", "temperature", "max_tokens", "stream")}
            )

        # Create span
        span = self._tracer.start_span(
            name="openai.chat.completions.create",
            span_type=SpanType.LLM,
        )

        try:
            # Set request attributes
            span.set_attribute("llm.vendor", "openai")
            span.set_attribute("llm.model", model)
            span.set_attribute("llm.request.model", model)

            if temperature is not None:
                span.set_attribute("llm.temperature", temperature)
            if max_tokens is not None:
                span.set_attribute("llm.max_tokens", max_tokens)
            if stream:
                span.set_attribute("llm.stream", True)

            # Add request event
            span.add_event(
                name="llm.request",
                attributes={"messages": messages},
            )

            # Make the API call
            response = await original_func(self_obj, *args, **kwargs)

            # Handle streaming response
            if stream:
                return TracedAsyncChatCompletionStream(
                    stream=response,
                    span=span,
                    tracer=self._tracer,
                    start_time=start_time,
                    replay_capture=replay_capture,
                )

            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(start_time, end_time)
            span.set_attribute("llm.latency_ms", latency_ms)

            # Extract response attributes
            self._extract_chat_completion_attributes(span, response)

            # Add response event
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "message"):
                    span.add_event(
                        name="llm.response",
                        attributes={"content": first_choice.message.content},
                    )

            # Handle tool calls
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "message") and hasattr(
                    first_choice.message, "tool_calls"
                ):
                    if first_choice.message.tool_calls:
                        self._handle_tool_calls(span, first_choice.message.tool_calls)

            # Finalize replay capture
            if replay_capture:
                try:
                    # Extract response text
                    response_text = ""
                    if hasattr(response, "choices") and response.choices:
                        first_choice = response.choices[0]
                        if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
                            response_text = first_choice.message.content or ""

                    # Extract usage
                    prompt_tokens = None
                    completion_tokens = None
                    if hasattr(response, "usage"):
                        prompt_tokens = getattr(response.usage, "prompt_tokens", None)
                        completion_tokens = getattr(response.usage, "completion_tokens", None)

                    # Extract finish reason
                    finish_reason = None
                    if hasattr(response, "choices") and response.choices:
                        first_choice = response.choices[0]
                        finish_reason = getattr(first_choice, "finish_reason", None)

                    replay_capture.set_llm_response(
                        text=response_text,
                        finish_reason=finish_reason,
                        model=getattr(response, "model", model),
                        prompt_tokens=prompt_tokens,
                        completion_tokens=completion_tokens,
                    )

                    replay_capture.set_model_info(
                        model=getattr(response, "model", model),
                        created=getattr(response, "created", None),
                        id=getattr(response, "id", None),
                    )

                    # Attach replay snapshot to span
                    object.__setattr__(span, "replay_snapshot", replay_capture.build())
                except Exception as e:
                    logger.debug(f"Failed to capture replay data: {e}")

            # Mark as successful
            span.set_status(SpanStatus.SUCCESS)

            return response

        except Exception as e:
            # Handle errors
            self._handle_error(span, e)
            raise

        finally:
            span.end()

    def _trace_completions(
        self,
        original_func: Callable[..., Any],
        self_obj: Any,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Trace a legacy completions.create call.

        Args:
            original_func: The original create function
            self_obj: The completions object (self)
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The response from the API call
        """
        if self._tracer is None:
            return original_func(self_obj, *args, **kwargs)

        # Extract request parameters
        model = kwargs.get("model", "unknown")
        prompt = kwargs.get("prompt", "")
        temperature = kwargs.get("temperature")
        max_tokens = kwargs.get("max_tokens")

        # Start timing
        start_time = monotonic_ns()

        # Create span
        span = self._tracer.start_span(
            name="openai.completions.create",
            span_type=SpanType.LLM,
        )

        try:
            # Set request attributes
            span.set_attribute("llm.vendor", "openai")
            span.set_attribute("llm.model", model)
            span.set_attribute("llm.request.model", model)

            if temperature is not None:
                span.set_attribute("llm.temperature", temperature)
            if max_tokens is not None:
                span.set_attribute("llm.max_tokens", max_tokens)

            # Add request event
            span.add_event(
                name="llm.request",
                attributes={"prompt": prompt},
            )

            # Make the API call
            response = original_func(self_obj, *args, **kwargs)

            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(start_time, end_time)
            span.set_attribute("llm.latency_ms", latency_ms)

            # Extract response attributes
            self._extract_completion_attributes(span, response)

            # Add response event
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "text"):
                    span.add_event(
                        name="llm.response",
                        attributes={"text": first_choice.text},
                    )

            # Mark as successful
            span.set_status(SpanStatus.SUCCESS)

            return response

        except Exception as e:
            # Handle errors
            self._handle_error(span, e)
            raise

        finally:
            span.end()

    def _trace_embeddings(
        self,
        original_func: Callable[..., Any],
        self_obj: Any,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Trace an embeddings.create call.

        Args:
            original_func: The original create function
            self_obj: The embeddings object (self)
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The response from the API call
        """
        if self._tracer is None:
            return original_func(self_obj, *args, **kwargs)

        # Extract request parameters
        model = kwargs.get("model", "unknown")
        input_data = kwargs.get("input", [])

        # Start timing
        start_time = monotonic_ns()

        # Create span
        span = self._tracer.start_span(
            name="openai.embeddings.create",
            span_type=SpanType.EMBEDDING,
        )

        try:
            # Set request attributes
            span.set_attribute("llm.vendor", "openai")
            span.set_attribute("llm.model", model)
            span.set_attribute("llm.request.model", model)

            # Count inputs
            if isinstance(input_data, list):
                span.set_attribute("embedding.input_count", len(input_data))
            else:
                span.set_attribute("embedding.input_count", 1)

            # Make the API call
            response = original_func(self_obj, *args, **kwargs)

            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(start_time, end_time)
            span.set_attribute("llm.latency_ms", latency_ms)

            # Extract response attributes
            self._extract_embedding_attributes(span, response)

            # Mark as successful
            span.set_status(SpanStatus.SUCCESS)

            return response

        except Exception as e:
            # Handle errors
            self._handle_error(span, e)
            raise

        finally:
            span.end()

    def _extract_chat_completion_attributes(self, span: Any, response: Any) -> None:
        """Extract attributes from a chat completion response.

        Args:
            span: The span to add attributes to
            response: The response object from the API
        """
        try:
            # Model (actual model used)
            if hasattr(response, "model"):
                span.set_attribute("llm.response.model", response.model)

            # Response ID
            if hasattr(response, "id"):
                span.set_attribute("llm.response.id", response.id)

            # Usage statistics
            if hasattr(response, "usage"):
                usage = response.usage
                if hasattr(usage, "prompt_tokens"):
                    span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
                if hasattr(usage, "completion_tokens"):
                    span.set_attribute("llm.completion_tokens", usage.completion_tokens)
                if hasattr(usage, "total_tokens"):
                    span.set_attribute("llm.total_tokens", usage.total_tokens)

            # Finish reason
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "finish_reason"):
                    span.set_attribute("llm.finish_reason", first_choice.finish_reason)

        except Exception as e:
            # Don't let attribute extraction failures break the instrumentation
            logger.debug(f"Failed to extract chat completion attributes: {e}")

    def _extract_completion_attributes(self, span: Any, response: Any) -> None:
        """Extract attributes from a legacy completion response.

        Args:
            span: The span to add attributes to
            response: The response object from the API
        """
        try:
            # Model
            if hasattr(response, "model"):
                span.set_attribute("llm.response.model", response.model)

            # Response ID
            if hasattr(response, "id"):
                span.set_attribute("llm.response.id", response.id)

            # Usage statistics
            if hasattr(response, "usage"):
                usage = response.usage
                if hasattr(usage, "prompt_tokens"):
                    span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
                if hasattr(usage, "completion_tokens"):
                    span.set_attribute("llm.completion_tokens", usage.completion_tokens)
                if hasattr(usage, "total_tokens"):
                    span.set_attribute("llm.total_tokens", usage.total_tokens)

        except Exception as e:
            logger.debug(f"Failed to extract completion attributes: {e}")

    def _extract_embedding_attributes(self, span: Any, response: Any) -> None:
        """Extract attributes from an embedding response.

        Args:
            span: The span to add attributes to
            response: The response object from the API
        """
        try:
            # Model
            if hasattr(response, "model"):
                span.set_attribute("llm.response.model", response.model)

            # Usage statistics
            if hasattr(response, "usage"):
                usage = response.usage
                if hasattr(usage, "prompt_tokens"):
                    span.set_attribute("llm.prompt_tokens", usage.prompt_tokens)
                if hasattr(usage, "total_tokens"):
                    span.set_attribute("llm.total_tokens", usage.total_tokens)

            # Embedding count and dimensions
            if hasattr(response, "data") and response.data:
                span.set_attribute("embedding.count", len(response.data))
                if response.data and hasattr(response.data[0], "embedding"):
                    span.set_attribute(
                        "embedding.dimensions", len(response.data[0].embedding)
                    )

        except Exception as e:
            logger.debug(f"Failed to extract embedding attributes: {e}")

    def _handle_tool_calls(self, span: Any, tool_calls: Any) -> None:
        """Handle tool calls in the response.

        Args:
            span: The span to add tool call information to
            tool_calls: The tool calls from the response
        """
        try:
            calls = []
            for tool_call in tool_calls:
                call_info = {
                    "id": getattr(tool_call, "id", None),
                    "type": getattr(tool_call, "type", None),
                }

                if hasattr(tool_call, "function"):
                    function = tool_call.function
                    call_info["function"] = {
                        "name": getattr(function, "name", None),
                        "arguments": getattr(function, "arguments", None),
                    }

                calls.append(call_info)

            if calls:
                span.add_event(
                    name="llm.tool_calls",
                    attributes={"tool_calls": calls},
                )

        except Exception as e:
            logger.debug(f"Failed to handle tool calls: {e}")

    def _handle_error(self, span: Any, error: Exception) -> None:
        """Handle an error during API call.

        Args:
            span: The span to record the error on
            error: The exception that was raised
        """
        try:
            # Set error status
            span.set_status(SpanStatus.ERROR, str(error))

            # Extract error details
            error_attrs: dict[str, Any] = {
                "error.type": type(error).__name__,
                "error.message": str(error),
            }

            # Handle openai-specific errors
            if hasattr(error, "status_code"):
                error_attrs["error.status_code"] = error.status_code

            span.add_event(name="error", attributes=error_attrs)

        except Exception as e:
            logger.debug(f"Failed to handle error: {e}")


class TracedChatCompletionStream:
    """Wrapper for streaming chat completion responses."""

    def __init__(
        self,
        stream: Any,
        span: Any,
        tracer: Tracer,
        start_time: int,
        replay_capture: Any = None,
    ) -> None:
        """Initialize the traced stream.

        Args:
            stream: The original stream
            span: The span to record events on
            tracer: The tracer instance
            start_time: Start time in nanoseconds
            replay_capture: Optional ReplayCapture instance
        """
        self._stream = stream
        self._span = span
        self._tracer = tracer
        self._start_time = start_time
        self._first_token_time: int | None = None
        self._content_chunks: list[str] = []
        self._finish_reason: str | None = None
        self._replay_capture = replay_capture

    def __enter__(self) -> TracedChatCompletionStream:
        """Enter context manager."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context manager and finalize span."""
        try:
            if exc_type is None:
                self._finalize_span()
            else:
                self._span.set_status(SpanStatus.ERROR, str(exc_val))

        finally:
            self._span.end()

    def __iter__(self) -> Any:
        """Iterate over stream chunks."""
        try:
            for chunk in self._stream:
                self._process_chunk(chunk)
                yield chunk

        except Exception as e:
            self._span.set_status(SpanStatus.ERROR, str(e))
            raise

    def _process_chunk(self, chunk: Any) -> None:
        """Process a streaming chunk.

        Args:
            chunk: The streaming chunk
        """
        try:
            # Capture first token time
            if self._first_token_time is None:
                self._first_token_time = monotonic_ns()

            # Extract content from chunk
            if hasattr(chunk, "choices") and chunk.choices:
                first_choice = chunk.choices[0]

                # Get content delta
                if hasattr(first_choice, "delta"):
                    delta = first_choice.delta
                    if hasattr(delta, "content") and delta.content:
                        self._content_chunks.append(delta.content)

                # Get finish reason
                if hasattr(first_choice, "finish_reason") and first_choice.finish_reason:
                    self._finish_reason = first_choice.finish_reason

        except Exception as e:
            logger.debug(f"Failed to process chunk: {e}")

    def _finalize_span(self) -> None:
        """Finalize the span with aggregated data."""
        try:
            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(self._start_time, end_time)
            self._span.set_attribute("llm.latency_ms", latency_ms)

            # Time to first token
            if self._first_token_time is not None:
                ttft_ms = duration_ms(self._start_time, self._first_token_time)
                self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)

            # Aggregated content
            if self._content_chunks:
                full_content = "".join(self._content_chunks)
                self._span.add_event(
                    name="llm.response",
                    attributes={"content": full_content},
                )

            # Finish reason
            if self._finish_reason:
                self._span.set_attribute("llm.finish_reason", self._finish_reason)

            # Finalize replay capture for streaming
            if self._replay_capture:
                try:
                    full_content = "".join(self._content_chunks) if self._content_chunks else ""
                    self._replay_capture.set_llm_response(
                        text=full_content,
                        finish_reason=self._finish_reason,
                    )
                    # Attach replay snapshot to span
                    object.__setattr__(self._span, "replay_snapshot", self._replay_capture.build())
                except Exception as e:
                    logger.debug(f"Failed to capture streaming replay data: {e}")

            # Mark as successful
            self._span.set_status(SpanStatus.SUCCESS)

        except Exception as e:
            logger.debug(f"Failed to finalize span: {e}")


class TracedAsyncChatCompletionStream:
    """Wrapper for async streaming chat completion responses."""

    def __init__(
        self,
        stream: Any,
        span: Any,
        tracer: Tracer,
        start_time: int,
        replay_capture: Any = None,
    ) -> None:
        """Initialize the traced async stream.

        Args:
            stream: The original async stream
            span: The span to record events on
            tracer: The tracer instance
            start_time: Start time in nanoseconds
            replay_capture: Optional ReplayCapture instance
        """
        self._stream = stream
        self._span = span
        self._tracer = tracer
        self._start_time = start_time
        self._first_token_time: int | None = None
        self._content_chunks: list[str] = []
        self._finish_reason: str | None = None
        self._replay_capture = replay_capture

    async def __aenter__(self) -> TracedAsyncChatCompletionStream:
        """Enter async context manager."""
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit async context manager and finalize span."""
        try:
            if exc_type is None:
                self._finalize_span()
            else:
                self._span.set_status(SpanStatus.ERROR, str(exc_val))

        finally:
            self._span.end()

    async def __aiter__(self) -> Any:
        """Async iterate over stream chunks."""
        try:
            async for chunk in self._stream:
                self._process_chunk(chunk)
                yield chunk

        except Exception as e:
            self._span.set_status(SpanStatus.ERROR, str(e))
            raise

    def _process_chunk(self, chunk: Any) -> None:
        """Process a streaming chunk.

        Args:
            chunk: The streaming chunk
        """
        try:
            # Capture first token time
            if self._first_token_time is None:
                self._first_token_time = monotonic_ns()

            # Extract content from chunk
            if hasattr(chunk, "choices") and chunk.choices:
                first_choice = chunk.choices[0]

                # Get content delta
                if hasattr(first_choice, "delta"):
                    delta = first_choice.delta
                    if hasattr(delta, "content") and delta.content:
                        self._content_chunks.append(delta.content)

                # Get finish reason
                if hasattr(first_choice, "finish_reason") and first_choice.finish_reason:
                    self._finish_reason = first_choice.finish_reason

        except Exception as e:
            logger.debug(f"Failed to process chunk: {e}")

    def _finalize_span(self) -> None:
        """Finalize the span with aggregated data."""
        try:
            # Calculate latency
            end_time = monotonic_ns()
            latency_ms = duration_ms(self._start_time, end_time)
            self._span.set_attribute("llm.latency_ms", latency_ms)

            # Time to first token
            if self._first_token_time is not None:
                ttft_ms = duration_ms(self._start_time, self._first_token_time)
                self._span.set_attribute("llm.time_to_first_token_ms", ttft_ms)

            # Aggregated content
            if self._content_chunks:
                full_content = "".join(self._content_chunks)
                self._span.add_event(
                    name="llm.response",
                    attributes={"content": full_content},
                )

            # Finish reason
            if self._finish_reason:
                self._span.set_attribute("llm.finish_reason", self._finish_reason)

            # Finalize replay capture for streaming
            if self._replay_capture:
                try:
                    full_content = "".join(self._content_chunks) if self._content_chunks else ""
                    self._replay_capture.set_llm_response(
                        text=full_content,
                        finish_reason=self._finish_reason,
                    )
                    # Attach replay snapshot to span
                    object.__setattr__(self._span, "replay_snapshot", self._replay_capture.build())
                except Exception as e:
                    logger.debug(f"Failed to capture streaming replay data: {e}")

            # Mark as successful
            self._span.set_status(SpanStatus.SUCCESS)

        except Exception as e:
            logger.debug(f"Failed to finalize span: {e}")