mseep-agentops 0.4.18__py3-none-any.whl → 0.4.22__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- agentops/__init__.py +0 -0
- agentops/client/api/base.py +28 -30
- agentops/client/api/versions/v3.py +29 -25
- agentops/client/api/versions/v4.py +87 -46
- agentops/client/client.py +98 -29
- agentops/client/http/README.md +87 -0
- agentops/client/http/http_client.py +126 -172
- agentops/config.py +8 -2
- agentops/instrumentation/OpenTelemetry.md +133 -0
- agentops/instrumentation/README.md +167 -0
- agentops/instrumentation/__init__.py +13 -1
- agentops/instrumentation/agentic/ag2/__init__.py +18 -0
- agentops/instrumentation/agentic/ag2/instrumentor.py +922 -0
- agentops/instrumentation/agentic/agno/__init__.py +19 -0
- agentops/instrumentation/agentic/agno/attributes/__init__.py +20 -0
- agentops/instrumentation/agentic/agno/attributes/agent.py +250 -0
- agentops/instrumentation/agentic/agno/attributes/metrics.py +214 -0
- agentops/instrumentation/agentic/agno/attributes/storage.py +158 -0
- agentops/instrumentation/agentic/agno/attributes/team.py +195 -0
- agentops/instrumentation/agentic/agno/attributes/tool.py +210 -0
- agentops/instrumentation/agentic/agno/attributes/workflow.py +254 -0
- agentops/instrumentation/agentic/agno/instrumentor.py +1313 -0
- agentops/instrumentation/agentic/crewai/LICENSE +201 -0
- agentops/instrumentation/agentic/crewai/NOTICE.md +10 -0
- agentops/instrumentation/agentic/crewai/__init__.py +6 -0
- agentops/instrumentation/agentic/crewai/crewai_span_attributes.py +335 -0
- agentops/instrumentation/agentic/crewai/instrumentation.py +535 -0
- agentops/instrumentation/agentic/crewai/version.py +1 -0
- agentops/instrumentation/agentic/google_adk/__init__.py +19 -0
- agentops/instrumentation/agentic/google_adk/instrumentor.py +68 -0
- agentops/instrumentation/agentic/google_adk/patch.py +767 -0
- agentops/instrumentation/agentic/haystack/__init__.py +1 -0
- agentops/instrumentation/agentic/haystack/instrumentor.py +186 -0
- agentops/instrumentation/agentic/langgraph/__init__.py +3 -0
- agentops/instrumentation/agentic/langgraph/attributes.py +54 -0
- agentops/instrumentation/agentic/langgraph/instrumentation.py +598 -0
- agentops/instrumentation/agentic/langgraph/version.py +1 -0
- agentops/instrumentation/agentic/openai_agents/README.md +156 -0
- agentops/instrumentation/agentic/openai_agents/SPANS.md +145 -0
- agentops/instrumentation/agentic/openai_agents/TRACING_API.md +144 -0
- agentops/instrumentation/agentic/openai_agents/__init__.py +30 -0
- agentops/instrumentation/agentic/openai_agents/attributes/common.py +549 -0
- agentops/instrumentation/agentic/openai_agents/attributes/completion.py +172 -0
- agentops/instrumentation/agentic/openai_agents/attributes/model.py +58 -0
- agentops/instrumentation/agentic/openai_agents/attributes/tokens.py +275 -0
- agentops/instrumentation/agentic/openai_agents/exporter.py +469 -0
- agentops/instrumentation/agentic/openai_agents/instrumentor.py +107 -0
- agentops/instrumentation/agentic/openai_agents/processor.py +58 -0
- agentops/instrumentation/agentic/smolagents/README.md +88 -0
- agentops/instrumentation/agentic/smolagents/__init__.py +12 -0
- agentops/instrumentation/agentic/smolagents/attributes/agent.py +354 -0
- agentops/instrumentation/agentic/smolagents/attributes/model.py +205 -0
- agentops/instrumentation/agentic/smolagents/instrumentor.py +286 -0
- agentops/instrumentation/agentic/smolagents/stream_wrapper.py +258 -0
- agentops/instrumentation/agentic/xpander/__init__.py +15 -0
- agentops/instrumentation/agentic/xpander/context.py +112 -0
- agentops/instrumentation/agentic/xpander/instrumentor.py +877 -0
- agentops/instrumentation/agentic/xpander/trace_probe.py +86 -0
- agentops/instrumentation/agentic/xpander/version.py +3 -0
- agentops/instrumentation/common/README.md +65 -0
- agentops/instrumentation/common/attributes.py +1 -2
- agentops/instrumentation/providers/anthropic/__init__.py +24 -0
- agentops/instrumentation/providers/anthropic/attributes/__init__.py +23 -0
- agentops/instrumentation/providers/anthropic/attributes/common.py +64 -0
- agentops/instrumentation/providers/anthropic/attributes/message.py +541 -0
- agentops/instrumentation/providers/anthropic/attributes/tools.py +231 -0
- agentops/instrumentation/providers/anthropic/event_handler_wrapper.py +90 -0
- agentops/instrumentation/providers/anthropic/instrumentor.py +146 -0
- agentops/instrumentation/providers/anthropic/stream_wrapper.py +436 -0
- agentops/instrumentation/providers/google_genai/README.md +33 -0
- agentops/instrumentation/providers/google_genai/__init__.py +24 -0
- agentops/instrumentation/providers/google_genai/attributes/__init__.py +25 -0
- agentops/instrumentation/providers/google_genai/attributes/chat.py +125 -0
- agentops/instrumentation/providers/google_genai/attributes/common.py +88 -0
- agentops/instrumentation/providers/google_genai/attributes/model.py +284 -0
- agentops/instrumentation/providers/google_genai/instrumentor.py +170 -0
- agentops/instrumentation/providers/google_genai/stream_wrapper.py +238 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/__init__.py +28 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/attributes/__init__.py +27 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/attributes/attributes.py +277 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/attributes/common.py +104 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/instrumentor.py +162 -0
- agentops/instrumentation/providers/ibm_watsonx_ai/stream_wrapper.py +302 -0
- agentops/instrumentation/providers/mem0/__init__.py +45 -0
- agentops/instrumentation/providers/mem0/common.py +377 -0
- agentops/instrumentation/providers/mem0/instrumentor.py +270 -0
- agentops/instrumentation/providers/mem0/memory.py +430 -0
- agentops/instrumentation/providers/openai/__init__.py +21 -0
- agentops/instrumentation/providers/openai/attributes/__init__.py +7 -0
- agentops/instrumentation/providers/openai/attributes/common.py +55 -0
- agentops/instrumentation/providers/openai/attributes/response.py +607 -0
- agentops/instrumentation/providers/openai/config.py +36 -0
- agentops/instrumentation/providers/openai/instrumentor.py +312 -0
- agentops/instrumentation/providers/openai/stream_wrapper.py +941 -0
- agentops/instrumentation/providers/openai/utils.py +44 -0
- agentops/instrumentation/providers/openai/v0.py +176 -0
- agentops/instrumentation/providers/openai/v0_wrappers.py +483 -0
- agentops/instrumentation/providers/openai/wrappers/__init__.py +30 -0
- agentops/instrumentation/providers/openai/wrappers/assistant.py +277 -0
- agentops/instrumentation/providers/openai/wrappers/chat.py +259 -0
- agentops/instrumentation/providers/openai/wrappers/completion.py +109 -0
- agentops/instrumentation/providers/openai/wrappers/embeddings.py +94 -0
- agentops/instrumentation/providers/openai/wrappers/image_gen.py +75 -0
- agentops/instrumentation/providers/openai/wrappers/responses.py +191 -0
- agentops/instrumentation/providers/openai/wrappers/shared.py +81 -0
- agentops/instrumentation/utilities/concurrent_futures/__init__.py +10 -0
- agentops/instrumentation/utilities/concurrent_futures/instrumentation.py +206 -0
- agentops/integration/callbacks/dspy/__init__.py +11 -0
- agentops/integration/callbacks/dspy/callback.py +471 -0
- agentops/integration/callbacks/langchain/README.md +59 -0
- agentops/integration/callbacks/langchain/__init__.py +15 -0
- agentops/integration/callbacks/langchain/callback.py +791 -0
- agentops/integration/callbacks/langchain/utils.py +54 -0
- agentops/legacy/crewai.md +121 -0
- agentops/logging/instrument_logging.py +4 -0
- agentops/sdk/README.md +220 -0
- agentops/sdk/core.py +75 -32
- agentops/sdk/descriptors/classproperty.py +28 -0
- agentops/sdk/exporters.py +152 -33
- agentops/semconv/README.md +125 -0
- agentops/semconv/span_kinds.py +0 -2
- agentops/validation.py +102 -63
- {mseep_agentops-0.4.18.dist-info → mseep_agentops-0.4.22.dist-info}/METADATA +30 -40
- mseep_agentops-0.4.22.dist-info/RECORD +178 -0
- {mseep_agentops-0.4.18.dist-info → mseep_agentops-0.4.22.dist-info}/WHEEL +1 -2
- mseep_agentops-0.4.18.dist-info/RECORD +0 -94
- mseep_agentops-0.4.18.dist-info/top_level.txt +0 -2
- tests/conftest.py +0 -10
- tests/unit/client/__init__.py +0 -1
- tests/unit/client/test_http_adapter.py +0 -221
- tests/unit/client/test_http_client.py +0 -206
- tests/unit/conftest.py +0 -54
- tests/unit/sdk/__init__.py +0 -1
- tests/unit/sdk/instrumentation_tester.py +0 -207
- tests/unit/sdk/test_attributes.py +0 -392
- tests/unit/sdk/test_concurrent_instrumentation.py +0 -468
- tests/unit/sdk/test_decorators.py +0 -763
- tests/unit/sdk/test_exporters.py +0 -241
- tests/unit/sdk/test_factory.py +0 -1188
- tests/unit/sdk/test_internal_span_processor.py +0 -397
- tests/unit/sdk/test_resource_attributes.py +0 -35
- tests/unit/test_config.py +0 -82
- tests/unit/test_context_manager.py +0 -777
- tests/unit/test_events.py +0 -27
- tests/unit/test_host_env.py +0 -54
- tests/unit/test_init_py.py +0 -501
- tests/unit/test_serialization.py +0 -433
- tests/unit/test_session.py +0 -676
- tests/unit/test_user_agent.py +0 -34
- tests/unit/test_validation.py +0 -405
- {tests → agentops/instrumentation/agentic/openai_agents/attributes}/__init__.py +0 -0
- /tests/unit/__init__.py → /agentops/instrumentation/providers/openai/attributes/tools.py +0 -0
- {mseep_agentops-0.4.18.dist-info → mseep_agentops-0.4.22.dist-info}/licenses/LICENSE +0 -0
agentops/instrumentation/providers/openai/stream_wrapper.py (new file)

@@ -0,0 +1,941 @@

```python
"""OpenAI streaming response wrapper implementation.

This module provides wrappers for OpenAI's streaming functionality,
handling both Chat Completions API and Responses API streaming.
It instruments streams to collect telemetry data for monitoring and analysis.
"""

import time
from typing import Any, AsyncIterator, Iterator

from opentelemetry import context as context_api
from opentelemetry.trace import Span, SpanKind, Status, StatusCode, set_span_in_context
from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY

from agentops.logging import logger
from agentops.instrumentation.common.wrappers import _with_tracer_wrapper
from agentops.instrumentation.providers.openai.utils import is_metrics_enabled
from agentops.instrumentation.providers.openai.wrappers.chat import handle_chat_attributes, _create_tool_span
from agentops.semconv import SpanAttributes, LLMRequestTypeValues, MessageAttributes


class OpenaiStreamWrapper:
    """Wrapper for OpenAI Chat Completions streaming responses.

    This wrapper intercepts streaming chunks to collect telemetry data including:
    - Time to first token
    - Total generation time
    - Content aggregation
    - Token usage (if available)
    - Chunk statistics
    """

    def __init__(self, stream: Any, span: Span, request_kwargs: dict):
        """Initialize the stream wrapper.

        Args:
            stream: The original OpenAI stream object
            span: The OpenTelemetry span for tracking
            request_kwargs: Original request parameters for context
        """
        self._stream = stream
        self._span = span
        self._request_kwargs = request_kwargs
        self._start_time = time.time()
        self._first_token_time = None
        self._chunk_count = 0
        self._content_chunks = []
        self._finish_reason = None
        self._model = None
        self._response_id = None
        self._usage = None
        self._tool_calls = {}
        self._current_tool_call_index = None

        # Make sure the span is attached to the current context
        current_context = context_api.get_current()
        self._token = context_api.attach(set_span_in_context(span, current_context))

    def __iter__(self) -> Iterator[Any]:
        """Return iterator for sync streaming."""
        return self

    def __next__(self) -> Any:
        """Process the next chunk from the stream."""
        try:
            chunk = next(self._stream)
            self._process_chunk(chunk)
            return chunk
        except StopIteration:
            self._finalize_stream()
            raise

    def __enter__(self):
        """Support context manager protocol."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clean up on context manager exit."""
        if exc_type is not None:
            self._span.record_exception(exc_val)
            self._span.set_status(Status(StatusCode.ERROR, str(exc_val)))

        self._span.end()
        context_api.detach(self._token)
        return False

    def _process_chunk(self, chunk: Any) -> None:
        """Process a single chunk from the stream.

        Args:
            chunk: A chunk from the OpenAI streaming response
        """
        self._chunk_count += 1

        # Usage (may be in final chunk with a different structure)
        if hasattr(chunk, "usage"):
            self._usage = chunk.usage
            # Check if this is a usage-only chunk (often the final chunk when stream_options.include_usage=true)
            is_usage_only_chunk = not (hasattr(chunk, "choices") and chunk.choices)

            # If this is a usage-only chunk, we don't need to process it as a content chunk
            if is_usage_only_chunk:
                return

        # Skip processing if no choices are present
        if not hasattr(chunk, "choices") or not chunk.choices:
            return

        # Track first token timing
        if self._first_token_time is None:
            if any(choice.delta.content for choice in chunk.choices if hasattr(choice.delta, "content")):
                self._first_token_time = time.time()
                time_to_first_token = self._first_token_time - self._start_time
                self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_FIRST_TOKEN, time_to_first_token)
                self._span.add_event("first_token_received", {"time_elapsed": time_to_first_token})
            # Also check for tool_calls as first tokens
            elif any(
                choice.delta.tool_calls
                for choice in chunk.choices
                if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls
            ):
                self._first_token_time = time.time()
                time_to_first_token = self._first_token_time - self._start_time
                self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_FIRST_TOKEN, time_to_first_token)
                self._span.add_event("first_tool_call_token_received", {"time_elapsed": time_to_first_token})

        # Extract chunk data
        if hasattr(chunk, "id") and chunk.id and not self._response_id:
            self._response_id = chunk.id
            if self._response_id is not None:
                self._span.set_attribute(SpanAttributes.LLM_RESPONSE_ID, self._response_id)

        if hasattr(chunk, "model") and chunk.model and not self._model:
            self._model = chunk.model
            if self._model is not None:
                self._span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, self._model)

        # Process choices
        for choice in chunk.choices:
            if not hasattr(choice, "delta"):
                continue

            delta = choice.delta

            # Content
            if hasattr(delta, "content") and delta.content is not None:
                self._content_chunks.append(delta.content)

            # Tool calls
            if hasattr(delta, "tool_calls") and delta.tool_calls:
                for tool_call in delta.tool_calls:
                    if hasattr(tool_call, "index"):
                        idx = tool_call.index
                        if idx not in self._tool_calls:
                            self._tool_calls[idx] = {
                                "id": "",
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
                            }

                        if hasattr(tool_call, "id") and tool_call.id:
                            self._tool_calls[idx]["id"] = tool_call.id

                        if hasattr(tool_call, "function"):
                            if hasattr(tool_call.function, "name") and tool_call.function.name:
                                self._tool_calls[idx]["function"]["name"] = tool_call.function.name
                            if hasattr(tool_call.function, "arguments") and tool_call.function.arguments:
                                self._tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments

            # Finish reason
            if hasattr(choice, "finish_reason") and choice.finish_reason:
                self._finish_reason = choice.finish_reason

    def _finalize_stream(self) -> None:
        """Finalize the stream and set final attributes on the span."""
        total_time = time.time() - self._start_time

        # Aggregate content
        full_content = "".join(self._content_chunks)

        # Set generation time
        if self._first_token_time:
            generation_time = total_time - (self._first_token_time - self._start_time)
            self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_GENERATE, generation_time)

        # Add content attributes
        if full_content:
            self._span.set_attribute(MessageAttributes.COMPLETION_CONTENT.format(i=0), full_content)
            self._span.set_attribute(MessageAttributes.COMPLETION_ROLE.format(i=0), "assistant")

        # Set finish reason
        if self._finish_reason:
            self._span.set_attribute(MessageAttributes.COMPLETION_FINISH_REASON.format(i=0), self._finish_reason)

        # Create tool spans for each tool call
        if len(self._tool_calls) > 0:
            for idx, tool_call in self._tool_calls.items():
                # Create a child span for this tool call
                _create_tool_span(self._span, tool_call)

        # Set usage if available from the API
        if self._usage is not None:
            # Only set token attributes if they exist and have non-None values
            if hasattr(self._usage, "prompt_tokens") and self._usage.prompt_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, int(self._usage.prompt_tokens))

            if hasattr(self._usage, "completion_tokens") and self._usage.completion_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, int(self._usage.completion_tokens))

            if hasattr(self._usage, "total_tokens") and self._usage.total_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, int(self._usage.total_tokens))

        # Stream statistics
        self._span.set_attribute("llm.openai.stream.chunk_count", self._chunk_count)
        self._span.set_attribute("llm.openai.stream.content_length", len(full_content))
        self._span.set_attribute("llm.openai.stream.total_duration", total_time)

        # Add completion event
        self._span.add_event(
            "stream_completed",
            {
                "chunks_received": self._chunk_count,
                "total_content_length": len(full_content),
                "duration": total_time,
                "had_tool_calls": len(self._tool_calls) > 0,
            },
        )

        # Finalize span and context
        self._span.set_status(Status(StatusCode.OK))
        self._span.end()
        context_api.detach(self._token)


class OpenAIAsyncStreamWrapper:
    """Async wrapper for OpenAI Chat Completions streaming responses."""

    def __init__(self, stream: Any, span: Span, request_kwargs: dict):
        """Initialize the async stream wrapper.

        Args:
            stream: The original OpenAI async stream object
            span: The OpenTelemetry span for tracking
            request_kwargs: Original request parameters for context
        """
        self._stream = stream
        self._span = span
        self._request_kwargs = request_kwargs
        self._start_time = time.time()
        self._first_token_time = None
        self._chunk_count = 0
        self._content_chunks = []
        self._finish_reason = None
        self._model = None
        self._response_id = None
        self._usage = None
        self._tool_calls = {}

        # Make sure the span is attached to the current context
        current_context = context_api.get_current()
        self._token = context_api.attach(set_span_in_context(span, current_context))

    def __aiter__(self) -> AsyncIterator[Any]:
        """Return async iterator for async streaming."""
        return self

    async def __anext__(self) -> Any:
        """Process the next chunk from the async stream."""
        try:
            if not hasattr(self, "_aiter_debug_logged"):
                self._aiter_debug_logged = True

            chunk = await self._stream.__anext__()

            # Process the chunk
            self._process_chunk(chunk)
            return chunk
        except StopAsyncIteration:
            self._finalize_stream()
            raise
        except Exception as e:
            logger.error(f"[OPENAI ASYNC WRAPPER] Error in __anext__: {e}")
            # Make sure span is ended in case of error
            self._span.record_exception(e)
            self._span.set_status(Status(StatusCode.ERROR, str(e)))
            self._span.end()
            context_api.detach(self._token)
            raise

    async def __aenter__(self):
        """Support async context manager protocol."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Clean up on async context manager exit."""
        if exc_type is not None:
            self._span.record_exception(exc_val)
            self._span.set_status(Status(StatusCode.ERROR, str(exc_val)))

        self._span.end()
        context_api.detach(self._token)
        return False

    def _process_chunk(self, chunk: Any) -> None:
        """Process a single chunk from the stream.

        Args:
            chunk: A chunk from the OpenAI streaming response
        """
        self._chunk_count += 1

        # Usage (may be in final chunk with a different structure)
        if hasattr(chunk, "usage"):
            self._usage = chunk.usage
            # Check if this is a usage-only chunk (often the final chunk when stream_options.include_usage=true)
            is_usage_only_chunk = not (hasattr(chunk, "choices") and chunk.choices)

            # If this is a usage-only chunk, we don't need to process it as a content chunk
            if is_usage_only_chunk:
                return

        # Skip processing if no choices are present
        if not hasattr(chunk, "choices") or not chunk.choices:
            return

        # Track first token timing
        if self._first_token_time is None:
            if any(choice.delta.content for choice in chunk.choices if hasattr(choice.delta, "content")):
                self._first_token_time = time.time()
                time_to_first_token = self._first_token_time - self._start_time
                self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_FIRST_TOKEN, time_to_first_token)
                self._span.add_event("first_token_received", {"time_elapsed": time_to_first_token})
            # Also check for tool_calls as first tokens
            elif any(
                choice.delta.tool_calls
                for choice in chunk.choices
                if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls
            ):
                self._first_token_time = time.time()
                time_to_first_token = self._first_token_time - self._start_time
                self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_FIRST_TOKEN, time_to_first_token)
                self._span.add_event("first_tool_call_token_received", {"time_elapsed": time_to_first_token})

        # Extract chunk data
        if hasattr(chunk, "id") and chunk.id and not self._response_id:
            self._response_id = chunk.id
            if self._response_id is not None:
                self._span.set_attribute(SpanAttributes.LLM_RESPONSE_ID, self._response_id)

        if hasattr(chunk, "model") and chunk.model and not self._model:
            self._model = chunk.model
            if self._model is not None:
                self._span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, self._model)

        # Process choices
        for choice in chunk.choices:
            if not hasattr(choice, "delta"):
                continue

            delta = choice.delta

            # Content
            if hasattr(delta, "content") and delta.content is not None:
                self._content_chunks.append(delta.content)

            # Tool calls
            if hasattr(delta, "tool_calls") and delta.tool_calls:
                for tool_call in delta.tool_calls:
                    if hasattr(tool_call, "index"):
                        idx = tool_call.index
                        if idx not in self._tool_calls:
                            self._tool_calls[idx] = {
                                "id": "",
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
                            }

                        if hasattr(tool_call, "id") and tool_call.id:
                            self._tool_calls[idx]["id"] = tool_call.id

                        if hasattr(tool_call, "function"):
                            if hasattr(tool_call.function, "name") and tool_call.function.name:
                                self._tool_calls[idx]["function"]["name"] = tool_call.function.name
                            if hasattr(tool_call.function, "arguments") and tool_call.function.arguments:
                                self._tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments

            # Finish reason
            if hasattr(choice, "finish_reason") and choice.finish_reason:
                self._finish_reason = choice.finish_reason

    def _finalize_stream(self) -> None:
        """Finalize the stream and set final attributes on the span."""
        total_time = time.time() - self._start_time

        # Aggregate content
        full_content = "".join(self._content_chunks)

        # Set generation time
        if self._first_token_time:
            generation_time = total_time - (self._first_token_time - self._start_time)
            self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_GENERATE, generation_time)

        # Add content attributes
        if full_content:
            self._span.set_attribute(MessageAttributes.COMPLETION_CONTENT.format(i=0), full_content)
            self._span.set_attribute(MessageAttributes.COMPLETION_ROLE.format(i=0), "assistant")

        # Set finish reason
        if self._finish_reason:
            self._span.set_attribute(MessageAttributes.COMPLETION_FINISH_REASON.format(i=0), self._finish_reason)

        # Create tool spans for each tool call
        if len(self._tool_calls) > 0:
            for idx, tool_call in self._tool_calls.items():
                # Create a child span for this tool call
                _create_tool_span(self._span, tool_call)

        # Set usage if available from the API
        if self._usage is not None:
            # Only set token attributes if they exist and have non-None values
            if hasattr(self._usage, "prompt_tokens") and self._usage.prompt_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, int(self._usage.prompt_tokens))

            if hasattr(self._usage, "completion_tokens") and self._usage.completion_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, int(self._usage.completion_tokens))

            if hasattr(self._usage, "total_tokens") and self._usage.total_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, int(self._usage.total_tokens))

        # Stream statistics
        self._span.set_attribute("llm.openai.stream.chunk_count", self._chunk_count)
        self._span.set_attribute("llm.openai.stream.content_length", len(full_content))
        self._span.set_attribute("llm.openai.stream.total_duration", total_time)

        # Add completion event
        self._span.add_event(
            "stream_completed",
            {
                "chunks_received": self._chunk_count,
                "total_content_length": len(full_content),
                "duration": total_time,
                "had_tool_calls": len(self._tool_calls) > 0,
            },
        )

        # Finalize span and context
        self._span.set_status(Status(StatusCode.OK))
        self._span.end()
        context_api.detach(self._token)


@_with_tracer_wrapper
def chat_completion_stream_wrapper(tracer, wrapped, instance, args, kwargs):
    """Wrapper for chat completions (both streaming and non-streaming).

    This wrapper handles both streaming and non-streaming responses,
    wrapping streams with telemetry collection while maintaining the original interface.
    """
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
        return wrapped(*args, **kwargs)

    # Check if streaming is enabled
    is_streaming = kwargs.get("stream", False)

    # Start the span
    span = tracer.start_span(
        "openai.chat.completion",
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value},
    )

    # Make sure span is linked to the current trace context
    current_context = context_api.get_current()
    token = context_api.attach(set_span_in_context(span, current_context))

    try:
        # Extract and set request attributes
        request_attributes = handle_chat_attributes(kwargs=kwargs)

        for key, value in request_attributes.items():
            span.set_attribute(key, value)

        # Add include_usage to get token counts for streaming responses
        if is_streaming and is_metrics_enabled():
            # Add stream_options if it doesn't exist
            if "stream_options" not in kwargs:
                kwargs["stream_options"] = {"include_usage": True}
                logger.debug("[OPENAI WRAPPER] Adding stream_options.include_usage=True to get token counts")
            # If stream_options exists but doesn't have include_usage, add it
            elif isinstance(kwargs["stream_options"], dict) and "include_usage" not in kwargs["stream_options"]:
                kwargs["stream_options"]["include_usage"] = True
                logger.debug(
                    "[OPENAI WRAPPER] Adding include_usage=True to existing stream_options to get token counts"
                )

        # Call the original method
        response = wrapped(*args, **kwargs)

        if is_streaming:
            # Wrap the stream
            context_api.detach(token)
            return OpenaiStreamWrapper(response, span, kwargs)
        else:
            # Handle non-streaming response
            response_attributes = handle_chat_attributes(kwargs=kwargs, return_value=response, span=span)

            for key, value in response_attributes.items():
                if key not in request_attributes:  # Avoid overwriting request attributes
                    span.set_attribute(key, value)

            span.set_status(Status(StatusCode.OK))
            span.end()
            context_api.detach(token)
            return response

    except Exception as e:
        logger.error(f"[OPENAI WRAPPER] Error in chat_completion_stream_wrapper: {e}")
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()
        context_api.detach(token)
        raise


@_with_tracer_wrapper
async def async_chat_completion_stream_wrapper(tracer, wrapped, instance, args, kwargs):
    """Async wrapper for chat completions (both streaming and non-streaming)."""
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
        return await wrapped(*args, **kwargs)

    # Check if streaming is enabled
    is_streaming = kwargs.get("stream", False)

    # Start the span
    span = tracer.start_span(
        "openai.chat.completion",
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value},
    )

    # Make sure span is linked to the current trace context
    current_context = context_api.get_current()
    token = context_api.attach(set_span_in_context(span, current_context))

    try:
        # Extract and set request attributes
        request_attributes = handle_chat_attributes(kwargs=kwargs)

        for key, value in request_attributes.items():
            span.set_attribute(key, value)

        # Add include_usage to get token counts for streaming responses
        if is_streaming and is_metrics_enabled():
            # Add stream_options if it doesn't exist
            if "stream_options" not in kwargs:
                kwargs["stream_options"] = {"include_usage": True}
            # If stream_options exists but doesn't have include_usage, add it
            elif isinstance(kwargs["stream_options"], dict) and "include_usage" not in kwargs["stream_options"]:
                kwargs["stream_options"]["include_usage"] = True

        # Call the original method
        response = await wrapped(*args, **kwargs)

        if is_streaming:
            # Wrap the stream
            context_api.detach(token)
            return OpenAIAsyncStreamWrapper(response, span, kwargs)
        else:
            # Handle non-streaming response
            response_attributes = handle_chat_attributes(kwargs=kwargs, return_value=response, span=span)

            for key, value in response_attributes.items():
                if key not in request_attributes:  # Avoid overwriting request attributes
                    span.set_attribute(key, value)

            span.set_status(Status(StatusCode.OK))
            span.end()
            context_api.detach(token)
            return response

    except Exception as e:
        logger.error(f"[OPENAI WRAPPER] Error in async_chat_completion_stream_wrapper: {e}")
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()
        context_api.detach(token)
        raise


class ResponsesAPIStreamWrapper:
    """Wrapper for OpenAI Responses API streaming.

    The Responses API uses event-based streaming with typed events
    like 'response.output_text.delta' instead of generic chunks.
    """

    def __init__(self, stream: Any, span: Span, request_kwargs: dict):
        """Initialize the Responses API stream wrapper."""
        self._stream = stream
        self._span = span
        self._request_kwargs = request_kwargs
        self._start_time = time.time()
        self._first_token_time = None
        self._event_count = 0
        self._content_chunks = []
        self._function_call_chunks = []
        self._reasoning_chunks = []
        self._response_id = None
        self._model = None
        self._usage = None
        self._output_items = []
        self._current_function_args = ""

        # Make sure the span is attached to the current context
        current_context = context_api.get_current()
        self._token = context_api.attach(set_span_in_context(span, current_context))

    def __iter__(self) -> Iterator[Any]:
        """Return iterator for sync streaming."""
        return self

    def __next__(self) -> Any:
        """Process the next event from the stream."""
        try:
            event = next(self._stream)
            self._process_event(event)
            return event
        except StopIteration:
            self._finalize_stream()
            raise

    # Add async iterator support
    def __aiter__(self) -> AsyncIterator[Any]:
        """Return async iterator for async streaming."""
        return self

    async def __anext__(self) -> Any:
        """Process the next event from the async stream."""
        try:
            # If the underlying stream is async
            if hasattr(self._stream, "__anext__"):
                event = await self._stream.__anext__()
            # If the underlying stream is sync but we're in an async context
            else:
                try:
                    event = next(self._stream)
                except StopIteration:
                    self._finalize_stream()
                    raise StopAsyncIteration

            self._process_event(event)
            return event
        except StopAsyncIteration:
            self._finalize_stream()
            raise
        except Exception as e:
            logger.error(f"[RESPONSES API WRAPPER] Error in __anext__: {e}")
            # Make sure span is ended in case of error
            self._span.record_exception(e)
            self._span.set_status(Status(StatusCode.ERROR, str(e)))
            self._span.end()
            context_api.detach(self._token)
            raise

    def _process_event(self, event: Any) -> None:
        """Process a single event from the Responses API stream."""
        self._event_count += 1

        # Track first content event
        if self._first_token_time is None and hasattr(event, "type"):
            if event.type in ["response.output_text.delta", "response.function_call_arguments.delta"]:
                self._first_token_time = time.time()
                time_to_first_token = self._first_token_time - self._start_time
                self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_FIRST_TOKEN, time_to_first_token)

        # Process different event types
        if hasattr(event, "type"):
            if event.type == "response.created":
                if hasattr(event, "response"):
                    response = event.response
                    if hasattr(response, "id"):
                        self._response_id = response.id
                        self._span.set_attribute(SpanAttributes.LLM_RESPONSE_ID, self._response_id)
                    if hasattr(response, "model"):
                        self._model = response.model
                        self._span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, self._model)

            elif event.type == "response.output_text.delta":
                if hasattr(event, "delta"):
                    self._content_chunks.append(event.delta)

            elif event.type == "response.function_call_arguments.delta":
                # Accumulate function call arguments
                if hasattr(event, "delta"):
                    self._current_function_args += event.delta

            elif event.type == "response.completed":
                # Process the final response which contains all output items
                if hasattr(event, "response"):
                    response = event.response
                    if hasattr(response, "usage"):
                        self._usage = response.usage

                    # Extract output items from the completed response
                    if hasattr(response, "output"):
                        for output_item in response.output:
                            if hasattr(output_item, "type"):
                                if output_item.type == "function_call" and hasattr(output_item, "arguments"):
                                    self._function_call_chunks.append(output_item.arguments)
                                elif output_item.type == "reasoning":
                                    # Extract reasoning text - could be in summary or content
                                    if hasattr(output_item, "summary"):
                                        self._reasoning_chunks.append(str(output_item.summary))
                                    elif hasattr(output_item, "content"):
                                        # content might be a list of text items
                                        if isinstance(output_item.content, list):
                                            for content_item in output_item.content:
                                                if hasattr(content_item, "text"):
                                                    self._reasoning_chunks.append(str(content_item.text))
                                        else:
                                            self._reasoning_chunks.append(str(output_item.content))
                                elif output_item.type == "message" and hasattr(output_item, "content"):
                                    # Extract text content from message items
                                    if isinstance(output_item.content, list):
                                        for content in output_item.content:
                                            if (
                                                hasattr(content, "type")
                                                and content.type == "text"
                                                and hasattr(content, "text")
                                            ):
                                                self._content_chunks.append(str(content.text))
                                    else:
                                        self._content_chunks.append(str(output_item.content))

        # Only add significant events, not every delta
        if hasattr(event, "type") and event.type in [
            "response.created",
            "response.completed",
            "response.output_item.added",
        ]:
            self._span.add_event(
                "responses_api_event",
                {"event_type": event.type, "event_number": self._event_count},
            )

    def _finalize_stream(self) -> None:
        """Finalize the Responses API stream."""
        total_time = time.time() - self._start_time

        # Aggregate different types of content
        text_content = "".join(self._content_chunks)
        function_content = self._current_function_args or "".join(self._function_call_chunks)
        reasoning_content = "".join(self._reasoning_chunks)

        # Combine all content types for the completion
        full_content = ""
        if reasoning_content:
            full_content = f"Reasoning: {reasoning_content}"
        if function_content:
            if full_content:
                full_content += f"\nFunction Call: {function_content}"
            else:
                full_content = f"Function Call: {function_content}"
        if text_content:
            if full_content:
                full_content += f"\nResponse: {text_content}"
            else:
                full_content = text_content

        if full_content:
            self._span.set_attribute(MessageAttributes.COMPLETION_CONTENT.format(i=0), full_content)
            logger.debug(
                f"[RESPONSES API] Setting completion content: {full_content[:100]}..."
                if len(full_content) > 100
                else f"[RESPONSES API] Setting completion content: {full_content}"
            )

        # Set timing
        if self._first_token_time:
            generation_time = total_time - (self._first_token_time - self._start_time)
            self._span.set_attribute(SpanAttributes.LLM_STREAMING_TIME_TO_GENERATE, generation_time)

        # Set usage if available from the API
        if self._usage is not None:
            # Only set token attributes if they exist and have non-None values
            if hasattr(self._usage, "input_tokens") and self._usage.input_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, int(self._usage.input_tokens))

            if hasattr(self._usage, "output_tokens") and self._usage.output_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, int(self._usage.output_tokens))

            if hasattr(self._usage, "total_tokens") and self._usage.total_tokens is not None:
                self._span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, int(self._usage.total_tokens))

        else:
            logger.debug(
                f"[RESPONSES API] No usage provided by API. "
                f"content_length={len(full_content)}, "
                f"event_count={self._event_count}"
            )

        # Stream statistics
        self._span.set_attribute("llm.openai.responses.event_count", self._event_count)
        self._span.set_attribute("llm.openai.responses.content_length", len(full_content))
        self._span.set_attribute("llm.openai.responses.total_duration", total_time)

        # Add completion event with summary
        self._span.add_event(
            "stream_completed",
            {
                "event_count": self._event_count,
                "total_content_length": len(full_content),
                "duration": total_time,
                "had_function_calls": bool(function_content),
                "had_reasoning": bool(reasoning_content),
                "had_text": bool(text_content),
            },
        )

        # Finalize span and context
        self._span.set_status(Status(StatusCode.OK))
        self._span.end()
        context_api.detach(self._token)
        logger.debug(
            f"[RESPONSES API] Finalized streaming span after {self._event_count} events. Content length: {len(full_content)}"
        )


@_with_tracer_wrapper
def responses_stream_wrapper(tracer, wrapped, instance, args, kwargs):
    """Wrapper for Responses API (both streaming and non-streaming)."""
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
        return wrapped(*args, **kwargs)

    # Check if streaming is enabled
    is_streaming = kwargs.get("stream", False)

    # Create span for both streaming and non-streaming
    span = tracer.start_span(
        "openai.responses.create",
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value},
    )
    logger.debug(f"[RESPONSES API WRAPPER] Created span for {'streaming' if is_streaming else 'non-streaming'} call")

    # Make sure span is linked to the current trace context
    current_context = context_api.get_current()
    token = context_api.attach(set_span_in_context(span, current_context))

    try:
        # Extract and set request attributes
        from agentops.instrumentation.providers.openai.wrappers.responses import handle_responses_attributes

        request_attributes = handle_responses_attributes(kwargs=kwargs)
        for key, value in request_attributes.items():
            span.set_attribute(key, value)

        # Call the original method
        response = wrapped(*args, **kwargs)

        if is_streaming:
            # For streaming, wrap the stream
            context_api.detach(token)
            return ResponsesAPIStreamWrapper(response, span, kwargs)
        else:
            # For non-streaming, handle response attributes and close span
            response_attributes = handle_responses_attributes(kwargs=kwargs, return_value=response)
            for key, value in response_attributes.items():
                if key not in request_attributes:  # Avoid overwriting request attributes
                    span.set_attribute(key, value)

            span.set_status(Status(StatusCode.OK))
            span.end()
            context_api.detach(token)
            logger.debug("[RESPONSES API WRAPPER] Ended non-streaming span")
            return response

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()
        context_api.detach(token)
        raise


@_with_tracer_wrapper
async def async_responses_stream_wrapper(tracer, wrapped, instance, args, kwargs):
    """Async wrapper for Responses API (both streaming and non-streaming)."""
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
        return await wrapped(*args, **kwargs)

    # Check if streaming is enabled
    is_streaming = kwargs.get("stream", False)

    # Create span for both streaming and non-streaming
    span = tracer.start_span(
        "openai.responses.create",
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value},
    )
    logger.debug(f"[RESPONSES API WRAPPER] Created span for {'streaming' if is_streaming else 'non-streaming'} call")

    # Make sure span is linked to the current trace context
    current_context = context_api.get_current()
    token = context_api.attach(set_span_in_context(span, current_context))

    try:
        # Extract and set request attributes
        from agentops.instrumentation.providers.openai.wrappers.responses import handle_responses_attributes

        request_attributes = handle_responses_attributes(kwargs=kwargs)
        for key, value in request_attributes.items():
            span.set_attribute(key, value)

        # Call the original method
        response = await wrapped(*args, **kwargs)

        if is_streaming:
            # For streaming, wrap the stream
            context_api.detach(token)
            logger.debug("[RESPONSES API WRAPPER] Wrapping streaming response with ResponsesAPIStreamWrapper")
            return ResponsesAPIStreamWrapper(response, span, kwargs)
        else:
            # For non-streaming, handle response attributes and close span
            response_attributes = handle_responses_attributes(kwargs=kwargs, return_value=response)
            for key, value in response_attributes.items():
                if key not in request_attributes:  # Avoid overwriting request attributes
                    span.set_attribute(key, value)

            span.set_status(Status(StatusCode.OK))
            span.end()
            context_api.detach(token)
            logger.debug("[RESPONSES API WRAPPER] Ended async non-streaming span")
            return response

    except Exception as e:
        span.record_exception(e)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()
        context_api.detach(token)
        raise
```
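
To make the control flow above concrete, here is a minimal usage sketch for the Chat Completions path. It assumes the package's OpenAI instrumentor (see `agentops/instrumentation/providers/openai/instrumentor.py` in the file list) has already patched `chat.completions.create` with `chat_completion_stream_wrapper`; the model name and prompt are placeholders, not part of this diff.

```python
# Minimal sketch, assuming agentops' OpenAI instrumentor has patched
# chat.completions.create with chat_completion_stream_wrapper.
# The model name and prompt below are placeholders.
import agentops
from openai import OpenAI

agentops.init()  # starts the tracer whose spans the wrapper populates
client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)
# `stream` is now an OpenaiStreamWrapper. Iteration yields the original
# chunks unchanged; when the stream is exhausted, _finalize_stream()
# aggregates content, records token usage (the wrapper injected
# stream_options={"include_usage": True}), and ends the
# "openai.chat.completion" span.
for chunk in stream:
    pass
```

Note that the usage injection only happens when `is_metrics_enabled()` is true, and the token counts arrive in a final, choices-free chunk, which is why `_process_chunk` returns early on usage-only chunks.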
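
The Responses API path is event-based rather than chunk-based, so a parallel sketch looks slightly different (same assumptions as above, with `responses.create` patched by `responses_stream_wrapper`):

```python
# Sketch of the event-based Responses API path; same assumptions as the
# previous example, and the model name and input are placeholders.
stream = client.responses.create(
    model="gpt-4o-mini",
    input="Summarize the plot of Hamlet in one sentence.",
    stream=True,
)
# Typed events drive the wrapper: "response.output_text.delta" events
# set first-token timing and accumulate text, while "response.completed"
# carries usage plus the final output items (messages, function calls,
# reasoning) that _finalize_stream() folds into the span attributes.
for event in stream:
    if event.type == "response.output_text.delta":
        print(event.delta, end="")
```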