opentelemetry-instrumentation-openai 0.34.1__py3-none-any.whl → 0.49.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of opentelemetry-instrumentation-openai has been flagged as potentially problematic; see the registry page for details.
- opentelemetry/instrumentation/openai/__init__.py +11 -6
- opentelemetry/instrumentation/openai/shared/__init__.py +167 -68
- opentelemetry/instrumentation/openai/shared/chat_wrappers.py +544 -231
- opentelemetry/instrumentation/openai/shared/completion_wrappers.py +143 -81
- opentelemetry/instrumentation/openai/shared/config.py +8 -3
- opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +91 -30
- opentelemetry/instrumentation/openai/shared/event_emitter.py +108 -0
- opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +1 -1
- opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
- opentelemetry/instrumentation/openai/utils.py +42 -9
- opentelemetry/instrumentation/openai/v0/__init__.py +32 -11
- opentelemetry/instrumentation/openai/v1/__init__.py +177 -69
- opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +208 -109
- opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +41 -19
- opentelemetry/instrumentation/openai/v1/responses_wrappers.py +1073 -0
- opentelemetry/instrumentation/openai/version.py +1 -1
- {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/METADATA +7 -8
- opentelemetry_instrumentation_openai-0.49.3.dist-info/RECORD +21 -0
- {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/WHEEL +1 -1
- opentelemetry_instrumentation_openai-0.34.1.dist-info/RECORD +0 -17
- {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/entry_points.txt +0 -0
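For context when reading the diffs below: the spans, metrics, and events that changed here are produced once the instrumentor is enabled. A minimal sketch, assuming the package's usual `OpenAIInstrumentor` entry point (unchanged between these versions):

    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    # Enable instrumentation before creating OpenAI clients so that
    # chat/completion/embedding calls emit the spans and metrics diffed below.
    OpenAIInstrumentor().instrument()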
opentelemetry/instrumentation/openai/shared/chat_wrappers.py

@@ -1,48 +1,59 @@
 import copy
 import json
 import logging
+import threading
 import time
-from …
-from …
-
+from functools import singledispatch
+from typing import List, Optional, Union

 from opentelemetry import context as context_api
-
-from opentelemetry.semconv_ai import (
-    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
-    SpanAttributes,
-    LLMRequestTypeValues,
-)
-
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
-from opentelemetry.instrumentation.openai.utils import (
-    _with_chat_telemetry_wrapper,
-    dont_throw,
-    run_async,
-)
+import pydantic
 from opentelemetry.instrumentation.openai.shared import (
-    …
+    OPENAI_LLM_USAGE_TOKEN_TYPES,
+    _get_openai_base_url,
     _set_client_attributes,
+    _set_functions_attributes,
     _set_request_attributes,
+    _set_response_attributes,
     _set_span_attribute,
-    …
+    _set_span_stream_usage,
     _token_type,
-    set_tools_attributes,
-    _set_response_attributes,
     is_streaming_response,
-    …
+    metric_shared_attributes,
     model_as_dict,
-    _get_openai_base_url,
-    OPENAI_LLM_USAGE_TOKEN_TYPES,
-    should_record_stream_token_usage,
-    get_token_count_from_string,
-    _set_span_stream_usage,
     propagate_trace_context,
+    set_tools_attributes,
+)
+from opentelemetry.instrumentation.openai.shared.config import Config
+from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
+from opentelemetry.instrumentation.openai.shared.event_models import (
+    ChoiceEvent,
+    MessageEvent,
+    ToolCall,
+)
+from opentelemetry.instrumentation.openai.utils import (
+    _with_chat_telemetry_wrapper,
+    dont_throw,
+    is_openai_v1,
+    run_async,
+    should_emit_events,
+    should_send_prompts,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+    SpanAttributes,
 )
 from opentelemetry.trace import SpanKind, Tracer
+from opentelemetry import trace
 from opentelemetry.trace.status import Status, StatusCode
-
-from opentelemetry.instrumentation.openai.utils import is_openai_v1
+from wrapt import ObjectProxy

 SPAN_NAME = "openai.chat"
 PROMPT_FILTER_KEY = "prompt_filter_results"

@@ -79,70 +90,77 @@ def chat_wrapper(
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
     )

-    …
+    # Use the span as current context to ensure events get proper trace context
+    with trace.use_span(span, end_on_exit=False):
+        run_async(_handle_request(span, kwargs, instance))
+        try:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }

-    …
-    …
-    …
-    …
-    except Exception as e:  # pylint: disable=broad-except
-        end_time = time.time()
-        duration = end_time - start_time if "start_time" in locals() else 0
-
-        attributes = {
-            "error.type": e.__class__.__name__,
-        }
-
-        if duration > 0 and duration_histogram:
-            duration_histogram.record(duration, attributes=attributes)
-        if exception_counter:
-            exception_counter.add(1, attributes=attributes)
-
-        raise e
-
-    if is_streaming_response(response):
-        # span will be closed after the generator is done
-        if is_openai_v1():
-            return ChatStream(
-                span,
-                response,
-                instance,
-                token_counter,
-                choice_counter,
-                duration_histogram,
-                streaming_time_to_first_token,
-                streaming_time_to_generate,
-                start_time,
-                kwargs,
-            )
-        else:
-            return _build_from_streaming_response(
-                span,
-                response,
-                instance,
-                token_counter,
-                choice_counter,
-                duration_histogram,
-                streaming_time_to_first_token,
-                streaming_time_to_generate,
-                start_time,
-                kwargs,
-            )
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)

-    …
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()

-    …
-        response,
-        span,
-        instance,
-        token_counter,
-        choice_counter,
-        duration_histogram,
-        duration,
-    )
-    span.end()
+            raise

-    …
+        if is_streaming_response(response):
+            # span will be closed after the generator is done
+            if is_openai_v1():
+                return ChatStream(
+                    span,
+                    response,
+                    instance,
+                    token_counter,
+                    choice_counter,
+                    duration_histogram,
+                    streaming_time_to_first_token,
+                    streaming_time_to_generate,
+                    start_time,
+                    kwargs,
+                )
+            else:
+                return _build_from_streaming_response(
+                    span,
+                    response,
+                    instance,
+                    token_counter,
+                    choice_counter,
+                    duration_histogram,
+                    streaming_time_to_first_token,
+                    streaming_time_to_generate,
+                    start_time,
+                    kwargs,
+                )
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            choice_counter,
+            duration_histogram,
+            duration,
+        )
+
+        span.end()
+
+        return response


 @_with_chat_telemetry_wrapper

@@ -169,87 +187,115 @@ async def achat_wrapper(
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
     )
-    await _handle_request(span, kwargs, instance)

-    …
-    …
-    …
-        end_time = time.time()
-    except Exception as e:  # pylint: disable=broad-except
-        end_time = time.time()
-        duration = end_time - start_time if "start_time" in locals() else 0
-
-        common_attributes = Config.get_common_metrics_attributes()
-        attributes = {
-            **common_attributes,
-            "error.type": e.__class__.__name__,
-        }
-
-        if duration > 0 and duration_histogram:
-            duration_histogram.record(duration, attributes=attributes)
-        if exception_counter:
-            exception_counter.add(1, attributes=attributes)
-
-        raise e
-
-    if is_streaming_response(response):
-        # span will be closed after the generator is done
-        if is_openai_v1():
-            return ChatStream(
-                span,
-                response,
-                instance,
-                token_counter,
-                choice_counter,
-                duration_histogram,
-                streaming_time_to_first_token,
-                streaming_time_to_generate,
-                start_time,
-                kwargs,
-            )
-        else:
-            return _abuild_from_streaming_response(
-                span,
-                response,
-                instance,
-                token_counter,
-                choice_counter,
-                duration_histogram,
-                streaming_time_to_first_token,
-                streaming_time_to_generate,
-                start_time,
-                kwargs,
-            )
+    # Use the span as current context to ensure events get proper trace context
+    with trace.use_span(span, end_on_exit=False):
+        await _handle_request(span, kwargs, instance)

-    …
+        try:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+
+            common_attributes = Config.get_common_metrics_attributes()
+            attributes = {
+                **common_attributes,
+                "error.type": e.__class__.__name__,
+            }

-    …
-    …
-    …
-    …
-        token_counter,
-        choice_counter,
-        duration_histogram,
-        duration,
-    )
-    span.end()
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)

-    …
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+
+            raise
+
+        if is_streaming_response(response):
+            # span will be closed after the generator is done
+            if is_openai_v1():
+                return ChatStream(
+                    span,
+                    response,
+                    instance,
+                    token_counter,
+                    choice_counter,
+                    duration_histogram,
+                    streaming_time_to_first_token,
+                    streaming_time_to_generate,
+                    start_time,
+                    kwargs,
+                )
+            else:
+                return _abuild_from_streaming_response(
+                    span,
+                    response,
+                    instance,
+                    token_counter,
+                    choice_counter,
+                    duration_histogram,
+                    streaming_time_to_first_token,
+                    streaming_time_to_generate,
+                    start_time,
+                    kwargs,
+                )
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            choice_counter,
+            duration_histogram,
+            duration,
+        )
+
+        span.end()
+
+        return response


 @dont_throw
 async def _handle_request(span, kwargs, instance):
-    _set_request_attributes(span, kwargs)
+    _set_request_attributes(span, kwargs, instance)
     _set_client_attributes(span, instance)
-    if …
-    …
-    …
-    …
-    …
-    …
+    if should_emit_events():
+        for message in kwargs.get("messages", []):
+            emit_event(
+                MessageEvent(
+                    content=message.get("content"),
+                    role=message.get("role"),
+                    tool_calls=_parse_tool_calls(
+                        message.get("tool_calls", None)),
+                )
+            )
+    else:
+        if should_send_prompts():
+            await _set_prompts(span, kwargs.get("messages"))
+        if kwargs.get("functions"):
+            _set_functions_attributes(span, kwargs.get("functions"))
+        elif kwargs.get("tools"):
+            set_tools_attributes(span, kwargs.get("tools"))
     if Config.enable_trace_context_propagation:
         propagate_trace_context(span, kwargs)

+    # Reasoning request attributes
+    reasoning_effort = kwargs.get("reasoning_effort")
+    _set_span_attribute(
+        span,
+        SpanAttributes.LLM_REQUEST_REASONING_EFFORT,
+        reasoning_effort or ()
+    )
+

 @dont_throw
 def _handle_response(
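Note on the `_handle_request` changes above: request telemetry now branches on `should_emit_events()` (per-message `MessageEvent`s) versus the legacy `should_send_prompts()` span attributes, and the new `reasoning_effort` request parameter is recorded. A hedged sketch of a call that would populate the new attribute (client setup omitted; model name and effort value are illustrative only):

    response = client.chat.completions.create(
        model="o3-mini",  # illustrative reasoning model
        messages=[{"role": "user", "content": "Summarize this change"}],
        reasoning_effort="high",  # recorded as SpanAttributes.LLM_REQUEST_REASONING_EFFORT
    )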
@@ -260,6 +306,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)

@@ -274,25 +321,59 @@ def _handle_response(
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )

     # span attributes
     _set_response_attributes(span, response_dict)

-    …
-    …
+    # Reasoning usage attributes
+    usage = response_dict.get("usage")
+    reasoning_tokens = None
+    if usage:
+        # Support both dict-style and object-style `usage`
+        tokens_details = (
+            usage.get("completion_tokens_details") if isinstance(usage, dict)
+            else getattr(usage, "completion_tokens_details", None)
+        )
+
+        if tokens_details:
+            reasoning_tokens = (
+                tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict)
+                else getattr(tokens_details, "reasoning_tokens", None)
+            )
+
+    _set_span_attribute(
+        span,
+        SpanAttributes.LLM_USAGE_REASONING_TOKENS,
+        reasoning_tokens or 0,
+    )
+
+    if should_emit_events():
+        if response.choices is not None:
+            for choice in response.choices:
+                emit_event(_parse_choice_event(choice))
+    else:
+        if should_send_prompts():
+            _set_completions(span, response_dict.get("choices"))

     return response


 def _set_chat_metrics(
-    instance, …
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=…
+        is_streaming=is_streaming,
     )

     # token metrics

@@ -326,7 +407,7 @@ def _set_token_counter_metrics(token_counter, usage, shared_attributes):
         if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
             attributes_with_token_type = {
                 **shared_attributes,
-                …
+                GenAIAttributes.GEN_AI_TOKEN_TYPE: _token_type(name),
             }
             token_counter.record(val, attributes=attributes_with_token_type)

@@ -351,7 +432,8 @@ async def _process_image_item(item, trace_id, span_id, message_index, content_index
     image_format = item["image_url"]["url"].split(";")[0].split("/")[1]
     image_name = f"message_{message_index}_content_{content_index}.{image_format}"
     base64_string = item["image_url"]["url"].split(",")[1]
-    …
+    # Convert trace_id and span_id to strings as expected by upload function
+    url = await Config.upload_base64_image(str(trace_id), str(span_id), image_name, base64_string)

     return {"type": "image_url", "image_url": {"url": url}}

@@ -362,7 +444,8 @@ async def _set_prompts(span, messages):
         return

     for i, msg in enumerate(messages):
-        prefix = f"{…
+        prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{i}"
+        msg = msg if isinstance(msg, dict) else model_as_dict(msg)

         _set_span_attribute(span, f"{prefix}.role", msg.get("role"))
         if msg.get("content"):

@@ -382,7 +465,8 @@ async def _set_prompts(span, messages):
                 content = json.dumps(content)
             _set_span_attribute(span, f"{prefix}.content", content)
         if msg.get("tool_call_id"):
-            _set_span_attribute(…
+            _set_span_attribute(
+                span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
         tool_calls = msg.get("tool_calls")
         if tool_calls:
             for i, tool_call in enumerate(tool_calls):

@@ -413,7 +497,7 @@ def _set_completions(span, choices):

     for choice in choices:
         index = choice.get("index")
-        prefix = f"{…
+        prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
         _set_span_attribute(
             span, f"{prefix}.finish_reason", choice.get("finish_reason")
         )

@@ -438,9 +522,11 @@ def _set_completions(span, choices):
         _set_span_attribute(span, f"{prefix}.role", message.get("role"))

         if message.get("refusal"):
-            _set_span_attribute(…
+            _set_span_attribute(
+                span, f"{prefix}.refusal", message.get("refusal"))
         else:
-            _set_span_attribute(…
+            _set_span_attribute(
+                span, f"{prefix}.content", message.get("content"))

         function_call = message.get("function_call")
         if function_call:

@@ -478,60 +564,34 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
-    if not should_record_stream_token_usage():
-        return
-
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1

-    # …
-    if …
-    …
-    …
-    …
-    …
-    …
-        )
-        for msg in request_kwargs.get("messages"):
-            if msg.get("content"):
-                prompt_content += msg.get("content")
-        if model_name:
-            prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-    # completion_usage
-    if complete_response.get("choices"):
-        completion_content = ""
-        # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
-        # is used by most of the other model.
-        model_name = complete_response.get("model") or "gpt-4"
-
-        for choice in complete_response.get("choices"):
-            if choice.get("message") and choice.get("message").get("content"):
-                completion_content += choice["message"]["content"]
-
-        if model_name:
-            completion_usage = get_token_count_from_string(
-                completion_content, model_name
-            )
+    # Use token usage from API response only
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]

     # span record
     _set_span_stream_usage(span, prompt_usage, completion_usage)

     # metrics record
     if token_counter:
-        if …
+        if isinstance(prompt_usage, int) and prompt_usage >= 0:
             attributes_with_token_type = {
                 **shared_attributes,
-                …
+                GenAIAttributes.GEN_AI_TOKEN_TYPE: "input",
             }
-            token_counter.record(…
+            token_counter.record(
+                prompt_usage, attributes=attributes_with_token_type)

-        if …
+        if isinstance(completion_usage, int) and completion_usage >= 0:
             attributes_with_token_type = {
                 **shared_attributes,
-                …
+                GenAIAttributes.GEN_AI_TOKEN_TYPE: "output",
             }
             token_counter.record(
                 completion_usage, attributes=attributes_with_token_type
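Note on `_set_streaming_token_metrics` above: the tiktoken-based estimation is gone, so streamed calls only get token metrics when the provider reports usage in the stream itself. With the OpenAI API this can be requested explicitly; a short sketch (client setup omitted, model name illustrative):

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
        stream_options={"include_usage": True},  # final chunk then carries `usage`
    )
    for chunk in stream:
        pass  # the wrapper accumulates chunks and records prompt/completion token counts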
@@ -579,11 +639,34 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}

+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        …
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+        return result

     async def __aenter__(self):
         return self

@@ -602,8 +685,13 @@ class ChatStream(ObjectProxy):
             chunk = self.__wrapped__.__next__()
         except Exception as e:
             if isinstance(e, StopIteration):
-                self.…
-            …
+                self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk

@@ -613,14 +701,20 @@ class ChatStream(ObjectProxy):
             chunk = await self.__wrapped__.__anext__()
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
-                self.…
-            …
+                self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk

     def _process_item(self, item):
-        self._span.add_event(…
+        self._span.add_event(
+            name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

         if self._first_token and self._streaming_time_to_first_token:
             self._time_of_first_token = time.time()

@@ -643,7 +737,7 @@ class ChatStream(ObjectProxy):
             )

     @dont_throw
-    def …
+    def _process_complete_response(self):
         _set_streaming_token_metrics(
             self._request_kwargs,
             self._complete_response,

@@ -676,12 +770,87 @@ class ChatStream(ObjectProxy):
             )

         _set_response_attributes(self._span, self._complete_response)
-        …
-        …
-        …
+        if should_emit_events():
+            for choice in self._complete_response.get("choices", []):
+                emit_event(_parse_choice_event(choice))
+        else:
+            if should_send_prompts():
+                _set_completions(
+                    self._span, self._complete_response.get("choices"))

         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed"))
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+            # Record partial token metrics if we have any data
+            if self._complete_response.get("choices") or self._request_kwargs:
+                _set_streaming_token_metrics(
+                    self._request_kwargs,
+                    self._complete_response,
+                    self._span,
+                    self._token_counter,
+                    self._shared_attributes(),
+                )

+            # Record choice metrics if we have any choices processed
+            if self._choice_counter and self._complete_response.get("choices"):
+                _set_choice_counter_metrics(
+                    self._choice_counter,
+                    self._complete_response.get("choices"),
+                    self._shared_attributes(),
+                )


 # Backward compatibility with OpenAI v0
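Note on the ChatStream cleanup additions above: streams that are abandoned before exhaustion (an early break, garbage collection, or an exception mid-iteration) are now closed through the lock-guarded, idempotent `_ensure_cleanup` path instead of leaking an open span. Illustrative consumer code (client setup assumed, model name illustrative):

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
    )
    for i, chunk in enumerate(stream):
        if i == 3:
            break  # stream abandoned early; __del__/__exit__ still end the span exactly once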
@@ -700,7 +869,7 @@ def _build_from_streaming_response(
     start_time=None,
     request_kwargs=None,
 ):
-    complete_response = {"choices": [], "model": ""}
+    complete_response = {"choices": [], "model": "", "id": ""}

     first_token = True
     time_of_first_token = start_time  # will be updated when first token is received

@@ -712,7 +881,8 @@ def _build_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(…
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)

@@ -720,7 +890,7 @@ def _build_from_streaming_response(
         yield item_to_yield

     shared_attributes = {
-        …
+        GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
         "server.address": _get_openai_base_url(instance),
         "stream": True,
     }

@@ -746,9 +916,12 @@ def _build_from_streaming_response(
         streaming_time_to_generate.record(time.time() - time_of_first_token)

     _set_response_attributes(span, complete_response)
-    …
-    …
-    …
+    if should_emit_events():
+        for choice in complete_response.get("choices", []):
+            emit_event(_parse_choice_event(choice))
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))

     span.set_status(Status(StatusCode.OK))
     span.end()

@@ -767,7 +940,7 @@ async def _abuild_from_streaming_response(
     start_time=None,
     request_kwargs=None,
 ):
-    complete_response = {"choices": [], "model": ""}
+    complete_response = {"choices": [], "model": "", "id": ""}

     first_token = True
     time_of_first_token = start_time  # will be updated when first token is received

@@ -779,7 +952,8 @@ async def _abuild_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(…
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)

@@ -787,7 +961,7 @@ async def _abuild_from_streaming_response(
         yield item_to_yield

     shared_attributes = {
-        …
+        GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
        "server.address": _get_openai_base_url(instance),
         "stream": True,
     }

@@ -813,23 +987,161 @@ async def _abuild_from_streaming_response(
         streaming_time_to_generate.record(time.time() - time_of_first_token)

     _set_response_attributes(span, complete_response)
-    …
-    …
-    …
+    if should_emit_events():
+        for choice in complete_response.get("choices", []):
+            emit_event(_parse_choice_event(choice))
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))

     span.set_status(Status(StatusCode.OK))
     span.end()


+# pydantic.BaseModel here is ChatCompletionMessageFunctionToolCall (as of openai 1.99.7)
+# but we keep to a parent type to support older versions
+def _parse_tool_calls(
+    tool_calls: Optional[List[Union[dict, pydantic.BaseModel]]],
+) -> Union[List[ToolCall], None]:
+    """
+    Util to correctly parse the tool calls data from the OpenAI API to this module's
+    standard `ToolCall`.
+    """
+    if tool_calls is None:
+        return tool_calls
+
+    result = []
+
+    for tool_call in tool_calls:
+        tool_call_data = None
+
+        if isinstance(tool_call, dict):
+            tool_call_data = copy.deepcopy(tool_call)
+        elif _is_chat_message_function_tool_call(tool_call):
+            tool_call_data = tool_call.model_dump()
+        elif _is_function_call(tool_call):
+            function_call = tool_call.model_dump()
+            tool_call_data = ToolCall(
+                id="",
+                function={
+                    "name": function_call.get("name"),
+                    "arguments": function_call.get("arguments"),
+                },
+                type="function",
+            )
+
+        result.append(tool_call_data)
+    return result
+
+
+def _is_chat_message_function_tool_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+    try:
+        from openai.types.chat.chat_completion_message_function_tool_call import (
+            ChatCompletionMessageFunctionToolCall,
+        )
+
+        return isinstance(model, ChatCompletionMessageFunctionToolCall)
+    except Exception:
+        try:
+            # Since OpenAI 1.99.3, ChatCompletionMessageToolCall is a Union,
+            # and the isinstance check will fail. This is fine, because in all
+            # those versions, the check above will succeed.
+            from openai.types.chat.chat_completion_message_tool_call import (
+                ChatCompletionMessageToolCall,
+            )
+            return isinstance(model, ChatCompletionMessageToolCall)
+        except Exception:
+            return False
+
+
+def _is_function_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+    try:
+        from openai.types.chat.chat_completion_message import FunctionCall
+        return isinstance(model, FunctionCall)
+    except Exception:
+        return False
+
+
+@singledispatch
+def _parse_choice_event(choice) -> ChoiceEvent:
+    has_message = choice.message is not None
+    has_finish_reason = choice.finish_reason is not None
+    has_tool_calls = has_message and choice.message.tool_calls
+    has_function_call = has_message and choice.message.function_call
+
+    content = choice.message.content if has_message else None
+    role = choice.message.role if has_message else "unknown"
+    finish_reason = choice.finish_reason if has_finish_reason else "unknown"
+
+    if has_tool_calls and has_function_call:
+        tool_calls = choice.message.tool_calls + [choice.message.function_call]
+    elif has_tool_calls:
+        tool_calls = choice.message.tool_calls
+    elif has_function_call:
+        tool_calls = [choice.message.function_call]
+    else:
+        tool_calls = None
+
+    return ChoiceEvent(
+        index=choice.index,
+        message={"content": content, "role": role},
+        finish_reason=finish_reason,
+        tool_calls=_parse_tool_calls(tool_calls),
+    )
+
+
+@_parse_choice_event.register
+def _(choice: dict) -> ChoiceEvent:
+    message = choice.get("message")
+    has_message = message is not None
+    has_finish_reason = choice.get("finish_reason") is not None
+    has_tool_calls = has_message and message.get("tool_calls")
+    has_function_call = has_message and message.get("function_call")
+
+    content = choice.get("message").get("content", "") if has_message else None
+    role = choice.get("message").get("role") if has_message else "unknown"
+    finish_reason = choice.get(
+        "finish_reason") if has_finish_reason else "unknown"
+
+    if has_tool_calls and has_function_call:
+        tool_calls = message.get("tool_calls") + [message.get("function_call")]
+    elif has_tool_calls:
+        tool_calls = message.get("tool_calls")
+    elif has_function_call:
+        tool_calls = [message.get("function_call")]
+    else:
+        tool_calls = None
+
+    if tool_calls is not None:
+        for tool_call in tool_calls:
+            tool_call["type"] = "function"
+
+    return ChoiceEvent(
+        index=choice.get("index"),
+        message={"content": content, "role": role},
+        finish_reason=finish_reason,
+        tool_calls=tool_calls,
+    )
+
+
 def _accumulate_stream_items(item, complete_response):
     if is_openai_v1():
         item = model_as_dict(item)

     complete_response["model"] = item.get("model")
+    complete_response["id"] = item.get("id")
+
+    # capture usage information from the last stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+    elif item.get("choices") and item["choices"][0].get("usage"):
+        # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+        complete_response["usage"] = item["choices"][0].get("usage")

     # prompt filter results
     if item.get("prompt_filter_results"):
-        complete_response["prompt_filter_results"] = item.get(…
+        complete_response["prompt_filter_results"] = item.get(
+            "prompt_filter_results")

     for choice in item.get("choices"):
         index = choice.get("index")
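Note on `_parse_choice_event` above: `functools.singledispatch` lets a single entry point handle both OpenAI response objects (attribute access) and the plain dicts accumulated from stream chunks (key access). A standalone illustration of the pattern, independent of this package:

    from functools import singledispatch

    @singledispatch
    def describe(choice) -> str:
        # default path: object-style access, e.g. a non-streaming response choice
        return f"{choice.finish_reason}: {choice.message.content}"

    @describe.register
    def _(choice: dict) -> str:
        # dict path: the shape produced by accumulating stream chunks
        return f"{choice.get('finish_reason')}: {choice.get('message', {}).get('content')}"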
@@ -876,4 +1188,5 @@ def _accumulate_stream_items(item, complete_response):
         if tool_call_function and tool_call_function.get("name"):
             span_function["name"] = tool_call_function.get("name")
         if tool_call_function and tool_call_function.get("arguments"):
-            span_function["arguments"] += tool_call_function.get(…
+            span_function["arguments"] += tool_call_function.get(
+                "arguments")