opentelemetry-instrumentation-openai 0.34.1__py3-none-any.whl → 0.49.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (22)
  1. opentelemetry/instrumentation/openai/__init__.py +11 -6
  2. opentelemetry/instrumentation/openai/shared/__init__.py +167 -68
  3. opentelemetry/instrumentation/openai/shared/chat_wrappers.py +544 -231
  4. opentelemetry/instrumentation/openai/shared/completion_wrappers.py +143 -81
  5. opentelemetry/instrumentation/openai/shared/config.py +8 -3
  6. opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +91 -30
  7. opentelemetry/instrumentation/openai/shared/event_emitter.py +108 -0
  8. opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  9. opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +1 -1
  10. opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
  11. opentelemetry/instrumentation/openai/utils.py +42 -9
  12. opentelemetry/instrumentation/openai/v0/__init__.py +32 -11
  13. opentelemetry/instrumentation/openai/v1/__init__.py +177 -69
  14. opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +208 -109
  15. opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +41 -19
  16. opentelemetry/instrumentation/openai/v1/responses_wrappers.py +1073 -0
  17. opentelemetry/instrumentation/openai/version.py +1 -1
  18. {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/METADATA +7 -8
  19. opentelemetry_instrumentation_openai-0.49.3.dist-info/RECORD +21 -0
  20. {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/WHEEL +1 -1
  21. opentelemetry_instrumentation_openai-0.34.1.dist-info/RECORD +0 -17
  22. {opentelemetry_instrumentation_openai-0.34.1.dist-info → opentelemetry_instrumentation_openai-0.49.3.dist-info}/entry_points.txt +0 -0
@@ -1,48 +1,59 @@
  import copy
  import json
  import logging
+ import threading
  import time
- from opentelemetry.instrumentation.openai.shared.config import Config
- from wrapt import ObjectProxy
-
+ from functools import singledispatch
+ from typing import List, Optional, Union

  from opentelemetry import context as context_api
- from opentelemetry.metrics import Counter, Histogram
- from opentelemetry.semconv_ai import (
- SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
- SpanAttributes,
- LLMRequestTypeValues,
- )
-
- from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
- from opentelemetry.instrumentation.openai.utils import (
- _with_chat_telemetry_wrapper,
- dont_throw,
- run_async,
- )
+ import pydantic
  from opentelemetry.instrumentation.openai.shared import (
- metric_shared_attributes,
+ OPENAI_LLM_USAGE_TOKEN_TYPES,
+ _get_openai_base_url,
  _set_client_attributes,
+ _set_functions_attributes,
  _set_request_attributes,
+ _set_response_attributes,
  _set_span_attribute,
- _set_functions_attributes,
+ _set_span_stream_usage,
  _token_type,
- set_tools_attributes,
- _set_response_attributes,
  is_streaming_response,
- should_send_prompts,
+ metric_shared_attributes,
  model_as_dict,
- _get_openai_base_url,
- OPENAI_LLM_USAGE_TOKEN_TYPES,
- should_record_stream_token_usage,
- get_token_count_from_string,
- _set_span_stream_usage,
  propagate_trace_context,
+ set_tools_attributes,
+ )
+ from opentelemetry.instrumentation.openai.shared.config import Config
+ from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
+ from opentelemetry.instrumentation.openai.shared.event_models import (
+ ChoiceEvent,
+ MessageEvent,
+ ToolCall,
+ )
+ from opentelemetry.instrumentation.openai.utils import (
+ _with_chat_telemetry_wrapper,
+ dont_throw,
+ is_openai_v1,
+ run_async,
+ should_emit_events,
+ should_send_prompts,
+ )
+ from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+ from opentelemetry.metrics import Counter, Histogram
+ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+ from opentelemetry.semconv._incubating.attributes import (
+ gen_ai_attributes as GenAIAttributes,
+ )
+ from opentelemetry.semconv_ai import (
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+ LLMRequestTypeValues,
+ SpanAttributes,
  )
  from opentelemetry.trace import SpanKind, Tracer
+ from opentelemetry import trace
  from opentelemetry.trace.status import Status, StatusCode
-
- from opentelemetry.instrumentation.openai.utils import is_openai_v1
+ from wrapt import ObjectProxy

  SPAN_NAME = "openai.chat"
  PROMPT_FILTER_KEY = "prompt_filter_results"
@@ -79,70 +90,77 @@ def chat_wrapper(
  attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
  )

- run_async(_handle_request(span, kwargs, instance))
+ # Use the span as current context to ensure events get proper trace context
+ with trace.use_span(span, end_on_exit=False):
+ run_async(_handle_request(span, kwargs, instance))
+ try:
+ start_time = time.time()
+ response = wrapped(*args, **kwargs)
+ end_time = time.time()
+ except Exception as e: # pylint: disable=broad-except
+ end_time = time.time()
+ duration = end_time - start_time if "start_time" in locals() else 0
+
+ attributes = {
+ "error.type": e.__class__.__name__,
+ }

- try:
- start_time = time.time()
- response = wrapped(*args, **kwargs)
- end_time = time.time()
- except Exception as e: # pylint: disable=broad-except
- end_time = time.time()
- duration = end_time - start_time if "start_time" in locals() else 0
-
- attributes = {
- "error.type": e.__class__.__name__,
- }
-
- if duration > 0 and duration_histogram:
- duration_histogram.record(duration, attributes=attributes)
- if exception_counter:
- exception_counter.add(1, attributes=attributes)
-
- raise e
-
- if is_streaming_response(response):
- # span will be closed after the generator is done
- if is_openai_v1():
- return ChatStream(
- span,
- response,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- streaming_time_to_first_token,
- streaming_time_to_generate,
- start_time,
- kwargs,
- )
- else:
- return _build_from_streaming_response(
- span,
- response,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- streaming_time_to_first_token,
- streaming_time_to_generate,
- start_time,
- kwargs,
- )
+ if duration > 0 and duration_histogram:
+ duration_histogram.record(duration, attributes=attributes)
+ if exception_counter:
+ exception_counter.add(1, attributes=attributes)

- duration = end_time - start_time
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+ span.record_exception(e)
+ span.set_status(Status(StatusCode.ERROR, str(e)))
+ span.end()

- _handle_response(
- response,
- span,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- duration,
- )
- span.end()
+ raise

- return response
+ if is_streaming_response(response):
+ # span will be closed after the generator is done
+ if is_openai_v1():
+ return ChatStream(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+ else:
+ return _build_from_streaming_response(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+
+ duration = end_time - start_time
+
+ _handle_response(
+ response,
+ span,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ duration,
+ )
+
+ span.end()
+
+ return response


  @_with_chat_telemetry_wrapper
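Note on the hunk above: the reworked `chat_wrapper` does its failure accounting inside the `except` branch before re-raising, recording the duration histogram and exception counter with an `error.type` attribute and marking the span as an error. A minimal, self-contained sketch of that pattern (the meter, metric names, and `call_openai` callable are illustrative, not taken from the package):

```python
import time

from opentelemetry import metrics

# Illustrative meter and metric names; the package wires these up differently.
meter = metrics.get_meter("openai.instrumentation.example")
duration_histogram = meter.create_histogram("llm.operation.duration", unit="s")
exception_counter = meter.create_counter("llm.openai.chat_completions.exceptions")


def call_with_error_metrics(call_openai, *args, **kwargs):
    """Record duration and an exception count tagged with error.type, then re-raise."""
    start_time = time.time()
    try:
        return call_openai(*args, **kwargs)
    except Exception as e:
        duration = time.time() - start_time
        attributes = {"error.type": e.__class__.__name__}
        if duration > 0:
            duration_histogram.record(duration, attributes=attributes)
        exception_counter.add(1, attributes=attributes)
        raise
```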
@@ -169,87 +187,115 @@ async def achat_wrapper(
  kind=SpanKind.CLIENT,
  attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
  )
- await _handle_request(span, kwargs, instance)

- try:
- start_time = time.time()
- response = await wrapped(*args, **kwargs)
- end_time = time.time()
- except Exception as e: # pylint: disable=broad-except
- end_time = time.time()
- duration = end_time - start_time if "start_time" in locals() else 0
-
- common_attributes = Config.get_common_metrics_attributes()
- attributes = {
- **common_attributes,
- "error.type": e.__class__.__name__,
- }
-
- if duration > 0 and duration_histogram:
- duration_histogram.record(duration, attributes=attributes)
- if exception_counter:
- exception_counter.add(1, attributes=attributes)
-
- raise e
-
- if is_streaming_response(response):
- # span will be closed after the generator is done
- if is_openai_v1():
- return ChatStream(
- span,
- response,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- streaming_time_to_first_token,
- streaming_time_to_generate,
- start_time,
- kwargs,
- )
- else:
- return _abuild_from_streaming_response(
- span,
- response,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- streaming_time_to_first_token,
- streaming_time_to_generate,
- start_time,
- kwargs,
- )
+ # Use the span as current context to ensure events get proper trace context
+ with trace.use_span(span, end_on_exit=False):
+ await _handle_request(span, kwargs, instance)

- duration = end_time - start_time
+ try:
+ start_time = time.time()
+ response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+ except Exception as e: # pylint: disable=broad-except
+ end_time = time.time()
+ duration = end_time - start_time if "start_time" in locals() else 0
+
+ common_attributes = Config.get_common_metrics_attributes()
+ attributes = {
+ **common_attributes,
+ "error.type": e.__class__.__name__,
+ }

- _handle_response(
- response,
- span,
- instance,
- token_counter,
- choice_counter,
- duration_histogram,
- duration,
- )
- span.end()
+ if duration > 0 and duration_histogram:
+ duration_histogram.record(duration, attributes=attributes)
+ if exception_counter:
+ exception_counter.add(1, attributes=attributes)

- return response
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+ span.record_exception(e)
+ span.set_status(Status(StatusCode.ERROR, str(e)))
+ span.end()
+
+ raise
+
+ if is_streaming_response(response):
+ # span will be closed after the generator is done
+ if is_openai_v1():
+ return ChatStream(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+ else:
+ return _abuild_from_streaming_response(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+
+ duration = end_time - start_time
+
+ _handle_response(
+ response,
+ span,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ duration,
+ )
+
+ span.end()
+
+ return response


  @dont_throw
  async def _handle_request(span, kwargs, instance):
- _set_request_attributes(span, kwargs)
+ _set_request_attributes(span, kwargs, instance)
  _set_client_attributes(span, instance)
- if should_send_prompts():
- await _set_prompts(span, kwargs.get("messages"))
- if kwargs.get("functions"):
- _set_functions_attributes(span, kwargs.get("functions"))
- elif kwargs.get("tools"):
- set_tools_attributes(span, kwargs.get("tools"))
+ if should_emit_events():
+ for message in kwargs.get("messages", []):
+ emit_event(
+ MessageEvent(
+ content=message.get("content"),
+ role=message.get("role"),
+ tool_calls=_parse_tool_calls(
+ message.get("tool_calls", None)),
+ )
+ )
+ else:
+ if should_send_prompts():
+ await _set_prompts(span, kwargs.get("messages"))
+ if kwargs.get("functions"):
+ _set_functions_attributes(span, kwargs.get("functions"))
+ elif kwargs.get("tools"):
+ set_tools_attributes(span, kwargs.get("tools"))
  if Config.enable_trace_context_propagation:
  propagate_trace_context(span, kwargs)

+ # Reasoning request attributes
+ reasoning_effort = kwargs.get("reasoning_effort")
+ _set_span_attribute(
+ span,
+ SpanAttributes.LLM_REQUEST_REASONING_EFFORT,
+ reasoning_effort or ()
+ )
+

  @dont_throw
  def _handle_response(
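Note on the hunk above: `_handle_request` now chooses between two telemetry modes; when `should_emit_events()` is true it emits one event per request message, otherwise it falls back to recording prompts and tool definitions as span attributes. A rough sketch of that branching, using stand-in types whose field names mirror the `MessageEvent(...)` call shown above but are not the package's actual event model:

```python
from dataclasses import dataclass
from typing import Any, List, Optional


# Stand-ins only: these are not the classes from
# opentelemetry.instrumentation.openai.shared.event_models.
@dataclass
class MessageEvent:
    content: Any
    role: str
    tool_calls: Optional[List[dict]] = None


def emit_event(event: MessageEvent) -> None:
    # The real emitter hands this to an OpenTelemetry event logger; print for illustration.
    print("emit:", event)


def handle_request(messages: List[dict], emit_events: bool, record_prompts: bool) -> None:
    """Event mode emits one event per message; legacy mode records span attributes instead."""
    if emit_events:
        for message in messages:
            emit_event(
                MessageEvent(
                    content=message.get("content"),
                    role=message.get("role"),
                    tool_calls=message.get("tool_calls"),
                )
            )
    elif record_prompts:
        for i, message in enumerate(messages):
            # legacy path: gen_ai.prompt.{i}.role / .content span attributes
            print(f"gen_ai.prompt.{i}.role", message.get("role"))
```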
@@ -260,6 +306,7 @@ def _handle_response(
  choice_counter=None,
  duration_histogram=None,
  duration=None,
+ is_streaming: bool = False,
  ):
  if is_openai_v1():
  response_dict = model_as_dict(response)
@@ -274,25 +321,59 @@ def _handle_response(
  duration_histogram,
  response_dict,
  duration,
+ is_streaming,
  )

  # span attributes
  _set_response_attributes(span, response_dict)

- if should_send_prompts():
- _set_completions(span, response_dict.get("choices"))
+ # Reasoning usage attributes
+ usage = response_dict.get("usage")
+ reasoning_tokens = None
+ if usage:
+ # Support both dict-style and object-style `usage`
+ tokens_details = (
+ usage.get("completion_tokens_details") if isinstance(usage, dict)
+ else getattr(usage, "completion_tokens_details", None)
+ )
+
+ if tokens_details:
+ reasoning_tokens = (
+ tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict)
+ else getattr(tokens_details, "reasoning_tokens", None)
+ )
+
+ _set_span_attribute(
+ span,
+ SpanAttributes.LLM_USAGE_REASONING_TOKENS,
+ reasoning_tokens or 0,
+ )
+
+ if should_emit_events():
+ if response.choices is not None:
+ for choice in response.choices:
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, response_dict.get("choices"))

  return response


  def _set_chat_metrics(
- instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ response_dict,
+ duration,
+ is_streaming: bool = False,
  ):
  shared_attributes = metric_shared_attributes(
  response_model=response_dict.get("model") or None,
  operation="chat",
  server_address=_get_openai_base_url(instance),
- is_streaming=False,
+ is_streaming=is_streaming,
  )

  # token metrics
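Note on the hunk above: the new reasoning-usage block reads `completion_tokens_details.reasoning_tokens` while tolerating both plain dicts and pydantic-style objects. The same dict-or-object handling, isolated into a small helper for illustration (the helper name is hypothetical, not part of the package):

```python
from typing import Any, Optional


def extract_reasoning_tokens(usage: Any) -> Optional[int]:
    """Mirror of the dict-or-object handling used in _handle_response."""
    if not usage:
        return None
    details = (
        usage.get("completion_tokens_details")
        if isinstance(usage, dict)
        else getattr(usage, "completion_tokens_details", None)
    )
    if not details:
        return None
    return (
        details.get("reasoning_tokens")
        if isinstance(details, dict)
        else getattr(details, "reasoning_tokens", None)
    )


# Works for a raw response dict...
assert extract_reasoning_tokens({"completion_tokens_details": {"reasoning_tokens": 128}}) == 128
# ...and returns None when the provider reports no reasoning details.
assert extract_reasoning_tokens({"prompt_tokens": 10, "completion_tokens": 5}) is None
```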
@@ -326,7 +407,7 @@ def _set_token_counter_metrics(token_counter, usage, shared_attributes):
  if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
  attributes_with_token_type = {
  **shared_attributes,
- SpanAttributes.LLM_TOKEN_TYPE: _token_type(name),
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: _token_type(name),
  }
  token_counter.record(val, attributes=attributes_with_token_type)

@@ -351,7 +432,8 @@ async def _process_image_item(item, trace_id, span_id, message_index, content_in
  image_format = item["image_url"]["url"].split(";")[0].split("/")[1]
  image_name = f"message_{message_index}_content_{content_index}.{image_format}"
  base64_string = item["image_url"]["url"].split(",")[1]
- url = await Config.upload_base64_image(trace_id, span_id, image_name, base64_string)
+ # Convert trace_id and span_id to strings as expected by upload function
+ url = await Config.upload_base64_image(str(trace_id), str(span_id), image_name, base64_string)

  return {"type": "image_url", "image_url": {"url": url}}

@@ -362,7 +444,8 @@ async def _set_prompts(span, messages):
  return

  for i, msg in enumerate(messages):
- prefix = f"{SpanAttributes.LLM_PROMPTS}.{i}"
+ prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{i}"
+ msg = msg if isinstance(msg, dict) else model_as_dict(msg)

  _set_span_attribute(span, f"{prefix}.role", msg.get("role"))
  if msg.get("content"):
@@ -382,7 +465,8 @@ async def _set_prompts(span, messages):
  content = json.dumps(content)
  _set_span_attribute(span, f"{prefix}.content", content)
  if msg.get("tool_call_id"):
- _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
+ _set_span_attribute(
+ span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
  tool_calls = msg.get("tool_calls")
  if tool_calls:
  for i, tool_call in enumerate(tool_calls):
@@ -413,7 +497,7 @@ def _set_completions(span, choices):

  for choice in choices:
  index = choice.get("index")
- prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
+ prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
  _set_span_attribute(
  span, f"{prefix}.finish_reason", choice.get("finish_reason")
  )
@@ -438,9 +522,11 @@ def _set_completions(span, choices):
  _set_span_attribute(span, f"{prefix}.role", message.get("role"))

  if message.get("refusal"):
- _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
+ _set_span_attribute(
+ span, f"{prefix}.refusal", message.get("refusal"))
  else:
- _set_span_attribute(span, f"{prefix}.content", message.get("content"))
+ _set_span_attribute(
+ span, f"{prefix}.content", message.get("content"))

  function_call = message.get("function_call")
  if function_call:
@@ -478,60 +564,34 @@ def _set_completions(span, choices):
  def _set_streaming_token_metrics(
  request_kwargs, complete_response, span, token_counter, shared_attributes
  ):
- # use tiktoken calculate token usage
- if not should_record_stream_token_usage():
- return
-
- # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
  prompt_usage = -1
  completion_usage = -1

- # prompt_usage
- if request_kwargs and request_kwargs.get("messages"):
- prompt_content = ""
- # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
- # is used by most of the other model.
- model_name = (
- complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
- )
- for msg in request_kwargs.get("messages"):
- if msg.get("content"):
- prompt_content += msg.get("content")
- if model_name:
- prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
- # completion_usage
- if complete_response.get("choices"):
- completion_content = ""
- # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
- # is used by most of the other model.
- model_name = complete_response.get("model") or "gpt-4"
-
- for choice in complete_response.get("choices"):
- if choice.get("message") and choice.get("message").get("content"):
- completion_content += choice["message"]["content"]
-
- if model_name:
- completion_usage = get_token_count_from_string(
- completion_content, model_name
- )
+ # Use token usage from API response only
+ if complete_response.get("usage"):
+ usage = complete_response["usage"]
+ if usage.get("prompt_tokens"):
+ prompt_usage = usage["prompt_tokens"]
+ if usage.get("completion_tokens"):
+ completion_usage = usage["completion_tokens"]

  # span record
  _set_span_stream_usage(span, prompt_usage, completion_usage)

  # metrics record
  if token_counter:
- if type(prompt_usage) is int and prompt_usage >= 0:
+ if isinstance(prompt_usage, int) and prompt_usage >= 0:
  attributes_with_token_type = {
  **shared_attributes,
- SpanAttributes.LLM_TOKEN_TYPE: "input",
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: "input",
  }
- token_counter.record(prompt_usage, attributes=attributes_with_token_type)
+ token_counter.record(
+ prompt_usage, attributes=attributes_with_token_type)

- if type(completion_usage) is int and completion_usage >= 0:
+ if isinstance(completion_usage, int) and completion_usage >= 0:
  attributes_with_token_type = {
  **shared_attributes,
- SpanAttributes.LLM_TOKEN_TYPE: "output",
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: "output",
  }
  token_counter.record(
  completion_usage, attributes=attributes_with_token_type
@@ -579,11 +639,34 @@ class ChatStream(ObjectProxy):
  self._time_of_first_token = self._start_time
  self._complete_response = {"choices": [], "model": ""}

+ # Cleanup state tracking to prevent duplicate operations
+ self._cleanup_completed = False
+ self._cleanup_lock = threading.Lock()
+
+ def __del__(self):
+ """Cleanup when object is garbage collected"""
+ if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+ self._ensure_cleanup()
+
  def __enter__(self):
  return self

  def __exit__(self, exc_type, exc_val, exc_tb):
- self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+ cleanup_exception = None
+ try:
+ self._ensure_cleanup()
+ except Exception as e:
+ cleanup_exception = e
+ # Don't re-raise to avoid masking original exception
+
+ result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+ if cleanup_exception:
+ # Log cleanup exception but don't affect context manager behavior
+ logger.debug(
+ "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+ return result

  async def __aenter__(self):
  return self
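Note on the hunk above: `ChatStream.__exit__` now runs its own cleanup before delegating to the wrapped stream and swallows cleanup errors so they cannot mask the caller's exception. A stripped-down sketch of that idempotent, lock-guarded pattern (a plain wrapper class for illustration, rather than the package's `ObjectProxy` subclass):

```python
import logging
import threading

logger = logging.getLogger(__name__)


class InstrumentedStream:
    """Illustrative wrapper only; not the package's ChatStream."""

    def __init__(self, wrapped):
        self._wrapped = wrapped
        self._cleanup_completed = False
        self._cleanup_lock = threading.Lock()

    def _ensure_cleanup(self):
        # Idempotent and thread-safe: spans/metrics are finalized at most once.
        with self._cleanup_lock:
            if self._cleanup_completed:
                return
            # ...close the span and flush partial metrics here...
            self._cleanup_completed = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self._ensure_cleanup()
        except Exception as e:  # never let telemetry cleanup mask the caller's exception
            logger.debug("cleanup failed: %s", e)
        return self._wrapped.__exit__(exc_type, exc_val, exc_tb)
```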
@@ -602,8 +685,13 @@ class ChatStream(ObjectProxy):
  chunk = self.__wrapped__.__next__()
  except Exception as e:
  if isinstance(e, StopIteration):
- self._close_span()
- raise e
+ self._process_complete_response()
+ else:
+ # Handle cleanup for other exceptions during stream iteration
+ self._ensure_cleanup()
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.ERROR, str(e)))
+ raise
  else:
  self._process_item(chunk)
  return chunk
@@ -613,14 +701,20 @@ class ChatStream(ObjectProxy):
  chunk = await self.__wrapped__.__anext__()
  except Exception as e:
  if isinstance(e, StopAsyncIteration):
- self._close_span()
- raise e
+ self._process_complete_response()
+ else:
+ # Handle cleanup for other exceptions during stream iteration
+ self._ensure_cleanup()
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.ERROR, str(e)))
+ raise
  else:
  self._process_item(chunk)
  return chunk

  def _process_item(self, item):
- self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+ self._span.add_event(
+ name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

  if self._first_token and self._streaming_time_to_first_token:
  self._time_of_first_token = time.time()
@@ -643,7 +737,7 @@ class ChatStream(ObjectProxy):
  )

  @dont_throw
- def _close_span(self):
+ def _process_complete_response(self):
  _set_streaming_token_metrics(
  self._request_kwargs,
  self._complete_response,
@@ -676,12 +770,87 @@ class ChatStream(ObjectProxy):
  )

  _set_response_attributes(self._span, self._complete_response)
-
- if should_send_prompts():
- _set_completions(self._span, self._complete_response.get("choices"))
+ if should_emit_events():
+ for choice in self._complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(
+ self._span, self._complete_response.get("choices"))

  self._span.set_status(Status(StatusCode.OK))
  self._span.end()
+ self._cleanup_completed = True
+
+ @dont_throw
+ def _ensure_cleanup(self):
+ """Thread-safe cleanup method that handles different cleanup scenarios"""
+ with self._cleanup_lock:
+ if self._cleanup_completed:
+ logger.debug("ChatStream cleanup already completed, skipping")
+ return
+
+ try:
+ logger.debug("Starting ChatStream cleanup")
+
+ # Calculate partial metrics based on available data
+ self._record_partial_metrics()
+
+ # Set span status and close it
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.OK))
+ self._span.end()
+ logger.debug("ChatStream span closed successfully")
+
+ self._cleanup_completed = True
+ logger.debug("ChatStream cleanup completed successfully")
+
+ except Exception as e:
+ # Log cleanup errors but don't propagate to avoid masking original issues
+ logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+ # Still try to close the span even if metrics recording failed
+ try:
+ if self._span and self._span.is_recording():
+ self._span.set_status(
+ Status(StatusCode.ERROR, "Cleanup failed"))
+ self._span.end()
+ self._cleanup_completed = True
+ except Exception:
+ # Final fallback - just mark as completed to prevent infinite loops
+ self._cleanup_completed = True
+
+ @dont_throw
+ def _record_partial_metrics(self):
+ """Record metrics based on available partial data"""
+ # Always record duration if we have start time
+ if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+ duration = time.time() - self._start_time
+ self._duration_histogram.record(
+ duration, attributes=self._shared_attributes()
+ )
+
+ # Record basic span attributes even without complete response
+ if self._span and self._span.is_recording():
+ _set_response_attributes(self._span, self._complete_response)
+
+ # Record partial token metrics if we have any data
+ if self._complete_response.get("choices") or self._request_kwargs:
+ _set_streaming_token_metrics(
+ self._request_kwargs,
+ self._complete_response,
+ self._span,
+ self._token_counter,
+ self._shared_attributes(),
+ )
+
+ # Record choice metrics if we have any choices processed
+ if self._choice_counter and self._complete_response.get("choices"):
+ _set_choice_counter_metrics(
+ self._choice_counter,
+ self._complete_response.get("choices"),
+ self._shared_attributes(),
+ )


  # Backward compatibility with OpenAI v0
@@ -700,7 +869,7 @@ def _build_from_streaming_response(
  start_time=None,
  request_kwargs=None,
  ):
- complete_response = {"choices": [], "model": ""}
+ complete_response = {"choices": [], "model": "", "id": ""}

  first_token = True
  time_of_first_token = start_time # will be updated when first token is received
@@ -712,7 +881,8 @@ def _build_from_streaming_response(

  if first_token and streaming_time_to_first_token:
  time_of_first_token = time.time()
- streaming_time_to_first_token.record(time_of_first_token - start_time)
+ streaming_time_to_first_token.record(
+ time_of_first_token - start_time)
  first_token = False

  _accumulate_stream_items(item, complete_response)
@@ -720,7 +890,7 @@ def _build_from_streaming_response(
  yield item_to_yield

  shared_attributes = {
- SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
+ GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
  "server.address": _get_openai_base_url(instance),
  "stream": True,
  }
@@ -746,9 +916,12 @@ def _build_from_streaming_response(
  streaming_time_to_generate.record(time.time() - time_of_first_token)

  _set_response_attributes(span, complete_response)
-
- if should_send_prompts():
- _set_completions(span, complete_response.get("choices"))
+ if should_emit_events():
+ for choice in complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, complete_response.get("choices"))

  span.set_status(Status(StatusCode.OK))
  span.end()
@@ -767,7 +940,7 @@ async def _abuild_from_streaming_response(
  start_time=None,
  request_kwargs=None,
  ):
- complete_response = {"choices": [], "model": ""}
+ complete_response = {"choices": [], "model": "", "id": ""}
  first_token = True
  time_of_first_token = start_time # will be updated when first token is received

@@ -779,7 +952,8 @@ async def _abuild_from_streaming_response(

  if first_token and streaming_time_to_first_token:
  time_of_first_token = time.time()
- streaming_time_to_first_token.record(time_of_first_token - start_time)
+ streaming_time_to_first_token.record(
+ time_of_first_token - start_time)
  first_token = False

  _accumulate_stream_items(item, complete_response)
@@ -787,7 +961,7 @@ async def _abuild_from_streaming_response(
  yield item_to_yield

  shared_attributes = {
- SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
+ GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
  "server.address": _get_openai_base_url(instance),
  "stream": True,
  }
@@ -813,23 +987,161 @@ async def _abuild_from_streaming_response(
  streaming_time_to_generate.record(time.time() - time_of_first_token)

  _set_response_attributes(span, complete_response)
-
- if should_send_prompts():
- _set_completions(span, complete_response.get("choices"))
+ if should_emit_events():
+ for choice in complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, complete_response.get("choices"))

  span.set_status(Status(StatusCode.OK))
  span.end()


+ # pydantic.BaseModel here is ChatCompletionMessageFunctionToolCall (as of openai 1.99.7)
+ # but we keep to a parent type to support older versions
+ def _parse_tool_calls(
+ tool_calls: Optional[List[Union[dict, pydantic.BaseModel]]],
+ ) -> Union[List[ToolCall], None]:
+ """
+ Util to correctly parse the tool calls data from the OpenAI API to this module's
+ standard `ToolCall`.
+ """
+ if tool_calls is None:
+ return tool_calls
+
+ result = []
+
+ for tool_call in tool_calls:
+ tool_call_data = None
+
+ if isinstance(tool_call, dict):
+ tool_call_data = copy.deepcopy(tool_call)
+ elif _is_chat_message_function_tool_call(tool_call):
+ tool_call_data = tool_call.model_dump()
+ elif _is_function_call(tool_call):
+ function_call = tool_call.model_dump()
+ tool_call_data = ToolCall(
+ id="",
+ function={
+ "name": function_call.get("name"),
+ "arguments": function_call.get("arguments"),
+ },
+ type="function",
+ )
+
+ result.append(tool_call_data)
+ return result
+
+
+ def _is_chat_message_function_tool_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+ try:
+ from openai.types.chat.chat_completion_message_function_tool_call import (
+ ChatCompletionMessageFunctionToolCall,
+ )
+
+ return isinstance(model, ChatCompletionMessageFunctionToolCall)
+ except Exception:
+ try:
+ # Since OpenAI 1.99.3, ChatCompletionMessageToolCall is a Union,
+ # and the isinstance check will fail. This is fine, because in all
+ # those versions, the check above will succeed.
+ from openai.types.chat.chat_completion_message_tool_call import (
+ ChatCompletionMessageToolCall,
+ )
+ return isinstance(model, ChatCompletionMessageToolCall)
+ except Exception:
+ return False
+
+
+ def _is_function_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+ try:
+ from openai.types.chat.chat_completion_message import FunctionCall
+ return isinstance(model, FunctionCall)
+ except Exception:
+ return False
+
+
+ @singledispatch
+ def _parse_choice_event(choice) -> ChoiceEvent:
+ has_message = choice.message is not None
+ has_finish_reason = choice.finish_reason is not None
+ has_tool_calls = has_message and choice.message.tool_calls
+ has_function_call = has_message and choice.message.function_call
+
+ content = choice.message.content if has_message else None
+ role = choice.message.role if has_message else "unknown"
+ finish_reason = choice.finish_reason if has_finish_reason else "unknown"
+
+ if has_tool_calls and has_function_call:
+ tool_calls = choice.message.tool_calls + [choice.message.function_call]
+ elif has_tool_calls:
+ tool_calls = choice.message.tool_calls
+ elif has_function_call:
+ tool_calls = [choice.message.function_call]
+ else:
+ tool_calls = None
+
+ return ChoiceEvent(
+ index=choice.index,
+ message={"content": content, "role": role},
+ finish_reason=finish_reason,
+ tool_calls=_parse_tool_calls(tool_calls),
+ )
+
+
+ @_parse_choice_event.register
+ def _(choice: dict) -> ChoiceEvent:
+ message = choice.get("message")
+ has_message = message is not None
+ has_finish_reason = choice.get("finish_reason") is not None
+ has_tool_calls = has_message and message.get("tool_calls")
+ has_function_call = has_message and message.get("function_call")
+
+ content = choice.get("message").get("content", "") if has_message else None
+ role = choice.get("message").get("role") if has_message else "unknown"
+ finish_reason = choice.get(
+ "finish_reason") if has_finish_reason else "unknown"
+
+ if has_tool_calls and has_function_call:
+ tool_calls = message.get("tool_calls") + [message.get("function_call")]
+ elif has_tool_calls:
+ tool_calls = message.get("tool_calls")
+ elif has_function_call:
+ tool_calls = [message.get("function_call")]
+ else:
+ tool_calls = None
+
+ if tool_calls is not None:
+ for tool_call in tool_calls:
+ tool_call["type"] = "function"
+
+ return ChoiceEvent(
+ index=choice.get("index"),
+ message={"content": content, "role": role},
+ finish_reason=finish_reason,
+ tool_calls=tool_calls,
+ )
+
+
  def _accumulate_stream_items(item, complete_response):
  if is_openai_v1():
  item = model_as_dict(item)

  complete_response["model"] = item.get("model")
+ complete_response["id"] = item.get("id")
+
+ # capture usage information from the last stream chunks
+ if item.get("usage"):
+ complete_response["usage"] = item.get("usage")
+ elif item.get("choices") and item["choices"][0].get("usage"):
+ # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+ complete_response["usage"] = item["choices"][0].get("usage")

  # prompt filter results
  if item.get("prompt_filter_results"):
- complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
+ complete_response["prompt_filter_results"] = item.get(
+ "prompt_filter_results")

  for choice in item.get("choices"):
  index = choice.get("index")
@@ -876,4 +1188,5 @@ def _accumulate_stream_items(item, complete_response):
  if tool_call_function and tool_call_function.get("name"):
  span_function["name"] = tool_call_function.get("name")
  if tool_call_function and tool_call_function.get("arguments"):
- span_function["arguments"] += tool_call_function.get("arguments")
+ span_function["arguments"] += tool_call_function.get(
+ "arguments")
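Note on the streaming hunks above: the instrumentation no longer estimates streamed token usage with tiktoken; `_set_streaming_token_metrics` now relies on whatever `usage` block the API reports, which `_accumulate_stream_items` captures from the final stream chunks. On the client side, usage only appears in a streamed chat completion when it is requested; a hedged example, assuming an openai-python version recent enough to support `stream_options` (model name and prompt are illustrative):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set; purely illustrative

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    # Ask the API to append a final chunk that carries the usage block.
    stream_options={"include_usage": True},
)

usage = None
for chunk in stream:
    if chunk.usage is not None:  # only the last chunk carries usage
        usage = chunk.usage

if usage is not None:
    print(usage.prompt_tokens, usage.completion_tokens)
```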