opentelemetry-instrumentation-openai 0.43.1__tar.gz → 0.44.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of opentelemetry-instrumentation-openai might be problematic.

Files changed (21)
  1. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/PKG-INFO +1 -2
  2. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/__init__.py +0 -2
  3. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/__init__.py +0 -33
  4. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +141 -47
  5. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +16 -29
  6. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/config.py +0 -1
  7. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/utils.py +0 -4
  8. opentelemetry_instrumentation_openai-0.44.0/opentelemetry/instrumentation/openai/version.py +1 -0
  9. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/pyproject.toml +2 -2
  10. opentelemetry_instrumentation_openai-0.43.1/opentelemetry/instrumentation/openai/version.py +0 -1
  11. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/README.md +0 -0
  12. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +0 -0
  13. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/event_emitter.py +0 -0
  14. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/event_models.py +0 -0
  15. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +0 -0
  16. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
  17. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v0/__init__.py +0 -0
  18. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/__init__.py +0 -0
  19. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +0 -0
  20. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +0 -0
  21. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: opentelemetry-instrumentation-openai
-Version: 0.43.1
+Version: 0.44.0
 Summary: OpenTelemetry OpenAI instrumentation
 License: Apache-2.0
 Author: Gal Kleinman
@@ -18,7 +18,6 @@ Requires-Dist: opentelemetry-api (>=1.28.0,<2.0.0)
 Requires-Dist: opentelemetry-instrumentation (>=0.50b0)
 Requires-Dist: opentelemetry-semantic-conventions (>=0.50b0)
 Requires-Dist: opentelemetry-semantic-conventions-ai (==0.4.11)
-Requires-Dist: tiktoken (>=0.6.0,<1)
 Project-URL: Repository, https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai
 Description-Content-Type: text/markdown

opentelemetry/instrumentation/openai/__init__.py

@@ -14,7 +14,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
     def __init__(
         self,
         enrich_assistant: bool = False,
-        enrich_token_usage: bool = False,
         exception_logger=None,
         get_common_metrics_attributes: Callable[[], dict] = lambda: {},
         upload_base64_image: Optional[
@@ -25,7 +24,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
     ):
         super().__init__()
         Config.enrich_assistant = enrich_assistant
-        Config.enrich_token_usage = enrich_token_usage
         Config.exception_logger = exception_logger
         Config.get_common_metrics_attributes = get_common_metrics_attributes
         Config.upload_base64_image = upload_base64_image
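
The enrich_token_usage option is removed from the instrumentor constructor (and, further down in this diff, from Config), so callers that still pass it will get a TypeError. A minimal sketch of instrumenting with the remaining options, illustrative and not taken from the diff:

    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    # enrich_token_usage is no longer accepted as of 0.44.0; the other
    # keyword arguments shown in the diff are unchanged.
    OpenAIInstrumentor(enrich_assistant=False).instrument()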
opentelemetry/instrumentation/openai/shared/__init__.py

@@ -7,7 +7,6 @@ from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.instrumentation.openai.utils import (
     dont_throw,
     is_openai_v1,
-    should_record_stream_token_usage,
 )
 from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
     GEN_AI_RESPONSE_ID,
@@ -24,8 +23,6 @@ PROMPT_ERROR = "prompt_error"

 _PYDANTIC_VERSION = version("pydantic")

-# tiktoken encodings map for different model, key is model_name, value is tiktoken encoding
-tiktoken_encodings = {}

 logger = logging.getLogger(__name__)

@@ -355,36 +352,6 @@ def model_as_dict(model):
     return model


-def get_token_count_from_string(string: str, model_name: str):
-    if not should_record_stream_token_usage():
-        return None
-
-    import tiktoken
-
-    if tiktoken_encodings.get(model_name) is None:
-        try:
-            encoding = tiktoken.encoding_for_model(model_name)
-        except KeyError as ex:
-            # no such model_name in tiktoken
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-        except Exception as ex:
-            # Other exceptions in tiktok
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-
-        tiktoken_encodings[model_name] = encoding
-    else:
-        encoding = tiktoken_encodings.get(model_name)
-
-    token_count = len(encoding.encode(string))
-    return token_count
-
-
 def _token_type(token_type: str):
     if token_type == "prompt_tokens":
         return "input"
opentelemetry/instrumentation/openai/shared/chat_wrappers.py

@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -16,13 +17,11 @@ from opentelemetry.instrumentation.openai.shared import (
     _set_span_attribute,
     _set_span_stream_usage,
     _token_type,
-    get_token_count_from_string,
     is_streaming_response,
     metric_shared_attributes,
     model_as_dict,
     propagate_trace_context,
     set_tools_attributes,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
@@ -269,7 +268,8 @@ async def _handle_request(span, kwargs, instance):
                 MessageEvent(
                     content=message.get("content"),
                     role=message.get("role"),
-                    tool_calls=_parse_tool_calls(message.get("tool_calls", None)),
+                    tool_calls=_parse_tool_calls(
+                        message.get("tool_calls", None)),
                 )
             )
         else:
@@ -292,6 +292,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -306,6 +307,7 @@
         duration_histogram,
         response_dict,
         duration,
+        is_streaming,
     )

     # span attributes
@@ -323,13 +325,19 @@


 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )

     # token metrics
@@ -420,7 +428,8 @@ async def _set_prompts(span, messages):
             content = json.dumps(content)
         _set_span_attribute(span, f"{prefix}.content", content)
         if msg.get("tool_call_id"):
-            _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
+            _set_span_attribute(
+                span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
         tool_calls = msg.get("tool_calls")
         if tool_calls:
             for i, tool_call in enumerate(tool_calls):
@@ -476,9 +485,11 @@ def _set_completions(span, choices):
         _set_span_attribute(span, f"{prefix}.role", message.get("role"))

         if message.get("refusal"):
-            _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
+            _set_span_attribute(
+                span, f"{prefix}.refusal", message.get("refusal"))
         else:
-            _set_span_attribute(span, f"{prefix}.content", message.get("content"))
+            _set_span_attribute(
+                span, f"{prefix}.content", message.get("content"))

         function_call = message.get("function_call")
         if function_call:
@@ -516,13 +527,10 @@
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    if not should_record_stream_token_usage():
-        return
-
     prompt_usage = -1
     completion_usage = -1

-    # First, try to get usage from API response
+    # Use token usage from API response only
    if complete_response.get("usage"):
         usage = complete_response["usage"]
         if usage.get("prompt_tokens"):
@@ -530,32 +538,6 @@ def _set_streaming_token_metrics(
         if usage.get("completion_tokens"):
             completion_usage = usage["completion_tokens"]

-    # If API response doesn't have usage, fallback to tiktoken calculation
-    if prompt_usage == -1 or completion_usage == -1:
-        model_name = (
-            complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
-        )
-
-        # Calculate prompt tokens if not available from API
-        if prompt_usage == -1 and request_kwargs and request_kwargs.get("messages"):
-            prompt_content = ""
-            for msg in request_kwargs.get("messages"):
-                if msg.get("content"):
-                    prompt_content += msg.get("content")
-            if model_name and should_record_stream_token_usage():
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-        # Calculate completion tokens if not available from API
-        if completion_usage == -1 and complete_response.get("choices"):
-            completion_content = ""
-            for choice in complete_response.get("choices"):
-                if choice.get("message") and choice.get("message").get("content"):
-                    completion_content += choice["message"]["content"]
-            if model_name and should_record_stream_token_usage():
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
     # span record
     _set_span_stream_usage(span, prompt_usage, completion_usage)

@@ -566,7 +548,8 @@ def _set_streaming_token_metrics(
             **shared_attributes,
             SpanAttributes.LLM_TOKEN_TYPE: "input",
         }
-        token_counter.record(prompt_usage, attributes=attributes_with_token_type)
+        token_counter.record(
+            prompt_usage, attributes=attributes_with_token_type)

     if isinstance(completion_usage, int) and completion_usage >= 0:
         attributes_with_token_type = {
@@ -619,11 +602,34 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}

+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+        return result

     async def __aenter__(self):
         return self
@@ -643,6 +649,11 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
@@ -654,13 +665,19 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
             return chunk

     def _process_item(self, item):
-        self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+        self._span.add_event(
+            name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

         if self._first_token and self._streaming_time_to_first_token:
             self._time_of_first_token = time.time()
@@ -721,10 +738,82 @@ class ChatStream(ObjectProxy):
                 emit_event(_parse_choice_event(choice))
         else:
             if should_send_prompts():
-                _set_completions(self._span, self._complete_response.get("choices"))
+                _set_completions(
+                    self._span, self._complete_response.get("choices"))

         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed"))
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )


 # Backward compatibility with OpenAI v0
@@ -755,7 +844,8 @@ def _build_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -825,7 +915,8 @@ async def _abuild_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -943,7 +1034,8 @@ def _(choice: dict) -> ChoiceEvent:

     content = choice.get("message").get("content", "") if has_message else None
     role = choice.get("message").get("role") if has_message else "unknown"
-    finish_reason = choice.get("finish_reason") if has_finish_reason else "unknown"
+    finish_reason = choice.get(
+        "finish_reason") if has_finish_reason else "unknown"

     if has_tool_calls and has_function_call:
         tool_calls = message.get("tool_calls") + [message.get("function_call")]
@@ -982,7 +1074,8 @@ def _accumulate_stream_items(item, complete_response):

     # prompt filter results
     if item.get("prompt_filter_results"):
-        complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
+        complete_response["prompt_filter_results"] = item.get(
+            "prompt_filter_results")

     for choice in item.get("choices"):
         index = choice.get("index")
@@ -1029,4 +1122,5 @@ def _accumulate_stream_items(item, complete_response):
             if tool_call_function and tool_call_function.get("name"):
                 span_function["name"] = tool_call_function.get("name")
             if tool_call_function and tool_call_function.get("arguments"):
-                span_function["arguments"] += tool_call_function.get("arguments")
+                span_function["arguments"] += tool_call_function.get(
+                    "arguments")
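
With the tiktoken fallback removed, the streaming wrappers above only record token usage when the API response itself reports it. One way a caller can make sure streamed chat completions carry that usage block, sketched under the assumption of the OpenAI v1 Python client (the model name is illustrative):

    from openai import OpenAI

    client = OpenAI()
    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        # Ask the API to append a final chunk whose `usage` field holds the
        # prompt/completion token counts, so the instrumentation can record it.
        stream_options={"include_usage": True},
    )
    for chunk in stream:
        pass  # only the last chunk has a populated `usage`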
opentelemetry/instrumentation/openai/shared/completion_wrappers.py

@@ -8,11 +8,9 @@ from opentelemetry.instrumentation.openai.shared import (
     _set_response_attributes,
     _set_span_attribute,
     _set_span_stream_usage,
-    get_token_count_from_string,
     is_streaming_response,
     model_as_dict,
     propagate_trace_context,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -231,35 +229,19 @@ def _emit_streaming_response_events(complete_response):

 @dont_throw
 def _set_token_usage(span, request_kwargs, complete_response):
-    # use tiktoken calculate token usage
-    if should_record_stream_token_usage():
-        prompt_usage = -1
-        completion_usage = -1
+    prompt_usage = -1
+    completion_usage = -1

-        # prompt_usage
-        if request_kwargs and request_kwargs.get("prompt"):
-            prompt_content = request_kwargs.get("prompt")
-            model_name = complete_response.get("model") or None
+    # Use token usage from API response only
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]

-            if model_name:
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-        # completion_usage
-        if complete_response.get("choices"):
-            completion_content = ""
-            model_name = complete_response.get("model") or None
-
-            for choice in complete_response.get("choices"):
-                if choice.get("text"):
-                    completion_content += choice.get("text")
-
-            if model_name:
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
-        # span record
-        _set_span_stream_usage(span, prompt_usage, completion_usage)
+    # span record
+    _set_span_stream_usage(span, prompt_usage, completion_usage)


 @dont_throw
@@ -269,6 +251,11 @@ def _accumulate_streaming_response(complete_response, item):

     complete_response["model"] = item.get("model")
     complete_response["id"] = item.get("id")
+
+    # capture usage information from the stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+
     for choice in item.get("choices"):
         index = choice.get("index")
         if len(complete_response.get("choices")) <= index:
opentelemetry/instrumentation/openai/shared/config.py

@@ -4,7 +4,6 @@ from opentelemetry._events import EventLogger


 class Config:
-    enrich_token_usage = False
     enrich_assistant = False
     exception_logger = None
     get_common_metrics_attributes: Callable[[], dict] = lambda: {}
opentelemetry/instrumentation/openai/utils.py

@@ -31,10 +31,6 @@ def is_metrics_enabled() -> bool:
     return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"


-def should_record_stream_token_usage():
-    return Config.enrich_token_usage
-
-
 def _with_image_gen_metric_wrapper(func):
     def _with_metric(duration_histogram, exception_counter):
         def wrapper(wrapped, instance, args, kwargs):
pyproject.toml

@@ -8,7 +8,7 @@ show_missing = true

 [tool.poetry]
 name = "opentelemetry-instrumentation-openai"
-version = "0.43.1"
+version = "0.44.0"
 description = "OpenTelemetry OpenAI instrumentation"
 authors = [
   "Gal Kleinman <gal@traceloop.com>",
@@ -28,7 +28,6 @@ opentelemetry-api = "^1.28.0"
 opentelemetry-instrumentation = ">=0.50b0"
 opentelemetry-semantic-conventions = ">=0.50b0"
 opentelemetry-semantic-conventions-ai = "0.4.11"
-tiktoken = ">=0.6.0, <1"

 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
@@ -42,6 +41,7 @@ pytest-recording = "^0.13.1"
 openai = { extras = ["datalib"], version = ">=1.66.0" }
 opentelemetry-sdk = "^1.27.0"
 pytest-asyncio = "^0.23.7"
+requests = "^2.31.0"

 [build-system]
 requires = ["poetry-core"]
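
Since tiktoken is no longer a runtime dependency and get_token_count_from_string is gone, applications that still want estimated token counts for responses without a usage block can compute them on their own. A hedged sketch that mirrors the removed helper; the function name and fallback encoding below are illustrative, not part of this package:

    import tiktoken

    def count_tokens(text: str, model_name: str) -> int:
        # Mirrors the removed helper: look up the model's encoding, then
        # count the encoded tokens. Falls back to a generic encoding when
        # tiktoken does not recognize the model name.
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            encoding = tiktoken.get_encoding("cl100k_base")  # illustrative fallback
        return len(encoding.encode(text))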