opentelemetry-instrumentation-openai 0.43.1__tar.gz → 0.44.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of opentelemetry-instrumentation-openai might be problematic.

Files changed (21)
  1. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/PKG-INFO +1 -2
  2. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/__init__.py +0 -2
  3. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/__init__.py +0 -33
  4. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +141 -47
  5. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +16 -29
  6. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/config.py +0 -1
  7. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/utils.py +0 -4
  8. opentelemetry_instrumentation_openai-0.44.0/opentelemetry/instrumentation/openai/version.py +1 -0
  9. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/pyproject.toml +2 -2
  10. opentelemetry_instrumentation_openai-0.43.1/opentelemetry/instrumentation/openai/version.py +0 -1
  11. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/README.md +0 -0
  12. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +0 -0
  13. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/event_emitter.py +0 -0
  14. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/event_models.py +0 -0
  15. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +0 -0
  16. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
  17. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v0/__init__.py +0 -0
  18. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/__init__.py +0 -0
  19. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +0 -0
  20. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +0 -0
  21. {opentelemetry_instrumentation_openai-0.43.1 → opentelemetry_instrumentation_openai-0.44.0}/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: opentelemetry-instrumentation-openai
-Version: 0.43.1
+Version: 0.44.0
 Summary: OpenTelemetry OpenAI instrumentation
 License: Apache-2.0
 Author: Gal Kleinman
@@ -18,7 +18,6 @@ Requires-Dist: opentelemetry-api (>=1.28.0,<2.0.0)
 Requires-Dist: opentelemetry-instrumentation (>=0.50b0)
 Requires-Dist: opentelemetry-semantic-conventions (>=0.50b0)
 Requires-Dist: opentelemetry-semantic-conventions-ai (==0.4.11)
-Requires-Dist: tiktoken (>=0.6.0,<1)
 Project-URL: Repository, https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai
 Description-Content-Type: text/markdown

opentelemetry/instrumentation/openai/__init__.py

@@ -14,7 +14,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
     def __init__(
         self,
         enrich_assistant: bool = False,
-        enrich_token_usage: bool = False,
         exception_logger=None,
         get_common_metrics_attributes: Callable[[], dict] = lambda: {},
         upload_base64_image: Optional[
@@ -25,7 +24,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
     ):
         super().__init__()
         Config.enrich_assistant = enrich_assistant
-        Config.enrich_token_usage = enrich_token_usage
         Config.exception_logger = exception_logger
         Config.get_common_metrics_attributes = get_common_metrics_attributes
         Config.upload_base64_image = upload_base64_image
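
The enrich_token_usage option is removed from the instrumentor constructor (and, further down in this diff, from Config), so callers that still pass it will get a TypeError. A minimal sketch of instrumenting with the remaining options, illustrative and not taken from the diff:

    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    # enrich_token_usage is no longer accepted as of 0.44.0; the other
    # keyword arguments shown in the diff are unchanged.
    OpenAIInstrumentor(enrich_assistant=False).instrument()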
opentelemetry/instrumentation/openai/shared/__init__.py

@@ -7,7 +7,6 @@ from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.instrumentation.openai.utils import (
     dont_throw,
     is_openai_v1,
-    should_record_stream_token_usage,
 )
 from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
     GEN_AI_RESPONSE_ID,
@@ -24,8 +23,6 @@ PROMPT_ERROR = "prompt_error"

 _PYDANTIC_VERSION = version("pydantic")

-# tiktoken encodings map for different model, key is model_name, value is tiktoken encoding
-tiktoken_encodings = {}

 logger = logging.getLogger(__name__)

@@ -355,36 +352,6 @@ def model_as_dict(model):
     return model


-def get_token_count_from_string(string: str, model_name: str):
-    if not should_record_stream_token_usage():
-        return None
-
-    import tiktoken
-
-    if tiktoken_encodings.get(model_name) is None:
-        try:
-            encoding = tiktoken.encoding_for_model(model_name)
-        except KeyError as ex:
-            # no such model_name in tiktoken
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-        except Exception as ex:
-            # Other exceptions in tiktok
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-
-        tiktoken_encodings[model_name] = encoding
-    else:
-        encoding = tiktoken_encodings.get(model_name)
-
-    token_count = len(encoding.encode(string))
-    return token_count
-
-
 def _token_type(token_type: str):
     if token_type == "prompt_tokens":
         return "input"
opentelemetry/instrumentation/openai/shared/chat_wrappers.py

@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -16,13 +17,11 @@ from opentelemetry.instrumentation.openai.shared import (
     _set_span_attribute,
     _set_span_stream_usage,
     _token_type,
-    get_token_count_from_string,
     is_streaming_response,
     metric_shared_attributes,
     model_as_dict,
     propagate_trace_context,
     set_tools_attributes,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
@@ -269,7 +268,8 @@ async def _handle_request(span, kwargs, instance):
                 MessageEvent(
                     content=message.get("content"),
                     role=message.get("role"),
-                    tool_calls=_parse_tool_calls(message.get("tool_calls", None)),
+                    tool_calls=_parse_tool_calls(
+                        message.get("tool_calls", None)),
                 )
             )
         else:
@@ -292,6 +292,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -306,6 +307,7 @@
         duration_histogram,
         response_dict,
         duration,
+        is_streaming,
     )

     # span attributes
@@ -323,13 +325,19 @@


 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )

     # token metrics
@@ -420,7 +428,8 @@ async def _set_prompts(span, messages):
             content = json.dumps(content)
         _set_span_attribute(span, f"{prefix}.content", content)
         if msg.get("tool_call_id"):
-            _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
+            _set_span_attribute(
+                span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
         tool_calls = msg.get("tool_calls")
         if tool_calls:
             for i, tool_call in enumerate(tool_calls):
@@ -476,9 +485,11 @@ def _set_completions(span, choices):
         _set_span_attribute(span, f"{prefix}.role", message.get("role"))

         if message.get("refusal"):
-            _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
+            _set_span_attribute(
+                span, f"{prefix}.refusal", message.get("refusal"))
         else:
-            _set_span_attribute(span, f"{prefix}.content", message.get("content"))
+            _set_span_attribute(
+                span, f"{prefix}.content", message.get("content"))

         function_call = message.get("function_call")
         if function_call:
@@ -516,13 +527,10 @@
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    if not should_record_stream_token_usage():
-        return
-
     prompt_usage = -1
     completion_usage = -1

-    # First, try to get usage from API response
+    # Use token usage from API response only
    if complete_response.get("usage"):
         usage = complete_response["usage"]
         if usage.get("prompt_tokens"):
@@ -530,32 +538,6 @@ def _set_streaming_token_metrics(
         if usage.get("completion_tokens"):
             completion_usage = usage["completion_tokens"]

-    # If API response doesn't have usage, fallback to tiktoken calculation
-    if prompt_usage == -1 or completion_usage == -1:
-        model_name = (
-            complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
-        )
-
-        # Calculate prompt tokens if not available from API
-        if prompt_usage == -1 and request_kwargs and request_kwargs.get("messages"):
-            prompt_content = ""
-            for msg in request_kwargs.get("messages"):
-                if msg.get("content"):
-                    prompt_content += msg.get("content")
-            if model_name and should_record_stream_token_usage():
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-        # Calculate completion tokens if not available from API
-        if completion_usage == -1 and complete_response.get("choices"):
-            completion_content = ""
-            for choice in complete_response.get("choices"):
-                if choice.get("message") and choice.get("message").get("content"):
-                    completion_content += choice["message"]["content"]
-            if model_name and should_record_stream_token_usage():
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
     # span record
     _set_span_stream_usage(span, prompt_usage, completion_usage)

@@ -566,7 +548,8 @@ def _set_streaming_token_metrics(
             **shared_attributes,
             SpanAttributes.LLM_TOKEN_TYPE: "input",
         }
-        token_counter.record(prompt_usage, attributes=attributes_with_token_type)
+        token_counter.record(
+            prompt_usage, attributes=attributes_with_token_type)

     if isinstance(completion_usage, int) and completion_usage >= 0:
         attributes_with_token_type = {
@@ -619,11 +602,34 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}

+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+        return result

     async def __aenter__(self):
         return self
@@ -643,6 +649,11 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
@@ -654,13 +665,19 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
             return chunk

     def _process_item(self, item):
-        self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+        self._span.add_event(
+            name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

         if self._first_token and self._streaming_time_to_first_token:
             self._time_of_first_token = time.time()
@@ -721,10 +738,82 @@ class ChatStream(ObjectProxy):
                 emit_event(_parse_choice_event(choice))
         else:
             if should_send_prompts():
-                _set_completions(self._span, self._complete_response.get("choices"))
+                _set_completions(
+                    self._span, self._complete_response.get("choices"))

         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed"))
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )


 # Backward compatibility with OpenAI v0
@@ -755,7 +844,8 @@ def _build_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -825,7 +915,8 @@ async def _abuild_from_streaming_response(

         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -943,7 +1034,8 @@ def _(choice: dict) -> ChoiceEvent:

     content = choice.get("message").get("content", "") if has_message else None
     role = choice.get("message").get("role") if has_message else "unknown"
-    finish_reason = choice.get("finish_reason") if has_finish_reason else "unknown"
+    finish_reason = choice.get(
+        "finish_reason") if has_finish_reason else "unknown"

     if has_tool_calls and has_function_call:
         tool_calls = message.get("tool_calls") + [message.get("function_call")]
@@ -982,7 +1074,8 @@ def _accumulate_stream_items(item, complete_response):

     # prompt filter results
     if item.get("prompt_filter_results"):
-        complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
+        complete_response["prompt_filter_results"] = item.get(
+            "prompt_filter_results")

     for choice in item.get("choices"):
         index = choice.get("index")
@@ -1029,4 +1122,5 @@ def _accumulate_stream_items(item, complete_response):
             if tool_call_function and tool_call_function.get("name"):
                 span_function["name"] = tool_call_function.get("name")
             if tool_call_function and tool_call_function.get("arguments"):
-                span_function["arguments"] += tool_call_function.get("arguments")
+                span_function["arguments"] += tool_call_function.get(
+                    "arguments")
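
With the tiktoken fallback removed, the streaming wrappers above only record token usage when the API response itself reports it. One way a caller can make sure streamed chat completions carry that usage block, sketched under the assumption of the OpenAI v1 Python client (the model name is illustrative):

    from openai import OpenAI

    client = OpenAI()
    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        # Ask the API to append a final chunk whose `usage` field holds the
        # prompt/completion token counts, so the instrumentation can record it.
        stream_options={"include_usage": True},
    )
    for chunk in stream:
        pass  # only the last chunk has a populated `usage`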
opentelemetry/instrumentation/openai/shared/completion_wrappers.py

@@ -8,11 +8,9 @@ from opentelemetry.instrumentation.openai.shared import (
     _set_response_attributes,
     _set_span_attribute,
     _set_span_stream_usage,
-    get_token_count_from_string,
     is_streaming_response,
     model_as_dict,
     propagate_trace_context,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -231,35 +229,19 @@ def _emit_streaming_response_events(complete_response):

 @dont_throw
 def _set_token_usage(span, request_kwargs, complete_response):
-    # use tiktoken calculate token usage
-    if should_record_stream_token_usage():
-        prompt_usage = -1
-        completion_usage = -1
+    prompt_usage = -1
+    completion_usage = -1

-        # prompt_usage
-        if request_kwargs and request_kwargs.get("prompt"):
-            prompt_content = request_kwargs.get("prompt")
-            model_name = complete_response.get("model") or None
+    # Use token usage from API response only
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]

-            if model_name:
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-        # completion_usage
-        if complete_response.get("choices"):
-            completion_content = ""
-            model_name = complete_response.get("model") or None
-
-            for choice in complete_response.get("choices"):
-                if choice.get("text"):
-                    completion_content += choice.get("text")
-
-            if model_name:
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
-        # span record
-        _set_span_stream_usage(span, prompt_usage, completion_usage)
+    # span record
+    _set_span_stream_usage(span, prompt_usage, completion_usage)


 @dont_throw
@@ -269,6 +251,11 @@ def _accumulate_streaming_response(complete_response, item):

     complete_response["model"] = item.get("model")
     complete_response["id"] = item.get("id")
+
+    # capture usage information from the stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+
     for choice in item.get("choices"):
         index = choice.get("index")
         if len(complete_response.get("choices")) <= index:
opentelemetry/instrumentation/openai/shared/config.py

@@ -4,7 +4,6 @@ from opentelemetry._events import EventLogger


 class Config:
-    enrich_token_usage = False
     enrich_assistant = False
     exception_logger = None
     get_common_metrics_attributes: Callable[[], dict] = lambda: {}
opentelemetry/instrumentation/openai/utils.py

@@ -31,10 +31,6 @@ def is_metrics_enabled() -> bool:
     return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"


-def should_record_stream_token_usage():
-    return Config.enrich_token_usage
-
-
 def _with_image_gen_metric_wrapper(func):
     def _with_metric(duration_histogram, exception_counter):
         def wrapper(wrapped, instance, args, kwargs):
pyproject.toml

@@ -8,7 +8,7 @@ show_missing = true

 [tool.poetry]
 name = "opentelemetry-instrumentation-openai"
-version = "0.43.1"
+version = "0.44.0"
 description = "OpenTelemetry OpenAI instrumentation"
 authors = [
   "Gal Kleinman <gal@traceloop.com>",
@@ -28,7 +28,6 @@ opentelemetry-api = "^1.28.0"
 opentelemetry-instrumentation = ">=0.50b0"
 opentelemetry-semantic-conventions = ">=0.50b0"
 opentelemetry-semantic-conventions-ai = "0.4.11"
-tiktoken = ">=0.6.0, <1"

 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
@@ -42,6 +41,7 @@ pytest-recording = "^0.13.1"
 openai = { extras = ["datalib"], version = ">=1.66.0" }
 opentelemetry-sdk = "^1.27.0"
 pytest-asyncio = "^0.23.7"
+requests = "^2.31.0"

 [build-system]
 requires = ["poetry-core"]
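
Since tiktoken is no longer a runtime dependency and get_token_count_from_string is gone, applications that still want estimated token counts for responses without a usage block can compute them on their own. A hedged sketch that mirrors the removed helper; the function name and fallback encoding below are illustrative, not part of this package:

    import tiktoken

    def count_tokens(text: str, model_name: str) -> int:
        # Mirrors the removed helper: look up the model's encoding, then
        # count the encoded tokens. Falls back to a generic encoding when
        # tiktoken does not recognize the model name.
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            encoding = tiktoken.get_encoding("cl100k_base")  # illustrative fallback
        return len(encoding.encode(text))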