lmnr 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/opentelemetry_lib/decorators/__init__.py +43 -4
- lmnr/opentelemetry_lib/litellm/__init__.py +5 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +8 -3
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +6 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +139 -10
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +8 -3
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +6 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +6 -3
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +4 -1
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +14 -5
- lmnr/opentelemetry_lib/tracing/context.py +18 -1
- lmnr/sdk/browser/pw_utils.py +43 -108
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/laminar.py +51 -26
- lmnr/sdk/types.py +17 -5
- lmnr/version.py +1 -1
- {lmnr-0.7.0.dist-info → lmnr-0.7.1.dist-info}/METADATA +1 -1
- {lmnr-0.7.0.dist-info → lmnr-0.7.1.dist-info}/RECORD +20 -20
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- {lmnr-0.7.0.dist-info → lmnr-0.7.1.dist-info}/WHEEL +0 -0
- {lmnr-0.7.0.dist-info → lmnr-0.7.1.dist-info}/entry_points.txt +0 -0
lmnr/opentelemetry_lib/decorators/__init__.py

@@ -6,8 +6,15 @@ import types
 from typing import Any, AsyncGenerator, Callable, Generator, Literal
 
 from opentelemetry import context as context_api
-from opentelemetry.trace import Span
-
+from opentelemetry.trace import Span, Status, StatusCode
+
+from lmnr.opentelemetry_lib.tracing.context import (
+    CONTEXT_SESSION_ID_KEY,
+    CONTEXT_USER_ID_KEY,
+    attach_context,
+    detach_context,
+    get_event_attributes_from_context,
+)
 from lmnr.sdk.utils import get_input_from_func_args, is_method
 from lmnr.opentelemetry_lib import MAX_MANUAL_SPAN_PAYLOAD_SIZE
 from lmnr.opentelemetry_lib.tracing.tracer import get_tracer_with_context

@@ -180,7 +187,21 @@ def observe_base(
 
         span = _setup_span(span_name, span_type, association_properties)
         new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
         ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter

@@ -195,7 +216,7 @@ def observe_base(
         finally:
             # Always restore global context
             context_api.detach(ctx_token)
-
+            detach_context(isolated_ctx_token)
         # span will be ended in the generator
         if isinstance(res, types.GeneratorType):
             return _handle_generator(span, ctx_token, res)

@@ -240,7 +261,21 @@ def async_observe_base(
 
         span = _setup_span(span_name, span_type, association_properties)
         new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
         ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter

@@ -255,6 +290,7 @@ def async_observe_base(
         finally:
             # Always restore global context
             context_api.detach(ctx_token)
+            detach_context(isolated_ctx_token)
 
         # span will be ended in the generator
         if isinstance(res, types.AsyncGeneratorType):

@@ -288,4 +324,7 @@ async def _ahandle_generator(span: Span, wrapper: TracerWrapper, res: AsyncGenerator):
 
 def _process_exception(span: Span, e: Exception):
     # Note that this `escaped` is sent as a StringValue("True"), not a boolean.
-    span.record_exception(e, escaped=True)
+    span.record_exception(
+        e, attributes=get_event_attributes_from_context(), escaped=True
+    )
+    span.set_status(Status(StatusCode.ERROR, str(e)))
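The `observe` decorators now copy `session_id` and `user_id` from the association properties into both the global OTEL context and Laminar's isolated context, so spans and exception events created anywhere below the decorated function can recover them. A minimal sketch of the underlying mechanism, using only the public OpenTelemetry context API (the key name here is illustrative, not the SDK's actual key):

    from opentelemetry import context as context_api
    from opentelemetry.context import create_key, get_value

    # Illustrative key; the SDK builds its own via create_key(f"lmnr.{SESSION_ID}").
    SESSION_KEY = create_key("lmnr.association.properties.session_id")

    ctx = context_api.set_value(SESSION_KEY, "session-123", context_api.get_current())
    token = context_api.attach(ctx)
    try:
        # Any code running here, including third-party auto-instrumentation
        # callbacks, can read the session id back from the current context.
        assert get_value(SESSION_KEY) == "session-123"
    finally:
        context_api.detach(token)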
lmnr/opentelemetry_lib/litellm/__init__.py

@@ -7,6 +7,7 @@ from opentelemetry.trace import SpanKind, Status, StatusCode, Tracer
 from lmnr.opentelemetry_lib.litellm.utils import model_as_dict, set_span_attribute
 from lmnr.opentelemetry_lib.tracing import TracerWrapper
 
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from lmnr.opentelemetry_lib.utils.package_check import is_package_installed
 from lmnr.sdk.log import get_default_logger
 

@@ -141,10 +142,12 @@ try:
             else:
                 span.set_status(Status(StatusCode.ERROR))
                 if isinstance(response_obj, Exception):
-                    span.record_exception(response_obj)
+                    attributes = get_event_attributes_from_context()
+                    span.record_exception(response_obj, attributes=attributes)
 
         except Exception as e:
-            span.record_exception(e)
+            attributes = get_event_attributes_from_context()
+            span.record_exception(e, attributes=attributes)
             logger.error(f"Error in Laminar LiteLLM instrumentation: {e}")
         finally:
             span.end(int(end_time.timestamp() * 1e9))
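The same change recurs across the LiteLLM, Google GenAI, and OpenAI instrumentations in this release: every `span.record_exception(e)` call gains `attributes=get_event_attributes_from_context()`, so the exception event itself carries the session/user IDs. A hedged, self-contained sketch of the pattern against the plain OpenTelemetry API (the span name and attribute dict are stand-ins):

    from opentelemetry import trace
    from opentelemetry.trace import Status, StatusCode

    tracer = trace.get_tracer("sketch")

    def traced_call(fn, event_attributes: dict):
        # event_attributes stands in for get_event_attributes_from_context().
        # Automatic exception recording is disabled so the event is recorded
        # exactly once, with our attributes attached.
        with tracer.start_as_current_span(
            "llm.call", record_exception=False, set_status_on_exception=False
        ) as span:
            try:
                return fn()
            except Exception as e:
                # The attributes land on the exception *event*, not the span.
                span.record_exception(e, attributes=event_attributes)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                raise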
lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py

@@ -8,7 +8,10 @@ from typing import AsyncGenerator, Callable, Collection, Generator
 
 from google.genai import types
 
-from lmnr.opentelemetry_lib.tracing.context import get_current_context
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 
 from .config import (
     Config,

@@ -491,8 +494,9 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e

@@ -529,8 +533,9 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py

@@ -395,6 +395,12 @@ def get_token_count_from_string(string: str, model_name: str):
                 f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
             )
             return None
+        except Exception as ex:
+            # Other exceptions in tiktoken
+            logger.warning(
+                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
+            )
+            return None
 
         tiktoken_encodings[model_name] = encoding
     else:
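Previously only `KeyError` (an unknown model name) was caught when resolving the tiktoken encoding; any other tiktoken failure propagated into the instrumented call. A hedged, standalone sketch of the hardened lookup (not the module's actual structure):

    import tiktoken

    def safe_encoding_for_model(model_name: str):
        """Return a tiktoken encoding, or None on any failure."""
        try:
            return tiktoken.encoding_for_model(model_name)
        except KeyError:
            # Unknown model name: the case that was already handled.
            return None
        except Exception:
            # Any other tiktoken error (data files, I/O, etc.): newly handled.
            return None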
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py

@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union

@@ -39,7 +40,10 @@ from ..utils import (
     should_emit_events,
     should_send_prompts,
 )
-from lmnr.opentelemetry_lib.tracing.context import get_current_context
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE

@@ -111,7 +115,8 @@ def chat_wrapper(
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 

@@ -211,7 +216,8 @@ async def achat_wrapper(
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -296,6 +302,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)

@@ -310,6 +317,7 @@ def _handle_response(
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )
 
     # span attributes

@@ -327,13 +335,19 @@ def _handle_response(
 
 
 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )
 
     # token metrics

@@ -520,11 +534,9 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
     if not should_record_stream_token_usage():
         return
 
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1
 
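`_handle_response` and `_set_chat_metrics` now thread an explicit `is_streaming` flag into the shared metric attributes instead of a fixed value, so streaming and non-streaming chat completions produce distinguishable metric series. An illustrative sketch (the attribute keys are assumptions, not the instrumentation's actual names):

    def metric_shared_attributes_sketch(response_model: str, is_streaming: bool) -> dict:
        # Stand-in for metric_shared_attributes(); the real helper also
        # includes operation and server_address.
        return {"model": response_model, "stream": is_streaming}

    # The two code paths now emit distinguishable series:
    assert metric_shared_attributes_sketch("gpt-4o", True) != \
        metric_shared_attributes_sketch("gpt-4o", False)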
@@ -621,11 +633,35 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}
 
+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, "_cleanup_completed") and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        return self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception
+            )
+
+        return result
 
     async def __aenter__(self):
         return self

@@ -645,7 +681,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk

@@ -656,7 +697,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk

@@ -727,6 +773,82 @@ class ChatStream(ObjectProxy):
 
         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed")
+                        )
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if (
+            self._start_time
+            and isinstance(self._start_time, (float, int))
+            and self._duration_histogram
+        ):
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )
 
 
 # Backward compatibility with OpenAI v0
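The new `_ensure_cleanup` can be reached from `__exit__`, from `__del__` at garbage collection, and from exception paths during iteration, so it guards itself with a lock and a completion flag. A minimal, self-contained sketch of the same idempotent-cleanup pattern:

    import threading

    class IdempotentCleanup:
        """Cleanup that may be triggered from __exit__, __del__, or error paths."""

        def __init__(self):
            self._done = False
            self._lock = threading.Lock()

        def ensure_cleanup(self):
            with self._lock:
                if self._done:
                    return  # another path already cleaned up
                try:
                    self._close_resources()  # e.g. end spans, flush metrics
                finally:
                    # Mark done even on failure so callers never retry forever.
                    self._done = True

        def _close_resources(self):
            pass  # placeholder for the actual teardown work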
@@ -975,6 +1097,13 @@ def _accumulate_stream_items(item, complete_response):
     complete_response["model"] = item.get("model")
     complete_response["id"] = item.get("id")
 
+    # capture usage information from the last stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+    elif item.get("choices") and item["choices"][0].get("usage"):
+        # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+        complete_response["usage"] = item["choices"][0].get("usage")
+
     # prompt filter results
     if item.get("prompt_filter_results"):
         complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py

@@ -27,7 +27,10 @@ from ..utils import (
     should_emit_events,
     should_send_prompts,
 )
-from lmnr.opentelemetry_lib.tracing.context import get_current_context
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
 from opentelemetry.semconv_ai import (

@@ -65,7 +68,8 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs):
         response = wrapped(*args, **kwargs)
     except Exception as e:
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise

@@ -100,7 +104,8 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
         response = await wrapped(*args, **kwargs)
     except Exception as e:
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py

@@ -3,6 +3,8 @@ import time
 from collections.abc import Iterable
 
 from opentelemetry import context as context_api
+
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from ..shared import (
     OPENAI_LLM_USAGE_TOKEN_TYPES,
     _get_openai_base_url,

@@ -91,7 +93,8 @@ def embeddings_wrapper(
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 

@@ -156,7 +159,8 @@ async def aembeddings_wrapper(
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py

@@ -17,7 +17,10 @@ from ..utils import (
     dont_throw,
     should_emit_events,
 )
-from lmnr.opentelemetry_lib.tracing.context import get_current_context
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
 from opentelemetry.semconv_ai import LLMRequestTypeValues, SpanAttributes

@@ -132,7 +135,7 @@ def messages_list_wrapper(tracer, wrapped, instance, args, kwargs):
 
     if exception := run.get("exception"):
         span.set_attribute(ERROR_TYPE, exception.__class__.__name__)
-        span.record_exception(exception)
+        span.record_exception(exception, attributes=get_event_attributes_from_context())
         span.set_status(Status(StatusCode.ERROR, str(exception)))
         span.end(run.get("end_time"))
 

@@ -316,7 +319,7 @@ def runs_create_and_stream_wrapper(tracer, wrapped, instance, args, kwargs):
         return response
     except Exception as e:
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=get_event_attributes_from_context())
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py

@@ -1,3 +1,4 @@
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from ..shared import _set_span_attribute
 from ..shared.event_emitter import emit_event
 from ..shared.event_models import ChoiceEvent

@@ -69,7 +70,9 @@ class EventHandlerWrapper(AssistantEventHandler):
     @override
     def on_exception(self, exception: Exception):
         self._span.set_attribute(ERROR_TYPE, exception.__class__.__name__)
-        self._span.record_exception(exception)
+        self._span.record_exception(
+            exception, attributes=get_event_attributes_from_context()
+        )
         self._span.set_status(Status(StatusCode.ERROR, str(exception)))
         self._original_handler.on_exception(exception)
 
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py

@@ -36,7 +36,10 @@ except ImportError:
     ResponseOutputMessageParam = Dict[str, Any]
     RESPONSES_AVAILABLE = False
 
-from lmnr.opentelemetry_lib.tracing.context import get_current_context
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from openai._legacy_response import LegacyAPIResponse
 from opentelemetry import context as context_api
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY

@@ -433,7 +436,7 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
             context=get_current_context(),
         )
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=get_event_attributes_from_context())
         span.set_status(StatusCode.ERROR, str(e))
         if traced_data:
             set_data_attributes(traced_data, span)

@@ -529,7 +532,7 @@ async def async_responses_get_or_create_wrapper(
             context=get_current_context(),
         )
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=get_event_attributes_from_context())
         span.set_status(StatusCode.ERROR, str(e))
         if traced_data:
             set_data_attributes(traced_data, span)

@@ -597,7 +600,10 @@ def responses_cancel_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
             record_exception=True,
             context=get_current_context(),
         )
-        span.record_exception(Exception("Response cancelled"))
+        span.record_exception(
+            Exception("Response cancelled"),
+            attributes=get_event_attributes_from_context(),
+        )
         set_data_attributes(existing_data, span)
         span.end()
         return response

@@ -624,7 +630,10 @@ async def async_responses_cancel_wrapper(
             record_exception=True,
             context=get_current_context(),
        )
-        span.record_exception(Exception("Response cancelled"))
+        span.record_exception(
+            Exception("Response cancelled"),
+            attributes=get_event_attributes_from_context(),
+        )
         set_data_attributes(existing_data, span)
         span.end()
         return response
lmnr/opentelemetry_lib/tracing/context.py

@@ -2,7 +2,9 @@ import threading
 
 from abc import ABC, abstractmethod
 from contextvars import ContextVar
-from opentelemetry.context import Context, Token
+from opentelemetry.context import Context, Token, create_key, get_value
+
+from lmnr.opentelemetry_lib.tracing.attributes import SESSION_ID, USER_ID
 
 
 class _IsolatedRuntimeContext(ABC):

@@ -107,3 +109,18 @@ def attach_context(context: Context) -> Token[Context]:
 def detach_context(token: Token[Context]) -> None:
     """Detach a context from the isolated runtime context."""
     _ISOLATED_RUNTIME_CONTEXT.detach(token)
+
+
+CONTEXT_USER_ID_KEY = create_key(f"lmnr.{USER_ID}")
+CONTEXT_SESSION_ID_KEY = create_key(f"lmnr.{SESSION_ID}")
+
+
+def get_event_attributes_from_context(context: Context | None = None) -> dict[str, str]:
+    """Get the event attributes from the context."""
+    context = context or get_current_context()
+    attributes = {}
+    if session_id := get_value(CONTEXT_SESSION_ID_KEY, context):
+        attributes["lmnr.event.session_id"] = session_id
+    if user_id := get_value(CONTEXT_USER_ID_KEY, context):
+        attributes["lmnr.event.user_id"] = user_id
+    return attributes
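Taken together, this context module is the hub of the release: `observe` writes the IDs into the context, and every instrumentation reads them back through `get_event_attributes_from_context()` when recording exception events. A hedged end-to-end sketch; it assumes the SDK's public `observe` decorator accepts `session_id`/`user_id` as association properties:

    from lmnr import observe

    @observe(session_id="sess-1", user_id="user-1")
    def handler():
        # If this raises, the exception event recorded on the span should now
        # carry lmnr.event.session_id and lmnr.event.user_id attributes.
        raise ValueError("boom")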