lmnr 0.6.21__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff compares the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (34)
  1. lmnr/__init__.py +0 -4
  2. lmnr/opentelemetry_lib/decorators/__init__.py +81 -32
  3. lmnr/opentelemetry_lib/litellm/__init__.py +5 -2
  4. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +6 -2
  5. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +11 -2
  6. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +3 -0
  7. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
  8. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +6 -0
  9. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +141 -9
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +10 -2
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +6 -2
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +8 -2
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +4 -1
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +20 -4
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +190 -0
  16. lmnr/opentelemetry_lib/tracing/__init__.py +89 -1
  17. lmnr/opentelemetry_lib/tracing/context.py +126 -0
  18. lmnr/opentelemetry_lib/tracing/processor.py +5 -6
  19. lmnr/opentelemetry_lib/tracing/tracer.py +29 -0
  20. lmnr/sdk/browser/browser_use_otel.py +5 -5
  21. lmnr/sdk/browser/patchright_otel.py +14 -0
  22. lmnr/sdk/browser/playwright_otel.py +32 -6
  23. lmnr/sdk/browser/pw_utils.py +119 -112
  24. lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
  25. lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
  26. lmnr/sdk/laminar.py +156 -186
  27. lmnr/sdk/types.py +17 -11
  28. lmnr/version.py +1 -1
  29. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/METADATA +3 -2
  30. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/RECORD +32 -31
  31. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/WHEEL +1 -1
  32. lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
  33. lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
  34. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/entry_points.txt +0 -0
lmnr/__init__.py CHANGED
@@ -9,7 +9,6 @@ from .sdk.types import (
     HumanEvaluator,
     RunAgentResponseChunk,
     StepChunkContent,
-    TracingLevel,
 )
 from .sdk.decorators import observe
 from .sdk.types import LaminarSpanContext
@@ -18,7 +17,6 @@ from .opentelemetry_lib.tracing.attributes import Attributes
 from .opentelemetry_lib.tracing.instruments import Instruments
 from .opentelemetry_lib.tracing.processor import LaminarSpanProcessor
 from .opentelemetry_lib.tracing.tracer import get_laminar_tracer_provider, get_tracer
-from opentelemetry.trace import use_span
 
 __all__ = [
     "AgentOutput",
@@ -36,10 +34,8 @@ __all__ = [
     "LaminarSpanProcessor",
     "RunAgentResponseChunk",
     "StepChunkContent",
-    "TracingLevel",
     "get_laminar_tracer_provider",
     "get_tracer",
     "evaluate",
     "observe",
-    "use_span",
 ]
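The removed exports narrow the package's public surface: `TracingLevel` and the re-exported OpenTelemetry `use_span` are no longer importable from the package root. A minimal migration sketch for downstream code that only needs `use_span` (the upstream OpenTelemetry import path is unchanged):

# 0.6.21:
# from lmnr import use_span
# 0.7.1: import the helper from the OpenTelemetry API directly.
from opentelemetry.trace import use_span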
lmnr/opentelemetry_lib/decorators/__init__.py CHANGED
@@ -5,13 +5,19 @@ import orjson
 import types
 from typing import Any, AsyncGenerator, Callable, Generator, Literal
 
-from opentelemetry import trace
 from opentelemetry import context as context_api
-from opentelemetry.trace import Span
-
+from opentelemetry.trace import Span, Status, StatusCode
+
+from lmnr.opentelemetry_lib.tracing.context import (
+    CONTEXT_SESSION_ID_KEY,
+    CONTEXT_USER_ID_KEY,
+    attach_context,
+    detach_context,
+    get_event_attributes_from_context,
+)
 from lmnr.sdk.utils import get_input_from_func_args, is_method
 from lmnr.opentelemetry_lib import MAX_MANUAL_SPAN_PAYLOAD_SIZE
-from lmnr.opentelemetry_lib.tracing.tracer import get_tracer
+from lmnr.opentelemetry_lib.tracing.tracer import get_tracer_with_context
 from lmnr.opentelemetry_lib.tracing.attributes import (
     ASSOCIATION_PROPERTIES,
     SPAN_INPUT,
@@ -37,6 +43,7 @@ def default_json(o):
     try:
         return str(o)
     except Exception:
+        logger.debug("Failed to serialize data to JSON, inner type: %s", type(o))
         pass
     return DEFAULT_PLACEHOLDER
 
@@ -61,8 +68,13 @@ def _setup_span(
     span_name: str, span_type: str, association_properties: dict[str, Any] | None
 ):
     """Set up a span with the given name, type, and association properties."""
-    with get_tracer() as tracer:
-        span = tracer.start_span(span_name, attributes={SPAN_TYPE: span_type})
+    with get_tracer_with_context() as (tracer, isolated_context):
+        # Create span in isolated context
+        span = tracer.start_span(
+            span_name,
+            context=isolated_context,
+            attributes={SPAN_TYPE: span_type},
+        )
 
         if association_properties is not None:
             for key, value in association_properties.items():
@@ -148,10 +160,10 @@ def _process_output(
         pass
 
 
-def _cleanup_span(span: Span, ctx_token):
+def _cleanup_span(span: Span, wrapper: TracerWrapper):
     """Clean up span and context."""
     span.end()
-    context_api.detach(ctx_token)
+    wrapper.pop_span_context()
 
 
 def observe_base(
@@ -171,10 +183,25 @@ def observe_base(
             return fn(*args, **kwargs)
 
         span_name = name or fn.__name__
+        wrapper = TracerWrapper()
 
        span = _setup_span(span_name, span_type, association_properties)
-        ctx = trace.set_span_in_context(span, context_api.get_current())
-        ctx_token = context_api.attach(ctx)
+        new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
+        ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
@@ -184,9 +211,12 @@ def observe_base(
             res = fn(*args, **kwargs)
         except Exception as e:
             _process_exception(span, e)
-            _cleanup_span(span, ctx_token)
+            _cleanup_span(span, wrapper)
             raise e
-
+        finally:
+            # Always restore global context
+            context_api.detach(ctx_token)
+            detach_context(isolated_ctx_token)
         # span will be ended in the generator
         if isinstance(res, types.GeneratorType):
             return _handle_generator(span, ctx_token, res)
@@ -201,7 +231,7 @@ def observe_base(
             return await _ahandle_generator(span, ctx_token, res)
 
         _process_output(span, res, ignore_output, output_formatter)
-        _cleanup_span(span, ctx_token)
+        _cleanup_span(span, wrapper)
         return res
 
     return wrap
@@ -227,10 +257,25 @@ def async_observe_base(
             return await fn(*args, **kwargs)
 
         span_name = name or fn.__name__
+        wrapper = TracerWrapper()
 
         span = _setup_span(span_name, span_type, association_properties)
-        ctx = trace.set_span_in_context(span, context_api.get_current())
-        ctx_token = context_api.attach(ctx)
+        new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
+        ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
@@ -240,8 +285,12 @@ def async_observe_base(
             res = await fn(*args, **kwargs)
         except Exception as e:
             _process_exception(span, e)
-            _cleanup_span(span, ctx_token)
+            _cleanup_span(span, wrapper)
             raise e
+        finally:
+            # Always restore global context
+            context_api.detach(ctx_token)
+            detach_context(isolated_ctx_token)
 
         # span will be ended in the generator
         if isinstance(res, types.AsyncGeneratorType):
@@ -250,7 +299,7 @@ def async_observe_base(
             return await _ahandle_generator(span, ctx_token, res)
 
         _process_output(span, res, ignore_output, output_formatter)
-        _cleanup_span(span, ctx_token)
+        _cleanup_span(span, wrapper)
         return res
 
     return wrap
@@ -258,24 +307,24 @@ def async_observe_base(
     return decorate
 
 
-def _handle_generator(span: Span, ctx_token, res: Generator[Any, Any, Any]):
-    yield from res
-
-    span.end()
-    if ctx_token is not None:
-        context_api.detach(ctx_token)
-
+def _handle_generator(span: Span, wrapper: TracerWrapper, res: Generator):
+    try:
+        yield from res
+    finally:
+        _cleanup_span(span, wrapper)
 
-async def _ahandle_generator(span: Span, ctx_token, res: AsyncGenerator[Any, Any]):
-    # async with contextlib.aclosing(res) as closing_gen:
-    async for part in res:
-        yield part
 
-    span.end()
-    if ctx_token is not None:
-        context_api.detach(ctx_token)
+async def _ahandle_generator(span: Span, wrapper: TracerWrapper, res: AsyncGenerator):
+    try:
+        async for part in res:
+            yield part
+    finally:
+        _cleanup_span(span, wrapper)
 
 
 def _process_exception(span: Span, e: Exception):
     # Note that this `escaped` is sent as a StringValue("True"), not a boolean.
-    span.record_exception(e, escaped=True)
+    span.record_exception(
+        e, attributes=get_event_attributes_from_context(), escaped=True
+    )
+    span.set_status(Status(StatusCode.ERROR, str(e)))
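The rewritten decorator keeps two contexts in sync: spans are parented in Laminar's isolated context (via `push_span_context`/`pop_span_context` on `TracerWrapper`), and the same context is also attached to the global OpenTelemetry context so third-party auto-instrumentations still see the correct parent. The token discipline both paths rely on is the standard `opentelemetry.context` attach/detach pattern; a minimal standalone sketch, with a hypothetical key standing in for `CONTEXT_SESSION_ID_KEY`:

from opentelemetry import context as context_api

SESSION_KEY = "example.session_id"  # hypothetical stand-in for CONTEXT_SESSION_ID_KEY

def call_with_session(session_id, fn):
    # Derive a new context from the current one without mutating it.
    new_ctx = context_api.set_value(SESSION_KEY, session_id)
    # attach() returns a token that restores the previous context.
    token = context_api.attach(new_ctx)
    try:
        return fn()
    finally:
        # Always detach, even on error, mirroring the decorator's new `finally` block.
        context_api.detach(token)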
lmnr/opentelemetry_lib/litellm/__init__.py CHANGED
@@ -7,6 +7,7 @@ from opentelemetry.trace import SpanKind, Status, StatusCode, Tracer
 from lmnr.opentelemetry_lib.litellm.utils import model_as_dict, set_span_attribute
 from lmnr.opentelemetry_lib.tracing import TracerWrapper
 
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from lmnr.opentelemetry_lib.utils.package_check import is_package_installed
 from lmnr.sdk.log import get_default_logger
 
@@ -141,10 +142,12 @@ try:
             else:
                 span.set_status(Status(StatusCode.ERROR))
                 if isinstance(response_obj, Exception):
-                    span.record_exception(response_obj)
+                    attributes = get_event_attributes_from_context()
+                    span.record_exception(response_obj, attributes=attributes)
 
         except Exception as e:
-            span.record_exception(e)
+            attributes = get_event_attributes_from_context()
+            span.record_exception(e, attributes=attributes)
             logger.error(f"Error in Laminar LiteLLM instrumentation: {e}")
         finally:
             span.end(int(end_time.timestamp() * 1e9))
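Both exception paths now attach context-derived attributes to the recorded exception. `Span.record_exception` accepts an optional `attributes` mapping that lands on the exception event rather than on the span itself; a sketch of the call shape, with a made-up attribute key in place of whatever `get_event_attributes_from_context` returns:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("litellm.completion") as span:
    try:
        raise RuntimeError("provider error")
    except RuntimeError as exc:
        # The attributes are recorded on the exception event itself.
        span.record_exception(exc, attributes={"example.session_id": "abc"})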
lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py CHANGED
@@ -30,6 +30,8 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -396,9 +398,10 @@ def _wrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "Anthropic",
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, event_logger, kwargs)
@@ -493,9 +496,10 @@ async def _awrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "Anthropic",
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
     await _ahandle_input(span, event_logger, kwargs)
 
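Besides lowercasing the `LLM_SYSTEM` value to `anthropic`, this release passes `context=get_current_context()` to `start_span` here and, as shown below, in the google_genai, groq, and OpenAI wrappers, so the client span is parented from an explicit context instead of the implicit current one. The underlying OpenTelemetry call shape, as a sketch:

from opentelemetry import trace
from opentelemetry.trace import SpanKind

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("parent"):
    # Build an explicit Context carrying the desired parent span...
    parent_ctx = trace.set_span_in_context(trace.get_current_span())
    # ...and hand it to start_span instead of relying on the implicit global context.
    child = tracer.start_span("llm.completion", kind=SpanKind.CLIENT, context=parent_ctx)
    child.end()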
lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py CHANGED
@@ -8,6 +8,11 @@ from typing import AsyncGenerator, Callable, Collection, Generator
 
 from google.genai import types
 
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
+
 from .config import (
     Config,
 )
@@ -474,6 +479,7 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -488,8 +494,9 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
@@ -509,6 +516,7 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -525,8 +533,9 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py CHANGED
@@ -27,6 +27,7 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -245,6 +246,7 @@ def _wrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
@@ -327,6 +329,7 @@ async def _awrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py CHANGED
@@ -12,10 +12,7 @@ from langchain_core.runnables.graph import Graph
 from opentelemetry.trace import Tracer
 from wrapt import wrap_function_wrapper
 from opentelemetry.trace import get_tracer
-
-from lmnr.opentelemetry_lib.tracing.context_properties import (
-    update_association_properties,
-)
+from opentelemetry.context import get_value, attach, set_value
 
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import unwrap
@@ -45,12 +42,13 @@ def wrap_pregel_stream(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs)
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        },
-    )
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
     return wrapped(*args, **kwargs)
 
 
@@ -75,12 +73,14 @@ async def async_wrap_pregel_stream(
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        },
-    )
+
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
 
     async for item in wrapped(*args, **kwargs):
         yield item
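With `update_association_properties` gone (the whole `context_properties` module is deleted in this release, per the file list), the graph topology is kept directly in the OpenTelemetry context under the `lmnr.langgraph.graph` key. A sketch of the read-merge-reattach pattern the wrappers use (they keep no token from `attach`, so the merged value stays visible to downstream spans):

from opentelemetry.context import attach, get_value, set_value

GRAPH_KEY = "lmnr.langgraph.graph"

def merge_graph_metadata(entries: dict) -> None:
    # Read the existing mapping (if any), merge, and re-attach the new context.
    merged = dict(get_value(GRAPH_KEY) or {})
    merged.update(entries)
    attach(set_value(GRAPH_KEY, merged))

merge_graph_metadata({"langgraph.nodes": "[]", "langgraph.edges": "[]"})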
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py CHANGED
@@ -395,6 +395,12 @@ def get_token_count_from_string(string: str, model_name: str):
                 f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
             )
             return None
+        except Exception as ex:
+            # Other exceptions in tiktoken
+            logger.warning(
+                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
+            )
+            return None
 
         tiktoken_encodings[model_name] = encoding
     else:
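The new catch-all mirrors the existing unknown-model handler, so any tiktoken failure degrades to `None` (no token counts) instead of breaking the instrumented call. A standalone sketch of the same fallback shape, assuming `tiktoken` is installed:

import tiktoken

def safe_encoding_for_model(model_name: str):
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        # tiktoken raises KeyError for model names it does not know.
        return None
    except Exception:
        # Any other tiktoken failure also degrades to "no token usage".
        return None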
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py CHANGED
@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -39,6 +40,10 @@ from ..utils import (
     should_emit_events,
     should_send_prompts,
 )
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -87,6 +92,7 @@ def chat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     run_async(_handle_request(span, kwargs, instance))
@@ -109,7 +115,8 @@
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -184,6 +191,7 @@ async def achat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     await _handle_request(span, kwargs, instance)
@@ -208,7 +216,8 @@
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -293,6 +302,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -307,6 +317,7 @@
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )
 
     # span attributes
@@ -324,13 +335,19 @@
 
 
 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )
 
     # token metrics
@@ -517,11 +534,9 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
     if not should_record_stream_token_usage():
         return
 
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1
@@ -618,11 +633,35 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}
 
+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, "_cleanup_completed") and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception
+            )
+
+        return result
 
     async def __aenter__(self):
         return self
@@ -642,7 +681,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -653,7 +697,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -724,6 +773,82 @@ class ChatStream(ObjectProxy):
 
         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed")
+                        )
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if (
+            self._start_time
+            and isinstance(self._start_time, (float, int))
+            and self._duration_histogram
+        ):
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )
 
 
 # Backward compatibility with OpenAI v0
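The `_ensure_cleanup` guard above is a lock-protected run-once pattern: `__exit__`, `__del__`, and the mid-iteration error paths can all race to close the same span, and `_cleanup_completed` under `_cleanup_lock` makes the close idempotent. Reduced to its skeleton (`close_span` is a hypothetical stand-in for the span and metrics work):

import threading

class OnceCleanup:
    def __init__(self, close_span):
        self._close_span = close_span
        self._cleanup_completed = False
        self._cleanup_lock = threading.Lock()

    def ensure_cleanup(self):
        with self._cleanup_lock:
            if self._cleanup_completed:
                return  # another path already cleaned up
            try:
                self._close_span()
            finally:
                # Mark done even on failure so later callers never retry forever.
                self._cleanup_completed = True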
@@ -972,6 +1097,13 @@ def _accumulate_stream_items(item, complete_response):
         complete_response["model"] = item.get("model")
         complete_response["id"] = item.get("id")
 
+        # capture usage information from the last stream chunks
+        if item.get("usage"):
+            complete_response["usage"] = item.get("usage")
+        elif item.get("choices") and item["choices"][0].get("usage"):
+            # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+            complete_response["usage"] = item["choices"][0].get("usage")
+
     # prompt filter results
     if item.get("prompt_filter_results"):
         complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
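Usage on streamed completions only arrives when the caller opts in: with `stream_options={"include_usage": True}` the OpenAI API sends a final chunk whose `usage` field is populated (and, per the comment above, some providers put it under `choices[0]` instead, which the accumulator also handles). A minimal consumer sketch, assuming the standard `openai` Python client and a placeholder model name:

from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
    stream_options={"include_usage": True},  # request the trailing usage chunk
)

usage = None
for chunk in stream:
    # Content chunks carry deltas; the final chunk carries only usage.
    if chunk.usage is not None:
        usage = chunk.usage

print(usage)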