lmnr 0.6.20__py3-none-any.whl → 0.6.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. lmnr/opentelemetry_lib/decorators/__init__.py +188 -138
  2. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +674 -0
  3. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
  4. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
  5. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
  6. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +256 -0
  7. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +295 -0
  8. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +179 -0
  9. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +485 -0
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
  16. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
  17. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +3 -3
  18. lmnr/opentelemetry_lib/tracing/__init__.py +1 -1
  19. lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +12 -7
  20. lmnr/opentelemetry_lib/tracing/processor.py +1 -1
  21. lmnr/opentelemetry_lib/utils/package_check.py +9 -0
  22. lmnr/sdk/browser/browser_use_otel.py +4 -2
  23. lmnr/sdk/browser/patchright_otel.py +0 -26
  24. lmnr/sdk/browser/playwright_otel.py +51 -78
  25. lmnr/sdk/browser/pw_utils.py +359 -114
  26. lmnr/sdk/decorators.py +39 -4
  27. lmnr/sdk/evaluations.py +23 -9
  28. lmnr/sdk/laminar.py +75 -48
  29. lmnr/version.py +1 -1
  30. {lmnr-0.6.20.dist-info → lmnr-0.6.21.dist-info}/METADATA +8 -7
  31. {lmnr-0.6.20.dist-info → lmnr-0.6.21.dist-info}/RECORD +33 -18
  32. {lmnr-0.6.20.dist-info → lmnr-0.6.21.dist-info}/WHEEL +1 -1
  33. {lmnr-0.6.20.dist-info → lmnr-0.6.21.dist-info}/entry_points.txt +0 -0
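The headline change in 0.6.21 is vendored OpenTelemetry instrumentation for the Anthropic and Groq client libraries (files 2–16), plus reworked browser and decorator tracing. Only three of the new files are expanded below. As a rough usage sketch of what the new instrumentation traces — hedged: Laminar.initialize and project_api_key follow the lmnr README, and the streaming call follows the anthropic SDK; none of these names come from this diff:

    # Illustrative sketch, not part of the diff: assumes lmnr's documented
    # Laminar.initialize() entry point and the anthropic client's streaming API.
    from anthropic import Anthropic
    from lmnr import Laminar

    Laminar.initialize(project_api_key="...")  # auto-instruments supported clients

    client = Anthropic()
    stream = client.messages.create(
        model="claude-3-5-sonnet-latest",
        max_tokens=256,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    for event in stream:
        pass  # each event also flows through the streaming wrapper shown below

Spans for such calls are built by the new streaming helpers in the first hunk.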
lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py (new file)
@@ -0,0 +1,295 @@
+ import logging
+ import time
+ from typing import Optional
+
+ from opentelemetry._events import EventLogger
+ from .config import Config
+ from .event_emitter import (
+     emit_streaming_response_events,
+ )
+ from .span_utils import (
+     set_streaming_response_attributes,
+ )
+ from .utils import (
+     count_prompt_tokens_from_request,
+     dont_throw,
+     error_metrics_attributes,
+     set_span_attribute,
+     shared_metrics_attributes,
+     should_emit_events,
+ )
+ from opentelemetry.metrics import Counter, Histogram
+ from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
+     GEN_AI_RESPONSE_ID,
+ )
+ from opentelemetry.semconv_ai import SpanAttributes
+ from opentelemetry.trace.status import Status, StatusCode
+
+ logger = logging.getLogger(__name__)
+
+
+ @dont_throw
+ def _process_response_item(item, complete_response):
+     if item.type == "message_start":
+         complete_response["model"] = item.message.model
+         complete_response["usage"] = dict(item.message.usage)
+         complete_response["id"] = item.message.id
+     elif item.type == "content_block_start":
+         index = item.index
+         if len(complete_response.get("events")) <= index:
+             complete_response["events"].append(
+                 {"index": index, "text": "", "type": item.content_block.type}
+             )
+     elif item.type == "content_block_delta" and item.delta.type in [
+         "thinking_delta",
+         "text_delta",
+     ]:
+         index = item.index
+         if item.delta.type == "thinking_delta":
+             complete_response["events"][index]["text"] += item.delta.thinking
+         elif item.delta.type == "text_delta":
+             complete_response["events"][index]["text"] += item.delta.text
+     elif item.type == "message_delta":
+         for event in complete_response.get("events", []):
+             event["finish_reason"] = item.delta.stop_reason
+         if item.usage:
+             if "usage" in complete_response:
+                 item_output_tokens = dict(item.usage).get("output_tokens", 0)
+                 existing_output_tokens = complete_response["usage"].get(
+                     "output_tokens", 0
+                 )
+                 complete_response["usage"]["output_tokens"] = (
+                     item_output_tokens + existing_output_tokens
+                 )
+             else:
+                 complete_response["usage"] = dict(item.usage)
+
+
+ def _set_token_usage(
+     span,
+     complete_response,
+     prompt_tokens,
+     completion_tokens,
+     metric_attributes: dict = {},
+     token_histogram: Histogram = None,
+     choice_counter: Counter = None,
+ ):
+     cache_read_tokens = (
+         complete_response.get("usage", {}).get("cache_read_input_tokens", 0) or 0
+     )
+     cache_creation_tokens = (
+         complete_response.get("usage", {}).get("cache_creation_input_tokens", 0) or 0
+     )
+
+     input_tokens = prompt_tokens + cache_read_tokens + cache_creation_tokens
+     total_tokens = input_tokens + completion_tokens
+
+     set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
+     set_span_attribute(
+         span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens
+     )
+     set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)
+
+     set_span_attribute(
+         span, SpanAttributes.LLM_RESPONSE_MODEL, complete_response.get("model")
+     )
+     set_span_attribute(
+         span, SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
+     )
+     set_span_attribute(
+         span,
+         SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS,
+         cache_creation_tokens,
+     )
+
+     if token_histogram and type(input_tokens) is int and input_tokens >= 0:
+         token_histogram.record(
+             input_tokens,
+             attributes={
+                 **metric_attributes,
+                 SpanAttributes.LLM_TOKEN_TYPE: "input",
+             },
+         )
+
+     if token_histogram and type(completion_tokens) is int and completion_tokens >= 0:
+         token_histogram.record(
+             completion_tokens,
+             attributes={
+                 **metric_attributes,
+                 SpanAttributes.LLM_TOKEN_TYPE: "output",
+             },
+         )
+
+     if type(complete_response.get("events")) is list and choice_counter:
+         for event in complete_response.get("events"):
+             choice_counter.add(
+                 1,
+                 attributes={
+                     **metric_attributes,
+                     SpanAttributes.LLM_RESPONSE_FINISH_REASON: event.get(
+                         "finish_reason"
+                     ),
+                 },
+             )
+
+
+ def _handle_streaming_response(span, event_logger, complete_response):
+     if should_emit_events() and event_logger:
+         emit_streaming_response_events(event_logger, complete_response)
+     else:
+         if not span.is_recording():
+             return
+         set_streaming_response_attributes(span, complete_response.get("events"))
+
+
+ @dont_throw
+ def build_from_streaming_response(
+     span,
+     response,
+     instance,
+     start_time,
+     token_histogram: Histogram = None,
+     choice_counter: Counter = None,
+     duration_histogram: Histogram = None,
+     exception_counter: Counter = None,
+     event_logger: Optional[EventLogger] = None,
+     kwargs: dict = {},
+ ):
+     complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
+     for item in response:
+         try:
+             yield item
+         except Exception as e:
+             attributes = error_metrics_attributes(e)
+             if exception_counter:
+                 exception_counter.add(1, attributes=attributes)
+             raise e
+         _process_response_item(item, complete_response)
+
+     metric_attributes = shared_metrics_attributes(complete_response)
+     set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+     if duration_histogram:
+         duration = time.time() - start_time
+         duration_histogram.record(
+             duration,
+             attributes=metric_attributes,
+         )
+
+     # calculate token usage
+     if Config.enrich_token_usage:
+         try:
+             completion_tokens = -1
+             # prompt_usage
+             if usage := complete_response.get("usage"):
+                 prompt_tokens = usage.get("input_tokens", 0) or 0
+             else:
+                 prompt_tokens = count_prompt_tokens_from_request(instance, kwargs)
+
+             # completion_usage
+             if usage := complete_response.get("usage"):
+                 completion_tokens = usage.get("output_tokens", 0) or 0
+             else:
+                 completion_content = ""
+                 if complete_response.get("events"):
+                     model_name = complete_response.get("model") or None
+                     for event in complete_response.get("events"):
+                         if event.get("text"):
+                             completion_content += event.get("text")
+
+                 if model_name and hasattr(instance, "count_tokens"):
+                     completion_tokens = instance.count_tokens(completion_content)
+
+             _set_token_usage(
+                 span,
+                 complete_response,
+                 prompt_tokens,
+                 completion_tokens,
+                 metric_attributes,
+                 token_histogram,
+                 choice_counter,
+             )
+         except Exception as e:
+             logger.warning("Failed to set token usage, error: %s", e)
+
+     _handle_streaming_response(span, event_logger, complete_response)
+
+     if span.is_recording():
+         span.set_status(Status(StatusCode.OK))
+     span.end()
+
+
+ @dont_throw
+ async def abuild_from_streaming_response(
+     span,
+     response,
+     instance,
+     start_time,
+     token_histogram: Histogram = None,
+     choice_counter: Counter = None,
+     duration_histogram: Histogram = None,
+     exception_counter: Counter = None,
+     event_logger: Optional[EventLogger] = None,
+     kwargs: dict = {},
+ ):
+     complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
+     async for item in response:
+         try:
+             yield item
+         except Exception as e:
+             attributes = error_metrics_attributes(e)
+             if exception_counter:
+                 exception_counter.add(1, attributes=attributes)
+             raise e
+         _process_response_item(item, complete_response)
+
+     set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+
+     metric_attributes = shared_metrics_attributes(complete_response)
+
+     if duration_histogram:
+         duration = time.time() - start_time
+         duration_histogram.record(
+             duration,
+             attributes=metric_attributes,
+         )
+
+     # calculate token usage
+     if Config.enrich_token_usage:
+         try:
+             # prompt_usage
+             if usage := complete_response.get("usage"):
+                 prompt_tokens = usage.get("input_tokens", 0)
+             else:
+                 prompt_tokens = count_prompt_tokens_from_request(instance, kwargs)
+
+             # completion_usage
+             if usage := complete_response.get("usage"):
+                 completion_tokens = usage.get("output_tokens", 0)
+             else:
+                 completion_content = ""
+                 if complete_response.get("events"):
+                     model_name = complete_response.get("model") or None
+                     for event in complete_response.get("events"):
+                         if event.get("text"):
+                             completion_content += event.get("text")
+
+                 if model_name and hasattr(instance, "count_tokens"):
+                     completion_tokens = instance.count_tokens(completion_content)
+
+             _set_token_usage(
+                 span,
+                 complete_response,
+                 prompt_tokens,
+                 completion_tokens,
+                 metric_attributes,
+                 token_histogram,
+                 choice_counter,
+             )
+         except Exception as e:
+             logger.warning("Failed to set token usage, error: %s", str(e))
+
+     _handle_streaming_response(span, event_logger, complete_response)
+
+     if span.is_recording():
+         span.set_status(Status(StatusCode.OK))
+     span.end()
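Note the shape of build_from_streaming_response above: it is itself a generator that re-yields each chunk to the caller before feeding it to _process_response_item, and the yield sits inside try/except so that exceptions the consumer throws into the generator are counted via exception_counter before being re-raised; the span is only finalized once the stream is exhausted. A minimal standalone sketch of that wrap-and-accumulate pattern (illustrative names only, not from this diff):

    import time

    def wrap_stream(stream, on_complete):
        """Re-yield items from `stream`, collect them, and report once exhausted."""
        collected = []
        start = time.time()
        for item in stream:
            yield item              # the caller consumes the chunk first
            collected.append(item)  # then the wrapper records it
        on_complete(collected, time.time() - start)

    # Iterating the wrapper drives both the caller and the accumulator.
    chunks = wrap_stream(iter(["Hel", "lo"]), lambda items, dur: print(items, round(dur, 3)))
    print("".join(chunks))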
lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py (new file)
@@ -0,0 +1,179 @@
+ import asyncio
+ import json
+ import logging
+ import os
+ import threading
+ import traceback
+ from importlib.metadata import version
+
+ from opentelemetry import context as context_api
+ from .config import Config
+ from opentelemetry.semconv_ai import SpanAttributes
+
+ GEN_AI_SYSTEM = "gen_ai.system"
+ GEN_AI_SYSTEM_ANTHROPIC = "anthropic"
+ _PYDANTIC_VERSION = version("pydantic")
+
+ LMNR_TRACE_CONTENT = "LMNR_TRACE_CONTENT"
+
+
+ def set_span_attribute(span, name, value):
+     if value is not None:
+         if value != "":
+             span.set_attribute(name, value)
+     return
+
+
+ def should_send_prompts():
+     return (
+         os.getenv(LMNR_TRACE_CONTENT) or "true"
+     ).lower() == "true" or context_api.get_value("override_enable_content_tracing")
+
+
+ def dont_throw(func):
+     """
+     A decorator that wraps the passed in function and logs exceptions instead of throwing them.
+     Works for both synchronous and asynchronous functions.
+     """
+     logger = logging.getLogger(func.__module__)
+
+     async def async_wrapper(*args, **kwargs):
+         try:
+             return await func(*args, **kwargs)
+         except Exception as e:
+             _handle_exception(e, func, logger)
+
+     def sync_wrapper(*args, **kwargs):
+         try:
+             return func(*args, **kwargs)
+         except Exception as e:
+             _handle_exception(e, func, logger)
+
+     def _handle_exception(e, func, logger):
+         logger.debug(
+             "OpenLLMetry failed to trace in %s, error: %s",
+             func.__name__,
+             traceback.format_exc(),
+         )
+         if Config.exception_logger:
+             Config.exception_logger(e)
+
+     return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
+
+
+ @dont_throw
+ def shared_metrics_attributes(response):
+     if not isinstance(response, dict):
+         response = response.__dict__
+
+     common_attributes = Config.get_common_metrics_attributes()
+
+     return {
+         **common_attributes,
+         GEN_AI_SYSTEM: GEN_AI_SYSTEM_ANTHROPIC,
+         SpanAttributes.LLM_RESPONSE_MODEL: response.get("model"),
+     }
+
+
+ @dont_throw
+ def error_metrics_attributes(exception):
+     return {
+         GEN_AI_SYSTEM: GEN_AI_SYSTEM_ANTHROPIC,
+         "error.type": exception.__class__.__name__,
+     }
+
+
+ @dont_throw
+ def count_prompt_tokens_from_request(anthropic, request):
+     prompt_tokens = 0
+     if hasattr(anthropic, "count_tokens"):
+         if request.get("prompt"):
+             prompt_tokens = anthropic.count_tokens(request.get("prompt"))
+         elif messages := request.get("messages"):
+             prompt_tokens = 0
+             for m in messages:
+                 content = m.get("content")
+                 if isinstance(content, str):
+                     prompt_tokens += anthropic.count_tokens(content)
+                 elif isinstance(content, list):
+                     for item in content:
+                         # TODO: handle image and tool tokens
+                         if isinstance(item, dict) and item.get("type") == "text":
+                             prompt_tokens += anthropic.count_tokens(
+                                 item.get("text", "")
+                             )
+     return prompt_tokens
+
+
+ @dont_throw
+ async def acount_prompt_tokens_from_request(anthropic, request):
+     prompt_tokens = 0
+     if hasattr(anthropic, "count_tokens"):
+         if request.get("prompt"):
+             prompt_tokens = await anthropic.count_tokens(request.get("prompt"))
+         elif messages := request.get("messages"):
+             prompt_tokens = 0
+             for m in messages:
+                 content = m.get("content")
+                 if isinstance(content, str):
+                     prompt_tokens += await anthropic.count_tokens(content)
+                 elif isinstance(content, list):
+                     for item in content:
+                         # TODO: handle image and tool tokens
+                         if isinstance(item, dict) and item.get("type") == "text":
+                             prompt_tokens += await anthropic.count_tokens(
+                                 item.get("text", "")
+                             )
+     return prompt_tokens
+
+
+ def run_async(method):
+     try:
+         loop = asyncio.get_running_loop()
+     except RuntimeError:
+         loop = None
+
+     if loop and loop.is_running():
+         thread = threading.Thread(target=lambda: asyncio.run(method))
+         thread.start()
+         thread.join()
+     else:
+         asyncio.run(method)
+
+
+ def should_emit_events() -> bool:
+     """
+     Checks if the instrumentation isn't using the legacy attributes
+     and if the event logger is not None.
+     """
+     return not Config.use_legacy_attributes
+
+
+ class JSONEncoder(json.JSONEncoder):
+     def default(self, o):
+         if hasattr(o, "to_json"):
+             return o.to_json()
+
+         if hasattr(o, "model_dump_json"):
+             return o.model_dump_json()
+
+         try:
+             return str(o)
+         except Exception:
+             logger = logging.getLogger(__name__)
+             logger.debug("Failed to serialize object of type: %s", type(o).__name__)
+             return ""
+
+
+ def model_as_dict(model):
+     if isinstance(model, dict):
+         return model
+     if _PYDANTIC_VERSION < "2.0.0" and hasattr(model, "dict"):
+         return model.dict()
+     if hasattr(model, "model_dump"):
+         return model.model_dump()
+     else:
+         try:
+             return dict(model)
+         except Exception:
+             return model
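The dont_throw decorator defined above is what keeps instrumentation failures from propagating into user code: the wrapped sync or async callable logs the traceback at debug level and returns None instead of raising. A self-contained sketch of the same idea (simplified, without the Config.exception_logger hook; illustrative only):

    import asyncio
    import functools
    import logging

    def dont_throw_sketch(func):
        """Log and swallow exceptions instead of letting them propagate."""
        logger = logging.getLogger(func.__module__)

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception:
                logger.debug("tracing failed in %s", func.__name__, exc_info=True)

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception:
                logger.debug("tracing failed in %s", func.__name__, exc_info=True)

        return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper

    @dont_throw_sketch
    def parse_model(payload: dict) -> str:
        return payload["model"]  # a KeyError here is swallowed

    print(parse_model({}))  # -> None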
@@ -0,0 +1 @@
+ __version__ = "0.41.0"