lmnr 0.6.18__py3-none-any.whl → 0.6.19__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registries. It is provided for informational purposes only.
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +55 -20
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +23 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +442 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1024 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +297 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +308 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +358 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +319 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +132 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +626 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +1 -3
- lmnr/sdk/browser/browser_use_otel.py +1 -1
- lmnr/sdk/browser/patchright_otel.py +0 -14
- lmnr/sdk/browser/playwright_otel.py +16 -130
- lmnr/sdk/browser/pw_utils.py +45 -31
- lmnr/version.py +1 -1
- {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/METADATA +2 -5
- {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/RECORD +28 -11
- {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/WHEEL +1 -1
- {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/entry_points.txt +0 -0
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py
@@ -0,0 +1,297 @@
+import logging
+
+from opentelemetry import context as context_api
+from ..shared import (
+    _set_client_attributes,
+    _set_functions_attributes,
+    _set_request_attributes,
+    _set_response_attributes,
+    _set_span_attribute,
+    _set_span_stream_usage,
+    get_token_count_from_string,
+    is_streaming_response,
+    model_as_dict,
+    propagate_trace_context,
+    should_record_stream_token_usage,
+)
+from ..shared.config import Config
+from ..shared.event_emitter import emit_event
+from ..shared.event_models import (
+    ChoiceEvent,
+    MessageEvent,
+)
+from ..utils import (
+    _with_tracer_wrapper,
+    dont_throw,
+    is_openai_v1,
+    should_emit_events,
+    should_send_prompts,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+    SpanAttributes,
+)
+from opentelemetry.trace import SpanKind
+from opentelemetry.trace.status import Status, StatusCode
+
+SPAN_NAME = "openai.completion"
+LLM_REQUEST_TYPE = LLMRequestTypeValues.COMPLETION
+
+logger = logging.getLogger(__name__)
+
+
+@_with_tracer_wrapper
+def completion_wrapper(tracer, wrapped, instance, args, kwargs):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return wrapped(*args, **kwargs)
+
+    # span needs to be opened and closed manually because the response is a generator
+    span = tracer.start_span(
+        SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    )
+
+    _handle_request(span, kwargs, instance)
+
+    try:
+        response = wrapped(*args, **kwargs)
+    except Exception as e:
+        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+        span.record_exception(e)
+        span.set_status(Status(StatusCode.ERROR, str(e)))
+        span.end()
+        raise
+
+    if is_streaming_response(response):
+        # span will be closed after the generator is done
+        return _build_from_streaming_response(span, kwargs, response)
+    else:
+        _handle_response(response, span, instance)
+
+    span.end()
+    return response
+
+
+@_with_tracer_wrapper
+async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return await wrapped(*args, **kwargs)
+
+    span = tracer.start_span(
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    )
+
+    _handle_request(span, kwargs, instance)
+
+    try:
+        response = await wrapped(*args, **kwargs)
+    except Exception as e:
+        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+        span.record_exception(e)
+        span.set_status(Status(StatusCode.ERROR, str(e)))
+        span.end()
+        raise
+
+    if is_streaming_response(response):
+        # span will be closed after the generator is done
+        return _abuild_from_streaming_response(span, kwargs, response)
+    else:
+        _handle_response(response, span, instance)
+
+    span.end()
+    return response
+
+
+@dont_throw
+def _handle_request(span, kwargs, instance):
+    _set_request_attributes(span, kwargs, instance)
+    if should_emit_events():
+        _emit_prompts_events(kwargs)
+    else:
+        if should_send_prompts():
+            _set_prompts(span, kwargs.get("prompt"))
+            _set_functions_attributes(span, kwargs.get("functions"))
+    _set_client_attributes(span, instance)
+    if Config.enable_trace_context_propagation:
+        propagate_trace_context(span, kwargs)
+
+
+def _emit_prompts_events(kwargs):
+    prompt = kwargs.get("prompt")
+    if isinstance(prompt, list):
+        for p in prompt:
+            emit_event(MessageEvent(content=p))
+    elif isinstance(prompt, str):
+        emit_event(MessageEvent(content=prompt))
+
+
+@dont_throw
+def _handle_response(response, span, instance=None):
+    if is_openai_v1():
+        response_dict = model_as_dict(response)
+    else:
+        response_dict = response
+
+    _set_response_attributes(span, response_dict)
+    if should_emit_events():
+        for choice in response.choices:
+            emit_event(_parse_choice_event(choice))
+    else:
+        if should_send_prompts():
+            _set_completions(span, response_dict.get("choices"))
+
+
+def _set_prompts(span, prompt):
+    if not span.is_recording() or not prompt:
+        return
+
+    _set_span_attribute(
+        span,
+        f"{SpanAttributes.LLM_PROMPTS}.0.user",
+        prompt[0] if isinstance(prompt, list) else prompt,
+    )
+
+
+@dont_throw
+def _set_completions(span, choices):
+    if not span.is_recording() or not choices:
+        return
+
+    for choice in choices:
+        index = choice.get("index")
+        prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
+        _set_span_attribute(
+            span, f"{prefix}.finish_reason", choice.get("finish_reason")
+        )
+        _set_span_attribute(span, f"{prefix}.content", choice.get("text"))
+
+
+@dont_throw
+def _build_from_streaming_response(span, request_kwargs, response):
+    complete_response = {"choices": [], "model": "", "id": ""}
+    for item in response:
+        yield item
+        _accumulate_streaming_response(complete_response, item)
+
+    _set_response_attributes(span, complete_response)
+
+    _set_token_usage(span, request_kwargs, complete_response)
+
+    if should_emit_events():
+        _emit_streaming_response_events(complete_response)
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+
+    span.set_status(Status(StatusCode.OK))
+    span.end()
+
+
+@dont_throw
+async def _abuild_from_streaming_response(span, request_kwargs, response):
+    complete_response = {"choices": [], "model": "", "id": ""}
+    async for item in response:
+        yield item
+        _accumulate_streaming_response(complete_response, item)
+
+    _set_response_attributes(span, complete_response)
+
+    _set_token_usage(span, request_kwargs, complete_response)
+
+    if should_emit_events():
+        _emit_streaming_response_events(complete_response)
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+
+    span.set_status(Status(StatusCode.OK))
+    span.end()
+
+
+def _emit_streaming_response_events(complete_response):
+    for i, choice in enumerate(complete_response["choices"]):
+        emit_event(
+            ChoiceEvent(
+                index=choice.get("index", i),
+                message={"content": choice.get("text"), "role": "assistant"},
+                finish_reason=choice.get("finish_reason", "unknown"),
+            )
+        )
+
+
+@dont_throw
+def _set_token_usage(span, request_kwargs, complete_response):
+    # use tiktoken calculate token usage
+    if should_record_stream_token_usage():
+        prompt_usage = -1
+        completion_usage = -1
+
+        # prompt_usage
+        if request_kwargs and request_kwargs.get("prompt"):
+            prompt_content = request_kwargs.get("prompt")
+            model_name = complete_response.get("model") or None
+
+            if model_name:
+                prompt_usage = get_token_count_from_string(prompt_content, model_name)
+
+        # completion_usage
+        if complete_response.get("choices"):
+            completion_content = ""
+            model_name = complete_response.get("model") or None
+
+            for choice in complete_response.get("choices"):
+                if choice.get("text"):
+                    completion_content += choice.get("text")
+
+            if model_name:
+                completion_usage = get_token_count_from_string(
+                    completion_content, model_name
+                )
+
+        # span record
+        _set_span_stream_usage(span, prompt_usage, completion_usage)
+
+
+@dont_throw
+def _accumulate_streaming_response(complete_response, item):
+    if is_openai_v1():
+        item = model_as_dict(item)
+
+    complete_response["model"] = item.get("model")
+    complete_response["id"] = item.get("id")
+    for choice in item.get("choices"):
+        index = choice.get("index")
+        if len(complete_response.get("choices")) <= index:
+            complete_response["choices"].append({"index": index, "text": ""})
+        complete_choice = complete_response.get("choices")[index]
+        if choice.get("finish_reason"):
+            complete_choice["finish_reason"] = choice.get("finish_reason")
+
+        if choice.get("text"):
+            complete_choice["text"] += choice.get("text")
+
+    return complete_response
+
+
+def _parse_choice_event(choice) -> ChoiceEvent:
+    has_message = choice.text is not None
+    has_finish_reason = choice.finish_reason is not None
+
+    content = choice.text if has_message else None
+    finish_reason = choice.finish_reason if has_finish_reason else "unknown"
+
+    return ChoiceEvent(
+        index=choice.index,
+        message={"content": content, "role": "assistant"},
+        finish_reason=finish_reason,
+    )
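For orientation, here is a minimal sketch of how a tracer-parameterized wrapper such as completion_wrapper is typically attached to the OpenAI SDK with wrapt. The target module path, import path, and tracer setup below are illustrative assumptions; the package's actual wiring lives in the openai/__init__.py and openai/v1/__init__.py files listed above and is not shown in this diff.

from opentelemetry import trace
from wrapt import wrap_function_wrapper

# Hypothetical import path, following the file list above.
from lmnr.opentelemetry_lib.opentelemetry.instrumentation.openai.shared.completion_wrappers import (
    completion_wrapper,
)

tracer = trace.get_tracer(__name__)

# completion_wrapper is decorated with @_with_tracer_wrapper, so calling it with a
# tracer yields a (wrapped, instance, args, kwargs) callback suitable for wrapt.
wrap_function_wrapper(
    "openai.resources.completions",  # assumed target module for openai>=1.x
    "Completions.create",
    completion_wrapper(tracer),
)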
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py
@@ -0,0 +1,16 @@
+from typing import Callable, Optional
+
+from opentelemetry._events import EventLogger
+
+
+class Config:
+    enrich_token_usage = False
+    enrich_assistant = False
+    exception_logger = None
+    get_common_metrics_attributes: Callable[[], dict] = lambda: {}
+    upload_base64_image: Callable[[str, str, str], str] = (
+        lambda trace_id, span_id, base64_image_url: ""
+    )
+    enable_trace_context_propagation: bool = True
+    use_legacy_attributes = True
+    event_logger: Optional[EventLogger] = None
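Since Config is a plain holder of class attributes, callers can adjust behavior by assigning to it before instrumentation is set up. A minimal sketch follows; the module path is taken from the file list above, and of the flags shown only enable_trace_context_propagation's effect (gating propagate_trace_context in the wrappers) is visible in this diff, so the other comments are assumptions.

from lmnr.opentelemetry_lib.opentelemetry.instrumentation.openai.shared.config import Config

# Visible in the wrappers above: this flag gates propagate_trace_context(span, kwargs).
Config.enable_trace_context_propagation = False

# Assumed meanings based on the attribute names; not confirmed by this diff.
Config.enrich_token_usage = True
Config.use_legacy_attributes = False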
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py
@@ -0,0 +1,308 @@
+import logging
+import time
+from collections.abc import Iterable
+
+from opentelemetry import context as context_api
+from ..shared import (
+    OPENAI_LLM_USAGE_TOKEN_TYPES,
+    _get_openai_base_url,
+    _set_client_attributes,
+    _set_request_attributes,
+    _set_response_attributes,
+    _set_span_attribute,
+    _token_type,
+    metric_shared_attributes,
+    model_as_dict,
+    propagate_trace_context,
+)
+from ..shared.config import Config
+from ..shared.event_emitter import emit_event
+from ..shared.event_models import (
+    ChoiceEvent,
+    MessageEvent,
+)
+from ..utils import (
+    _with_embeddings_telemetry_wrapper,
+    dont_throw,
+    is_openai_v1,
+    should_emit_events,
+    should_send_prompts,
+    start_as_current_span_async,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+    SpanAttributes,
+)
+from opentelemetry.trace import SpanKind, Status, StatusCode
+
+from openai._legacy_response import LegacyAPIResponse
+from openai.types.create_embedding_response import CreateEmbeddingResponse
+
+SPAN_NAME = "openai.embeddings"
+LLM_REQUEST_TYPE = LLMRequestTypeValues.EMBEDDING
+
+logger = logging.getLogger(__name__)
+
+
+@_with_embeddings_telemetry_wrapper
+def embeddings_wrapper(
+    tracer,
+    token_counter: Counter,
+    vector_size_counter: Counter,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return wrapped(*args, **kwargs)
+
+    with tracer.start_as_current_span(
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        try:
+            # record time for duration
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # if there are legal duration, record it
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            vector_size_counter,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@_with_embeddings_telemetry_wrapper
+async def aembeddings_wrapper(
+    tracer,
+    token_counter: Counter,
+    vector_size_counter: Counter,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return await wrapped(*args, **kwargs)
+
+    async with start_as_current_span_async(
+        tracer=tracer,
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        try:
+            # record time for duration
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # if there are legal duration, record it
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            vector_size_counter,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@dont_throw
+def _handle_request(span, kwargs, instance):
+    _set_request_attributes(span, kwargs, instance)
+
+    if should_emit_events():
+        _emit_embeddings_message_event(kwargs.get("input"))
+    else:
+        if should_send_prompts():
+            _set_prompts(span, kwargs.get("input"))
+
+    _set_client_attributes(span, instance)
+
+    if Config.enable_trace_context_propagation:
+        propagate_trace_context(span, kwargs)
+
+
+@dont_throw
+def _handle_response(
+    response,
+    span,
+    instance=None,
+    token_counter=None,
+    vector_size_counter=None,
+    duration_histogram=None,
+    duration=None,
+):
+    if is_openai_v1():
+        response_dict = model_as_dict(response)
+    else:
+        response_dict = response
+    # metrics record
+    _set_embeddings_metrics(
+        instance,
+        token_counter,
+        vector_size_counter,
+        duration_histogram,
+        response_dict,
+        duration,
+    )
+    # span attributes
+    _set_response_attributes(span, response_dict)
+
+    # emit events
+    if should_emit_events():
+        _emit_embeddings_choice_event(response)
+
+
+def _set_embeddings_metrics(
+    instance,
+    token_counter,
+    vector_size_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+):
+    shared_attributes = metric_shared_attributes(
+        response_model=response_dict.get("model") or None,
+        operation="embeddings",
+        server_address=_get_openai_base_url(instance),
+    )
+
+    # token count metrics
+    usage = response_dict.get("usage")
+    if usage and token_counter:
+        for name, val in usage.items():
+            if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
+                if val is None:
+                    logging.error(f"Received None value for {name} in usage")
+                    continue
+                attributes_with_token_type = {
+                    **shared_attributes,
+                    SpanAttributes.LLM_TOKEN_TYPE: _token_type(name),
+                }
+                token_counter.record(val, attributes=attributes_with_token_type)
+
+    # vec size metrics
+    # should use counter for vector_size?
+    vec_embedding = (response_dict.get("data") or [{}])[0].get("embedding", [])
+    vec_size = len(vec_embedding)
+    if vector_size_counter:
+        vector_size_counter.add(vec_size, attributes=shared_attributes)
+
+    # duration metrics
+    if duration and isinstance(duration, (float, int)) and duration_histogram:
+        duration_histogram.record(duration, attributes=shared_attributes)
+
+
+def _set_prompts(span, prompt):
+    if not span.is_recording() or not prompt:
+        return
+
+    if isinstance(prompt, list):
+        for i, p in enumerate(prompt):
+            _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.{i}.content", p)
+    else:
+        _set_span_attribute(
+            span,
+            f"{SpanAttributes.LLM_PROMPTS}.0.content",
+            prompt,
+        )
+
+
+def _emit_embeddings_message_event(embeddings) -> None:
+    if isinstance(embeddings, str):
+        emit_event(MessageEvent(content=embeddings))
+    elif isinstance(embeddings, Iterable):
+        for i in embeddings:
+            emit_event(MessageEvent(content=i))
+
+
+def _emit_embeddings_choice_event(response) -> None:
+    if isinstance(response, CreateEmbeddingResponse):
+        for embedding in response.data:
+            emit_event(
+                ChoiceEvent(
+                    index=embedding.index,
+                    message={"content": embedding.embedding, "role": "assistant"},
+                )
+            )
+
+    elif isinstance(response, LegacyAPIResponse):
+        parsed_response = response.parse()
+        for embedding in parsed_response.data:
+            emit_event(
+                ChoiceEvent(
+                    index=embedding.index,
+                    message={"content": embedding.embedding, "role": "assistant"},
+                )
+            )
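The embeddings wrappers above receive their metric instruments through _with_embeddings_telemetry_wrapper. A minimal sketch of creating compatible instruments from an OpenTelemetry Meter follows; the instrument names are placeholders, not the names the package actually registers.

from opentelemetry import metrics

meter = metrics.get_meter(__name__)

# token_counter is annotated as a Counter above, but the wrapper calls .record() on it,
# so a Histogram instrument is what actually fits that call.
token_counter = meter.create_histogram("llm.openai.embeddings.token_usage")
vector_size_counter = meter.create_counter("llm.openai.embeddings.vector_size")
duration_histogram = meter.create_histogram("llm.openai.embeddings.duration", unit="s")
exception_counter = meter.create_counter("llm.openai.embeddings.exceptions")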