PyPI - openlit - Version diff - openlit-1.34.1-py3-none-any.whl → openlit-1.34.3-py3-none-any.whl - Mend

openlit-1.34.1-py3-none-any.whl → openlit-1.34.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

openlit/instrumentation/google_ai_studio/async_google_ai_studio.py CHANGED Viewed

@@ -59,14 +59,12 @@ def async_generate(version, environment, application_name,
                         version=version,
                 )
-                return response
             except Exception as e:
                 handle_exception(span, e)
                 logger.error("Error in trace creation: %s", e)
-                # Return original response
-                return response
+            # Return original response
+            return response
     return wrapper

openlit/instrumentation/google_ai_studio/google_ai_studio.py CHANGED Viewed

@@ -59,14 +59,12 @@ def generate(version, environment, application_name,
                         version=version,
                 )
-                return response
             except Exception as e:
                 handle_exception(span, e)
                 logger.error("Error in trace creation: %s", e)
-                # Return original response
-                return response
+            # Return original response
+            return response
     return wrapper

openlit/instrumentation/google_ai_studio/utils.py CHANGED Viewed

@@ -2,10 +2,8 @@
 Google AI Studio OpenTelemetry instrumentation utility functions
 """
 import time
 from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 from openlit.__helpers import (
     calculate_ttft,
     response_as_dict,
@@ -117,6 +115,7 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_GEMINI)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
     inference_config = scope._kwargs.get('config', {})
@@ -142,7 +141,6 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_REASONING_TOKENS, scope._reasoning_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                               'text' if isinstance(scope._llmresponse, str) else 'json')

openlit/instrumentation/openai/__init__.py CHANGED Viewed

@@ -5,9 +5,10 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
-from openlit.instrumentation.openai.openai import chat_completions, embedding, responses
+from openlit.instrumentation.openai.openai import chat_completions, embedding, responses, chat_completions_parse
 from openlit.instrumentation.openai.openai import image_generate, image_variatons, audio_create
-from openlit.instrumentation.openai.async_openai import async_chat_completions, async_embedding
+from openlit.instrumentation.openai.async_openai import (async_chat_completions, async_embedding,
+                                                         async_chat_completions_parse)
 from openlit.instrumentation.openai.async_openai import async_image_generate, async_image_variatons
 from openlit.instrumentation.openai.async_openai import async_audio_create, async_responses
@@ -127,6 +128,20 @@ class OpenAIInstrumentor(BaseInstrumentor):
                                metrics, disable_metrics),
         )
+        wrap_function_wrapper(
+            "openai.resources.beta.chat.completions",
+            "Completions.parse",
+            chat_completions_parse(version, environment, application_name, tracer, pricing_info,
+                                   capture_message_content, metrics, disable_metrics),
+        )
+        wrap_function_wrapper(
+            "openai.resources.beta.chat.completions",
+            "AsyncCompletions.parse",
+            async_chat_completions_parse(version, environment, application_name, tracer, pricing_info,
+                                         capture_message_content, metrics, disable_metrics),
+        )
     @staticmethod
     def _uninstrument(self, **kwargs):
         pass

openlit/instrumentation/openai/async_openai.py CHANGED Viewed

@@ -882,6 +882,167 @@ def async_chat_completions(version, environment, application_name,
     return wrapper
+def async_chat_completions_parse(version, environment, application_name, tracer, pricing_info, capture_message_content,
+                                 metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for chat completions parse to collect metrics.
+    Args:
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the OpenAI API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of OpenAI usage.
+        capture_message_content: Flag indicating whether to trace the actual content.
+    Returns:
+        A function that wraps the chat completions parse method to add telemetry.
+    """
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'chat.completions.parse' API call to add telemetry.
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+        Args:
+            wrapped: The original 'chat.completions' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.
+        Returns:
+            The response from the original 'chat.completions.parse' method.
+        """
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            try:
+                # Execute the original 'parse' method
+                response = await wrapped(*args, **kwargs)
+                end_time = time.time()
+                response_dict = response_as_dict(response)
+                # Format 'messages' from kwargs to calculate input tokens
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.get("role")
+                    content = message.get("content")
+                    if content:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+                # Calculate cost
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OPENAI)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(kwargs.get("seed", "")))
+                span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                   str(kwargs.get("frequency_penalty", 0.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, str(kwargs.get("max_tokens", -1)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   str(kwargs.get("presence_penalty", 0.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, str(kwargs.get("stop", [])))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, str(kwargs.get("temperature", 1.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, str(kwargs.get("top_p", 1.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, response_dict.get("id"))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_dict.get('model'))
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+                span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
+                                   str(kwargs.get("service_tier", "auto")))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
+                                   response_dict.get('service_tier', 'auto'))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                   str(response_dict.get('system_fingerprint', '')))
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+                span.set_attribute(SERVICE_NAME, application_name)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, kwargs.get("user", ""))
+                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, False)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, end_time - start_time)
+                if capture_message_content:
+                    span.add_event(
+                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
+                    )
+                for i in range(kwargs.get('n', 1)):
+                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
+                                       [response_dict.get('choices')[i].get('finish_reason')])
+                    if capture_message_content:
+                        span.add_event(
+                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(
+                                    response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
+                                           str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
+                                           "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
+                                           "json")
+                span.set_status(Status(StatusCode.OK))
+                if not disable_metrics:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+                    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+                    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+                    metrics["genai_server_ttft"].record( end_time - start_time, attributes)
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+                return response
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in 'parse' trace creation: %s", e)
+                # Re-raise the exception to not interfere with the application flow
+                raise
+    return wrapper
 def async_embedding(version, environment, application_name,
               tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """

openlit/instrumentation/openai/openai.py CHANGED Viewed

@@ -882,6 +882,167 @@ def chat_completions(version, environment, application_name,
     return wrapper
+def chat_completions_parse(version, environment, application_name, tracer, pricing_info, capture_message_content,
+                           metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for chat completions parse to collect metrics.
+    Args:
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the OpenAI API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of OpenAI usage.
+        capture_message_content: Flag indicating whether to trace the actual content.
+    Returns:
+        A function that wraps the chat completions parse method to add telemetry.
+    """
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'chat.completions.parse' API call to add telemetry.
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+        Args:
+            wrapped: The original 'chat.completions' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.
+        Returns:
+            The response from the original 'chat.completions.parse' method.
+        """
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            try:
+                # Execute the original 'parse' method
+                response = wrapped(*args, **kwargs)
+                end_time = time.time()
+                response_dict = response_as_dict(response)
+                # Format 'messages' from kwargs to calculate input tokens
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.get("role")
+                    content = message.get("content")
+                    if content:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+                # Calculate cost
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OPENAI)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(kwargs.get("seed", "")))
+                span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                   str(kwargs.get("frequency_penalty", 0.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, str(kwargs.get("max_tokens", -1)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   str(kwargs.get("presence_penalty", 0.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, str(kwargs.get("stop", [])))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, str(kwargs.get("temperature", 1.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, str(kwargs.get("top_p", 1.0)))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, response_dict.get("id"))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_dict.get('model'))
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+                span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
+                                   str(kwargs.get("service_tier", "auto")))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
+                                   response_dict.get('service_tier', 'auto'))
+                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                   str(response_dict.get('system_fingerprint', '')))
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+                span.set_attribute(SERVICE_NAME, application_name)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, kwargs.get("user", ""))
+                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, False)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, end_time - start_time)
+                if capture_message_content:
+                    span.add_event(
+                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
+                    )
+                for i in range(kwargs.get('n', 1)):
+                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
+                                       [response_dict.get('choices')[i].get('finish_reason')])
+                    if capture_message_content:
+                        span.add_event(
+                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(
+                                    response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
+                                           str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
+                                           "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
+                                           "json")
+                span.set_status(Status(StatusCode.OK))
+                if not disable_metrics:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+                    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+                    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+                    metrics["genai_server_ttft"].record( end_time - start_time, attributes)
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+                return response
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in 'parse' trace creation: %s", e)
+                # Re-raise the exception to not interfere with the application flow
+                raise
+    return wrapper
 def embedding(version, environment, application_name,
               tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """

openlit/instrumentation/vllm/__init__.py CHANGED Viewed

@@ -32,7 +32,7 @@ class VLLMInstrumentor(BaseInstrumentor):
         # sync chat
         wrap_function_wrapper(
-            "vllm",
+            "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
                   tracer, pricing_info, capture_message_content, metrics, disable_metrics),

openlit/instrumentation/vllm/utils.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""
+Utility functions for vLLM instrumentation.
+"""
+import time
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+from openlit.__helpers import (
+    calculate_tbt,
+    get_chat_model_cost,
+    general_tokens,
+    create_metrics_attributes,
+)
+from openlit.semcov import SemanticConvention
+def get_inference_config(args, kwargs):
+    """
+    Safely extract inference configuration from args or kwargs.
+    """
+    if 'sampling_params' in kwargs:
+        return kwargs['sampling_params']
+    if len(args) > 1:
+        return args[1]
+    return None
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+    # Set base span attributes
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+    # Handle inference configuration
+    inference_config = get_inference_config(scope._args, scope._kwargs)
+    if inference_config:
+        attributes = [
+            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
+        ]
+        for attribute, key in attributes:
+            value = getattr(inference_config, key, None)
+            if value is not None:
+                scope._span.set_attribute(attribute, value)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+    # Set base span attributes (Extras)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+    input_tokens = 0
+    output_tokens = 0
+    cost = 0
+    if capture_message_content:
+        prompt = ""
+        completion = ""
+        for output in scope._response:
+            prompt += output.prompt + "\n"
+            if output.outputs and len(output.outputs) > 0:
+                completion += output.outputs[0].text + "\n"
+            input_tokens += general_tokens(output.prompt)
+            output_tokens += general_tokens(output.outputs[0].text)
+        # Add a single event for prompt
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        # Add a single event for completion
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: completion,
+            },
+        )
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
+                        input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+                        input_tokens + output_tokens)
+    # Calculate cost of the operation
+    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_status(Status(StatusCode.OK))
+    if disable_metrics is False:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            request_model=scope._request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._request_model,
+        )
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+        metrics['genai_client_usage_tokens'].record(
+            input_tokens + output_tokens, metrics_attributes)
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, args, kwargs,
+    capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+    self = type('GenericScope', (), {})()
+    self._response = response
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address = server_address
+    self._server_port = server_port
+    self._request_model = request_model
+    self._timestamps = []
+    self._args = args
+    self._kwargs = kwargs
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+    return response

openlit/instrumentation/vllm/vllm.py CHANGED Viewed

@@ -4,170 +4,62 @@ Module for monitoring vLLM API calls.
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
     handle_exception,
-    general_tokens,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+from openlit.instrumentation.vllm.utils import (
+    process_chat_response
+)
 from openlit.semcov import SemanticConvention
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 def generate(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for generate to collect metrics.
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the vLLM API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of vLLM usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-    Returns:
-        A function that wraps the generate method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'generate' API call to add telemetry.
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-        Returns:
-            The response from the original 'generate' method.
+        Wraps the GenAI function call.
         """
-        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)  # NOTE(review): presumably these are fallback host/port values; the host string carries an "http://" scheme and is paired with port 443 — confirm intended
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"  # uses "facebook/opt-125m" when the configured model value is falsy
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
-        # pylint: disable=line-too-long
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()  # sampled immediately before the wrapped call
             response = wrapped(*args, **kwargs)  # sits outside the try below, so an exception from the wrapped call is not caught here
-            end_time = time.time()
             try:  # only the telemetry processing is guarded
-                # Set base span attribues
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                    SemanticConvention.GEN_AI_SYSTEM_VLLM)
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                    SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                    server_address)
-                span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                    "text")
-                # Set base span attribues (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                     environment)
-                span.set_attribute(SERVICE_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                    end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                    version)
-                input_tokens = 0
-                output_tokens = 0
-                cost = 0
-                if capture_message_content:
-                    prompt_attributes = {}
-                    completion_attributes = {}
-                    for i, output in enumerate(response):
-                        prompt_attributes[f"{SemanticConvention.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
-                        completion_attributes[f"{SemanticConvention.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
-                        input_tokens += general_tokens(output.prompt)
-                        output_tokens += general_tokens(output.outputs[0].text)
-                    # Add a single event for all prompts
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes=prompt_attributes,
-                    )
-                    # Add a single event for all completions
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes=completion_attributes,
-                    )
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model, pricing_info,
-                                            input_tokens, output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                    cost)
-                span.set_status(Status(StatusCode.OK))
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-                # Return original response
-                return response
+                response = process_chat_response(
+                    instance=instance,
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    args=args,
+                    kwargs=kwargs,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                )
             except Exception as e:  # a telemetry failure is passed to handle_exception and logged, not re-raised
                 handle_exception(span, e)
                 logger.error("Error in trace creation: %s", e)
-                # Return original response
-                return response
+            return response  # single exit point: reached after both the success path and the except path
     return wrapper

{openlit-1.34.1.dist-info → openlit-1.34.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.1
+Version: 1.34.3
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu

{openlit-1.34.1.dist-info → openlit-1.34.3.dist-info}/RECORD RENAMED Viewed

@@ -57,9 +57,9 @@ openlit/instrumentation/embedchain/embedchain.py,sha256=f4hyOr1Xr0RC4PNHRu46aV-j
 openlit/instrumentation/firecrawl/__init__.py,sha256=kyVsAiDBC2djifqT2w1cPRAotiEyEabNvnBeSQxi9N8,1876
 openlit/instrumentation/firecrawl/firecrawl.py,sha256=4X38UrLYeGm3uez-edYA6qEc0nKC3p77yfKgKBBud0A,3826
 openlit/instrumentation/google_ai_studio/__init__.py,sha256=d4aDvCSfDtT2geRbwG5yinu62uPTHaj4PtalimuvG-k,2685
-openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=eOUrIMsUh2dG8BqMB7oizuQBbntIb4sy2WkYrF3Dzj0,5815
-openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=fvwZcWlfjT5eXPmeYA5JMSyPfHfLZKBnzBd0iHQxEec,5717
-openlit/instrumentation/google_ai_studio/utils.py,sha256=s9-5qnIWfXsq2xjYbWToI0RBAUo5lT0yLPOILc6eY1k,11256
+openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=sWwoUCzaRO9TTSmwO12hPqKTdBmrgG1TTkyvgLN-CIo,5774
+openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=Qps8EY6xZDFT1ZQWZJ0RAQxORHG8PjrTzq52vSqrJ2o,5676
+openlit/instrumentation/google_ai_studio/utils.py,sha256=0Bqs2GMFv6e5UU-FV-s-RBVvJOTSNvD74AaGCPD5CVc,11254
 openlit/instrumentation/gpt4all/__init__.py,sha256=cO8mi3hhPDXcNwb9AwQ3-wQ_ydnOeBRwb0cptlQmAM4,1805
 openlit/instrumentation/gpt4all/gpt4all.py,sha256=EYp0njZ1kF56rTAjYZVtufA5W4xTWGzSIntjJ4MEfl4,24185
 openlit/instrumentation/gpu/__init__.py,sha256=QQCFVEbRfdeTjmdFe-UeEiy19vEEWSIBpj2B1wYGhUs,11036
@@ -95,9 +95,9 @@ openlit/instrumentation/ollama/__init__.py,sha256=v7VhVxHw_c6QtMznxe6a7z6QrYHZsH
 openlit/instrumentation/ollama/async_ollama.py,sha256=zJPDr2ROh1nvFGoxgdTbe04Zr1KhmgJUYFPeuRLQGLk,6667
 openlit/instrumentation/ollama/ollama.py,sha256=MNUltiP9XVT4azmO_-E2vjhFaoHQyJ0Z6c-HnB0_jCE,6563
 openlit/instrumentation/ollama/utils.py,sha256=41uvYaYkGwWfRyHYqhOOwrFy6cMzBlG1urJYUat9Q24,14819
-openlit/instrumentation/openai/__init__.py,sha256=FiL4OHDhs957spa3k9sNC_VLt0-txtwbnujQwnevQ5I,5564
-openlit/instrumentation/openai/async_openai.py,sha256=gxA9Fs_b0hsOlJh8F55zi0TqgarJUlZA6eK1-ghvy90,71945
-openlit/instrumentation/openai/openai.py,sha256=Gky-NPUhjXhGOG4nWKkuKGTEKWJSgebzHb5dmqJp7fU,71754
+openlit/instrumentation/openai/__init__.py,sha256=y9Ox5aYWTb2nAa_d0ic3Mkv4wEKmUGqslW9nHKg6NnY,6320
+openlit/instrumentation/openai/async_openai.py,sha256=JkpVcyOhGvPzhqxzeP01MwwfaYhddNsSUQqgfF8hU8I,81390
+openlit/instrumentation/openai/openai.py,sha256=5fgRyK5dUN2zUdrN0vBSZFnSEAXf2dKS0qnq_85-mQE,81175
 openlit/instrumentation/openai_agents/__init__.py,sha256=tRTSIrUtkXc_lfQnVanXmQLd2Sy9RqBNTHF5FhhZx7o,1530
 openlit/instrumentation/openai_agents/openai_agents.py,sha256=kRWPgjofcOviMi3w7CsRvJO3SCjqPmuq-PM800vIM7g,2678
 openlit/instrumentation/phidata/__init__.py,sha256=tqls5-UI6FzbjxYgq_qqAfALhWJm8dHn2NtgqiQA4f8,1557
@@ -124,13 +124,14 @@ openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=8Su4DEpxdT2wr4Qr17heakzoGSbuq6ey1MmSVR_vbOA,1508
-openlit/instrumentation/vllm/vllm.py,sha256=FxDIR4WH1VySivi0wop4E1DBo2HXyCr8nZ9x1c7x4eM,7778
+openlit/instrumentation/vllm/__init__.py,sha256=VUWsjtYEe1_u4hJRDAZI5abrRfZ2L85LxZmc76irBrk,1524
+openlit/instrumentation/vllm/utils.py,sha256=hPVG_UKLY7xTvmmHbBdPy8HT7y_8VIILn37a5zOTYzU,6822
+openlit/instrumentation/vllm/vllm.py,sha256=SZosSwnkBUKspPtsm_k6VQaAWnD4kdcFWj2n-StWJus,2175
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.34.1.dist-info/METADATA,sha256=c3xokUNDxGsFVnc_XqD9q4_aodyLpYGPYBA6uXNd3do,23469
-openlit-1.34.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-openlit-1.34.1.dist-info/RECORD,,
+openlit-1.34.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.3.dist-info/METADATA,sha256=8_jDnUBC1cxAr2DNwkg5IXbNQX2qru-_nC7OpwC6Jh8,23469
+openlit-1.34.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.3.dist-info/RECORD,,

{openlit-1.34.1.dist-info → openlit-1.34.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{openlit-1.34.1.dist-info → openlit-1.34.3.dist-info}/WHEEL RENAMED Viewed

File without changes

openlit 1.34.1__py3-none-any.whl → 1.34.3__py3-none-any.whl

openlit 1.34.1py3-none-any.whl → 1.34.3py3-none-any.whl