openlit 1.34.12__py3-none-any.whl → 1.34.13__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- openlit/instrumentation/vllm/__init__.py +5 -7
- openlit/instrumentation/vllm/utils.py +85 -103
- openlit/instrumentation/vllm/vllm.py +3 -8
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/METADATA +1 -1
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/RECORD +7 -7
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/LICENSE +0 -0
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/WHEEL +0 -0
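The keyword arguments read by `_instrument` in the first file below (application_name, environment, tracer, metrics_dict, pricing_info, capture_message_content, disable_metrics) are supplied when openlit is initialized. A minimal sketch of the instrumented call path, assuming openlit's public `openlit.init()` entry point; keyword names beyond application_name and environment are not shown in this diff, and the model name is simply the fallback value used in vllm.py below:

    # Sketch: enable openlit, then call the patched vLLM entry point.
    # openlit.init() keyword names beyond application_name/environment are assumptions.
    import openlit
    from vllm import LLM

    openlit.init(application_name="default", environment="default")

    llm = LLM(model="facebook/opt-125m")        # same model used as fallback in vllm.py below
    outputs = llm.generate(["Tell me a joke"])  # LLM.generate is the method being wrapped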
openlit/instrumentation/vllm/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "…
-        environment = kwargs.get("environment", "…
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        # …
+        # Chat completions
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
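`wrap_function_wrapper` comes from the wrapt library: it replaces `LLM.generate` on `vllm.entrypoints.llm` with a callable produced by the `generate(...)` factory. A rough sketch of that factory pattern, assuming wrapt's standard `(wrapped, instance, args, kwargs)` wrapper signature; the real wrapper body lives in vllm.py further down:

    # Sketch of the wrapt wrapper-factory pattern used by _instrument above.
    from wrapt import wrap_function_wrapper

    def generate(version, environment, application_name, tracer, pricing_info,
                 capture_message_content, metrics, disable_metrics):
        def wrapper(wrapped, instance, args, kwargs):
            # wrapped  -> the original vllm.LLM.generate
            # instance -> the LLM object it was called on
            response = wrapped(*args, **kwargs)
            # ...spans and metrics are recorded here before returning...
            return response
        return wrapper

    # wrap_function_wrapper("vllm.entrypoints.llm", "LLM.generate", generate(...))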
openlit/instrumentation/vllm/utils.py

@@ -1,15 +1,15 @@
 """
-…
+vLLM OpenTelemetry instrumentation utility functions
 """
-…
 import time
-
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
     general_tokens,
-    …
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
         return args[1]
     return None
 
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """
 
-    … (13 removed lines truncated in the source diff)
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        … (removed attribute-mapping block truncated in the source diff)
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
-    # …
-    scope._span.set_attribute(…
-    scope._span.set_attribute(…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
+    # Span Attributes for Content
     if capture_message_content:
-        prompt …
-        completion …
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
 
-        …
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
-
-        # Add a single event for completion
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
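The new aggregation loop above walks the list of vLLM RequestOutput objects, concatenates prompts and first-choice completions, and estimates token counts with `general_tokens`. A standalone sketch of the same logic, using stand-ins for the vLLM output objects and a crude estimator in place of openlit's helper:

    # Standalone sketch of the prompt/completion/token aggregation above.
    from dataclasses import dataclass

    @dataclass
    class FakeCompletion:
        text: str

    @dataclass
    class FakeRequestOutput:
        prompt: str
        outputs: list

    def rough_tokens(text: str) -> int:
        # crude stand-in for general_tokens(): ~4 characters per token
        return max(1, len(text) // 4)

    response = [
        FakeRequestOutput(prompt="Tell me a joke", outputs=[FakeCompletion(text="Why did ...")]),
        FakeRequestOutput(prompt="Summarise vLLM", outputs=[FakeCompletion(text="A fast ...")]),
    ]

    prompt, completion, input_tokens, output_tokens = "", "", 0, 0
    for output in response:
        prompt += output.prompt + "\n"
        if output.outputs:
            completion += output.outputs[0].text + "\n"
        input_tokens += rough_tokens(output.prompt)
        output_tokens += rough_tokens(output.outputs[0].text)

    print(input_tokens, output_tokens)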
@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
             },
         )
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-        input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-        output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
     scope._span.set_status(Status(StatusCode.OK))
 
-    … (removed metrics-attribute setup truncated in the source diff)
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
 
 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, start_time, span, args, kwargs,
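The hand-rolled metric recording above is collapsed into a single `record_completion_metrics` call. Based only on the metric names visible in the removed lines, a helper of that shape plausibly covers the same instruments as follows (a sketch, not openlit's actual implementation):

    # Sketch only: approximates what the removed per-instrument code did,
    # based on the metric names visible in the old lines above.
    def record_completion_metrics_sketch(metrics, attributes, start_time, end_time,
                                         input_tokens, output_tokens, cost, tbt, ttft):
        metrics['genai_client_operation_duration'].record(end_time - start_time, attributes)
        metrics['genai_server_tbt'].record(tbt, attributes)
        metrics['genai_server_ttft'].record(ttft, attributes)
        metrics['genai_requests'].add(1, attributes)
        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
        metrics['genai_completion_tokens'].add(output_tokens, attributes)
        metrics['genai_client_usage_tokens'].record(input_tokens + output_tokens, attributes)
        metrics['genai_cost'].record(cost, attributes)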
@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     """
     Process chat request and generate Telemetry
     """
-    … (removed scope-setup lines truncated in the source diff)
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
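`type("GenericScope", (), {})()` simply builds a throwaway object that accepts arbitrary attributes, so the response, timings, and request metadata can be handed to `common_chat_logic` as one bundle. An equivalent, slightly more explicit form uses `types.SimpleNamespace`:

    # The "GenericScope" trick creates an empty attribute bag; SimpleNamespace is equivalent.
    import time
    from types import SimpleNamespace

    scope = type("GenericScope", (), {})()   # pattern used in the diff
    scope._start_time = time.time()

    same_idea = SimpleNamespace(_start_time=time.time(), _tbt=0)
    print(scope._start_time, same_idea._tbt)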
openlit/instrumentation/vllm/vllm.py

@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-… (removed logger setup truncated in the source diff)
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+             capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
     """
     Wraps the GenAI function call.
     """
-
     server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
     request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@
             disable_metrics=disable_metrics,
             version=version,
         )
+
     except Exception as e:
         handle_exception(span, e)
-        logger.error("Error in trace creation: %s", e)
 
     return response
 
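With `import logging` and the `logger.error` call removed, error reporting now goes through `handle_exception(span, e)` alone. That helper is not shown in this diff; the usual OpenTelemetry pattern it presumably follows looks like this (an assumption, sketched with the standard span APIs):

    # Generic OpenTelemetry error-handling pattern; openlit's handle_exception()
    # is not shown in this diff, so its exact body is an assumption.
    from opentelemetry.trace import Status, StatusCode

    def handle_exception_sketch(span, error: Exception):
        span.record_exception(error)                        # standard OTel span API
        span.set_status(Status(StatusCode.ERROR, str(error)))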
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/RECORD

@@ -131,14 +131,14 @@ openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-…
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=…
-openlit/instrumentation/vllm/utils.py,sha256=…
-openlit/instrumentation/vllm/vllm.py,sha256=…
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.12.dist-info/LICENSE,sha256=…
-openlit-1.34.12.dist-info/METADATA,sha256=…
-openlit-1.34.12.dist-info/WHEEL,sha256=…
-openlit-1.34.12.dist-info/RECORD,,
+openlit-1.34.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.13.dist-info/METADATA,sha256=4uHfQSKnuT-yfoNz7kj78yd53TBFDCDYVhOIsz7XF8k,23470
+openlit-1.34.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.13.dist-info/RECORD,,
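RECORD entries pair each file with a urlsafe-base64 SHA-256 digest (trailing '=' padding stripped) and a byte size, per the wheel format. A short sketch for checking one of the hashes above against a local file:

    # Recompute a wheel RECORD hash ("sha256=<urlsafe b64 digest, no padding>").
    import base64
    import hashlib

    def record_hash(path: str) -> str:
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    # e.g. record_hash("openlit/instrumentation/vllm/utils.py") should match the entry above.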
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/LICENSE: File without changes
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/WHEEL: File without changes