openlit 1.34.11__py3-none-any.whl → 1.34.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +3 -3
- openlit/instrumentation/ai21/__init__.py +10 -8
- openlit/instrumentation/ai21/ai21.py +15 -27
- openlit/instrumentation/ai21/async_ai21.py +15 -27
- openlit/instrumentation/ai21/utils.py +229 -212
- openlit/instrumentation/openai/__init__.py +3 -3
- openlit/instrumentation/vllm/__init__.py +5 -7
- openlit/instrumentation/vllm/utils.py +85 -103
- openlit/instrumentation/vllm/vllm.py +3 -8
- {openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/METADATA +1 -1
- {openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/RECORD +13 -13
- {openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/LICENSE +0 -0
- {openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/WHEEL +0 -0
openlit/__helpers.py
CHANGED
```diff
@@ -346,12 +346,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address
 scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
 scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
 scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
+scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
 scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
 scope._span.set_attribute(SERVICE_NAME, application_name)
 scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
+scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
 scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

 def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
```
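The corrected helper now receives the response model and the TBT/TTFT timings as explicit arguments. A minimal sketch of a call site, modelled on the `common_span_attributes(scope, ...)` invocations that the AI21 and vLLM utility modules add later in this diff; the tracer setup and the `SimpleNamespace` scope are illustrative, not part of the package (the portion of the helper visible here only touches `scope._span`, so a bare namespace object is enough for illustration):

```python
# Illustrative only: mirrors the call sites added in ai21/utils.py and vllm/utils.py below.
from types import SimpleNamespace

from opentelemetry import trace

from openlit.__helpers import common_span_attributes
from openlit.semcov import SemanticConvention

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("chat jamba-1.5-mini") as span:
    # The helper expects a "scope" object carrying the active span.
    scope = SimpleNamespace(_span=span)
    common_span_attributes(
        scope,
        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
        SemanticConvention.GEN_AI_SYSTEM_AI21,
        "api.ai21.com", 443,                  # server_address, server_port
        "jamba-1.5-mini", "jamba-1.5-mini",   # request_model, response_model
        "production", "my-app",               # environment, application_name
        False,                                # is_stream
        0.0, 0.12,                            # tbt, ttft (seconds)
        "1.34.13",                            # SDK version
    )
```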
openlit/instrumentation/ai21/__init__.py
CHANGED

```diff
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of AI21 Functions"""

 from typing import Collection
@@ -33,34 +32,37 @@ class AI21Instrumentor(BaseInstrumentor):
 disable_metrics = kwargs.get("disable_metrics")
 version = importlib.metadata.version("ai21")

-#
+# Chat completions
 wrap_function_wrapper(
 "ai21.clients.studio.resources.chat.chat_completions",
 "ChatCompletions.create",
 chat(version, environment, application_name,
-
+tracer, pricing_info, capture_message_content, metrics, disable_metrics),
 )
+
+# RAG completions
 wrap_function_wrapper(
 "ai21.clients.studio.resources.studio_conversational_rag",
 "StudioConversationalRag.create",
 chat_rag(version, environment, application_name,
-
+tracer, pricing_info, capture_message_content, metrics, disable_metrics),
 )

-#Async
+# Async chat completions
 wrap_function_wrapper(
 "ai21.clients.studio.resources.chat.async_chat_completions",
 "AsyncChatCompletions.create",
 async_chat(version, environment, application_name,
-
+tracer, pricing_info, capture_message_content, metrics, disable_metrics),
 )
+
+# Async RAG completions
 wrap_function_wrapper(
 "ai21.clients.studio.resources.studio_conversational_rag",
 "AsyncStudioConversationalRag.create",
 async_chat_rag(version, environment, application_name,
-
+tracer, pricing_info, capture_message_content, metrics, disable_metrics),
 )

 def _uninstrument(self, **kwargs):
-# Proper uninstrumentation logic to revert patched methods
 pass
```
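In normal use these wrappers are not registered by hand; the instrumentor is driven by the package's top-level `openlit.init()` entry point, which forwards the `application_name`, `environment`, tracer, pricing and metrics objects consumed via `kwargs` above. A minimal sketch, assuming the public `init()` call accepts these two keyword arguments:

```python
# Hypothetical setup; the argument names mirror the kwargs read by _instrument above.
import openlit

openlit.init(
    application_name="my-app",   # surfaced on spans as the service name
    environment="production",    # surfaced as the deployment environment
)

# From this point on, AI21 ChatCompletions.create / StudioConversationalRag.create
# calls (sync and async) are wrapped by the telemetry wrappers registered above.
```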
openlit/instrumentation/ai21/ai21.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
-Module for monitoring AI21 calls.
+Module for monitoring AI21 API calls.
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -15,14 +14,10 @@ from openlit.instrumentation.ai21.utils import (
 process_streaming_chat_response,
 process_chat_rag_response
 )
-
 from openlit.semcov import SemanticConvention

-
-
-
-def chat(version, environment, application_name,
-tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def chat(version, environment, application_name, tracer, pricing_info,
+capture_message_content, metrics, disable_metrics):
 """
 Generates a telemetry wrapper for GenAI function call
 """
@@ -45,14 +40,12 @@ def chat(version, environment, application_name,
 self.__wrapped__ = wrapped
 self._span = span
 self._span_name = span_name
-# Placeholder for aggregating streaming response
 self._llmresponse = ""
 self._response_id = ""
 self._finish_reason = ""
+self._tools = None
 self._input_tokens = 0
 self._output_tokens = 0
-self._choices = []
-
 self._args = args
 self._kwargs = kwargs
 self._start_time = time.time()
@@ -83,9 +76,8 @@ def chat(version, environment, application_name,
 process_chunk(self, chunk)
 return chunk
 except StopIteration:
-# Handling exception ensure observability without disrupting operation
 try:
-with tracer.start_as_current_span(self._span_name, kind=
+with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
 process_streaming_chat_response(
 self,
 pricing_info=pricing_info,
@@ -96,34 +88,31 @@ def chat(version, environment, application_name,
 disable_metrics=disable_metrics,
 version=version
 )
+
 except Exception as e:
 handle_exception(self._span, e)
-
+
 raise

 def wrapper(wrapped, instance, args, kwargs):
 """
 Wraps the GenAI function call.
 """
-
 # Check if streaming is enabled for the API call
 streaming = kwargs.get("stream", False)
-
 server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
 request_model = kwargs.get("model", "jamba-1.5-mini")

 span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-# pylint: disable=no-else-return
 if streaming:
-# Special handling for streaming response
+# Special handling for streaming response
 awaited_wrapped = wrapped(*args, **kwargs)
 span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
-
-# Handling for non-streaming responses
 else:
-
+# Handling for non-streaming responses
+with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
 start_time = time.time()
 response = wrapped(*args, **kwargs)

@@ -152,23 +141,22 @@ def chat(version, environment, application_name,

 return wrapper

-def chat_rag(version, environment, application_name,
-
+def chat_rag(version, environment, application_name, tracer, pricing_info,
+capture_message_content, metrics, disable_metrics):
 """
-Generates a telemetry wrapper for GenAI function call
+Generates a telemetry wrapper for GenAI RAG function call
 """

 def wrapper(wrapped, instance, args, kwargs):
 """
-Wraps the GenAI function call.
+Wraps the GenAI RAG function call.
 """
-
 server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
 request_model = kwargs.get("model", "jamba-1.5-mini")

 span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-with tracer.start_as_current_span(span_name, kind=
+with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
 start_time = time.time()
 response = wrapped(*args, **kwargs)

```
openlit/instrumentation/ai21/async_ai21.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
-Module for monitoring AI21 calls.
+Module for monitoring AI21 API calls (async version).
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -15,21 +14,17 @@ from openlit.instrumentation.ai21.utils import (
 process_streaming_chat_response,
 process_chat_rag_response
 )
-
 from openlit.semcov import SemanticConvention

-
-
-
-def async_chat(version, environment, application_name,
-tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def async_chat(version, environment, application_name, tracer, pricing_info,
+capture_message_content, metrics, disable_metrics):
 """
 Generates a telemetry wrapper for GenAI function call
 """

 class TracedAsyncStream:
 """
-Wrapper for streaming responses to collect telemetry.
+Wrapper for async streaming responses to collect telemetry.
 """

 def __init__(
@@ -45,14 +40,12 @@ def async_chat(version, environment, application_name,
 self.__wrapped__ = wrapped
 self._span = span
 self._span_name = span_name
-# Placeholder for aggregating streaming response
 self._llmresponse = ""
 self._response_id = ""
 self._finish_reason = ""
+self._tools = None
 self._input_tokens = 0
 self._output_tokens = 0
-self._choices = []
-
 self._args = args
 self._kwargs = kwargs
 self._start_time = time.time()
@@ -83,9 +76,8 @@ def async_chat(version, environment, application_name,
 process_chunk(self, chunk)
 return chunk
 except StopAsyncIteration:
-# Handling exception ensure observability without disrupting operation
 try:
-with tracer.start_as_current_span(self._span_name, kind=
+with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
 process_streaming_chat_response(
 self,
 pricing_info=pricing_info,
@@ -96,6 +88,7 @@ def async_chat(version, environment, application_name,
 disable_metrics=disable_metrics,
 version=version
 )
+
 except Exception as e:
 handle_exception(self._span, e)

@@ -105,25 +98,21 @@ def async_chat(version, environment, application_name,
 """
 Wraps the GenAI function call.
 """
-
 # Check if streaming is enabled for the API call
 streaming = kwargs.get("stream", False)
-
 server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
 request_model = kwargs.get("model", "jamba-1.5-mini")

 span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-# pylint: disable=no-else-return
 if streaming:
-# Special handling for streaming response
+# Special handling for streaming response
 awaited_wrapped = await wrapped(*args, **kwargs)
 span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
-
-# Handling for non-streaming responses
 else:
-
+# Handling for non-streaming responses
+with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
 start_time = time.time()
 response = await wrapped(*args, **kwargs)

@@ -152,23 +141,22 @@ def async_chat(version, environment, application_name,

 return wrapper

-def async_chat_rag(version, environment, application_name,
-
+def async_chat_rag(version, environment, application_name, tracer, pricing_info,
+capture_message_content, metrics, disable_metrics):
 """
-Generates a telemetry wrapper for GenAI function call
+Generates a telemetry wrapper for GenAI RAG function call
 """

 async def wrapper(wrapped, instance, args, kwargs):
 """
-Wraps the GenAI function call.
+Wraps the GenAI RAG function call.
 """
-
 server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
 request_model = kwargs.get("model", "jamba-1.5-mini")

 span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-with tracer.start_as_current_span(span_name, kind=
+with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
 start_time = time.time()
 response = await wrapped(*args, **kwargs)

```
openlit/instrumentation/ai21/utils.py
CHANGED

```diff
@@ -4,7 +4,6 @@ AI21 OpenTelemetry instrumentation utility functions

 import time

-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode

 from openlit.__helpers import (
@@ -12,289 +11,307 @@ from openlit.__helpers import (
 response_as_dict,
 calculate_tbt,
 general_tokens,
-extract_and_format_input,
 get_chat_model_cost,
-
-
+common_span_attributes,
+record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention

-def
-server_port, server_address, environment,
-application_name, extra_attrs):
+def format_content(messages):
 """
-
+Process a list of messages to extract content.
 """

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Environment and service identifiers.
-span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-span.set_attribute(SERVICE_NAME, application_name)
-
-# Set any extra attributes passed in.
-for key, value in extra_attrs.items():
-span.set_attribute(key, value)
-
-def record_common_metrics(metrics, application_name, environment, request_model,
-server_address, server_port, start_time, end_time,
-input_tokens, output_tokens, cost, include_tbt=False, tbt_value=None):
-"""
-Record common metrics for the operation.
-"""
-
-attributes = create_metrics_attributes(
-service_name=application_name,
-deployment_environment=environment,
-operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-system=SemanticConvention.GEN_AI_SYSTEM_AI21,
-request_model=request_model,
-server_address=server_address,
-server_port=server_port,
-response_model=request_model,
-)
-metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
-metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
-if include_tbt and tbt_value is not None:
-metrics["genai_server_tbt"].record(tbt_value, attributes)
-metrics["genai_server_ttft"].record(end_time - start_time, attributes)
-metrics["genai_requests"].add(1, attributes)
-metrics["genai_completion_tokens"].add(output_tokens, attributes)
-metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-metrics["genai_cost"].record(cost, attributes)
-
-def process_chunk(self, chunk):
+formatted_messages = []
+for message in messages:
+# Handle different message formats
+if hasattr(message, "role") and (hasattr(message, "content") or hasattr(message, "text")):
+# ChatMessage object (AI21 format)
+role = str(message.role) if hasattr(message.role, 'value') else str(message.role)
+content = getattr(message, "content", None) or getattr(message, "text", "")
+elif isinstance(message, dict):
+# Dictionary format
+role = message["role"]
+content = message["content"]
+else:
+# Fallback - try to extract as string
+role = str(getattr(message, "role", "unknown"))
+content = str(getattr(message, "content", "") or getattr(message, "text", ""))
+
+if isinstance(content, list):
+content_str = ", ".join(
+f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+if "type" in item else f'text: {item["text"]}'
+for item in content
+)
+formatted_messages.append(f"{role}: {content_str}")
+else:
+formatted_messages.append(f"{role}: {content}")
+
+return "\n".join(formatted_messages)
+
+def process_chunk(scope, chunk):
 """
 Process a chunk of response data and update state.
 """

 end_time = time.time()
-# Record the timestamp for the current chunk
-
-
-
-
+# Record the timestamp for the current chunk
+scope._timestamps.append(end_time)
+
+if len(scope._timestamps) == 1:
+# Calculate time to first chunk
+scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)

 chunked = response_as_dict(chunk)
-
-
-
-
-
-
-
-
-
-
-
+
+# Collect message IDs and aggregated response from events
+if (len(chunked.get("choices", [])) > 0 and
+"delta" in chunked.get("choices")[0] and
+"content" in chunked.get("choices")[0].get("delta", {})):
+
+content = chunked.get("choices")[0].get("delta").get("content")
+if content:
+scope._llmresponse += content
+
+if chunked.get("usage"):
+scope._input_tokens = chunked.get("usage").get("prompt_tokens")
+scope._output_tokens = chunked.get("usage").get("completion_tokens")
+scope._response_id = chunked.get("id")
+scope._finish_reason = chunked.get("choices", [{}])[0].get("finish_reason")
+scope._end_time = time.time()

 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 capture_message_content, disable_metrics, version, is_stream):
 """
-Process chat request and generate Telemetry
+Process chat request and generate Telemetry
 """

-scope._end_time = time.time()
 if len(scope._timestamps) > 1:
 scope._tbt = calculate_tbt(scope._timestamps)

-
-formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
-prompt = concatenate_all_contents(formatted_messages)
+prompt = format_content(scope._kwargs.get("messages", []))
 request_model = scope._kwargs.get("model", "jamba-1.5-mini")

-# Calculate cost based on token usage.
 cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
-# Prepare tokens dictionary.
-tokens = {
-"finish_reason": scope._finish_reason,
-"response_id": scope._response_id,
-"input_tokens": scope._input_tokens,
-"output_tokens": scope._output_tokens,
-"total_tokens": scope._input_tokens + scope._output_tokens,
-}
-extra_attrs = {
-SemanticConvention.GEN_AI_REQUEST_IS_STREAM: is_stream,
-SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE: scope._input_tokens + scope._output_tokens,
-SemanticConvention.GEN_AI_USAGE_COST: cost,
-SemanticConvention.GEN_AI_SERVER_TBT: scope._tbt,
-SemanticConvention.GEN_AI_SERVER_TTFT: scope._ttft,
-SemanticConvention.GEN_AI_SDK_VERSION: version,
-SemanticConvention.GEN_AI_OUTPUT_TYPE: "text" if isinstance(scope._llmresponse, str) else "json"
-}
-# Set span attributes.
-setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
-scope._server_port, scope._server_address, environment,
-application_name, extra_attrs)

+# Common Span Attributes
+common_span_attributes(scope,
+SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+scope._server_address, scope._server_port, request_model, request_model,
+environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+# Span Attributes for Request parameters
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 0.4))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+
+# Span Attributes for Response parameters
+scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+# Span Attributes for Cost and Tokens
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+# Span Attributes for Tools
+if scope._tools:
+scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", {}).get("name", ""))
+scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
+scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", {}).get("arguments", "")))
+
+# Span Attributes for Content
 if capture_message_content:
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+# To be removed once the change to span_attributes (from span events) is complete
 scope._span.add_event(
 name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-attributes={
+attributes={
+SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+},
 )
 scope._span.add_event(
 name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-attributes={
+attributes={
+SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+},
 )

 scope._span.set_status(Status(StatusCode.OK))

+# Metrics
 if not disable_metrics:
-
-
-
-
-include_tbt=True, tbt_value=scope._tbt)
+record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+scope._server_address, scope._server_port, request_model, request_model, environment,
+application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+cost, scope._tbt, scope._ttft)

-def process_streaming_chat_response(
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
 capture_message_content=False, disable_metrics=False, version=""):
 """
-Process
+Process streaming chat request and generate Telemetry
 """

-common_chat_logic(
+common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 capture_message_content, disable_metrics, version, is_stream=True)

 def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-
-
+environment, application_name, metrics, start_time, span, capture_message_content=False,
+disable_metrics=False, version="1.0.0", **kwargs):
 """
-Process
+Process chat request and generate Telemetry
 """

-# Create
-
+# Create scope object
+scope = type("GenericScope", (), {})()
 response_dict = response_as_dict(response)

-
-
-
-
-self._span = span
-# Concatenate content from all choices.
-self._llmresponse = "".join(
+scope._start_time = start_time
+scope._end_time = time.time()
+scope._span = span
+scope._llmresponse = " ".join(
 (choice.get("message", {}).get("content") or "")
 for choice in response_dict.get("choices", [])
 )
-
-
-
-
-
-
-
-
-
-
-
-
-
+scope._response_id = response_dict.get("id")
+scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+scope._timestamps = []
+scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+scope._server_address, scope._server_port = server_address, server_port
+scope._kwargs = kwargs
+scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", ""))
+
+# Handle tool calls
+if scope._kwargs.get("tools"):
+scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+else:
+scope._tools = None
+
+common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 capture_message_content, disable_metrics, version, is_stream=False)

 return response

-def
-
-span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def common_chat_rag_logic(scope, pricing_info, environment, application_name, metrics,
+capture_message_content, disable_metrics, version):
 """
-Process
+Process RAG chat request and generate Telemetry
 """
-end_time = time.time()
-response_dict = response_as_dict(response)
-# Format input messages into a single prompt string.
-messages_input = kwargs.get("messages", "")
-formatted_messages = extract_and_format_input(messages_input)
-prompt = concatenate_all_contents(formatted_messages)
-input_tokens = general_tokens(prompt)

-
-
-extra_attrs = {
-SemanticConvention.GEN_AI_REQUEST_IS_STREAM: False,
-SemanticConvention.GEN_AI_SERVER_TTFT: end_time - start_time,
-SemanticConvention.GEN_AI_SDK_VERSION: version,
-SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get("max_segments", -1),
-SemanticConvention.GEN_AI_RAG_STRATEGY: kwargs.get("retrieval_strategy", "segments"),
-SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get("retrieval_similarity_threshold", -1),
-SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get("max_neighbors", -1),
-SemanticConvention.GEN_AI_RAG_FILE_IDS: str(kwargs.get("file_ids", "")),
-SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get("path", "")
-}
-# Set common span attributes.
-setup_common_span_attributes(span, request_model, kwargs, tokens,
-server_port, server_address, environment, application_name,
-extra_attrs)
+prompt = format_content(scope._kwargs.get("messages", []))
+request_model = scope._kwargs.get("model", "jamba-1.5-mini")

+cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+# Common Span Attributes
+common_span_attributes(scope,
+SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+scope._server_address, scope._server_port, request_model, scope._response_model,
+environment, application_name, False, scope._tbt, scope._ttft, version)
+
+# RAG-specific span attributes
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS, scope._kwargs.get("max_segments", -1))
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_STRATEGY, scope._kwargs.get("retrieval_strategy", "segments"))
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS, scope._kwargs.get("max_neighbors", -1))
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_FILE_IDS, str(scope._kwargs.get("file_ids", "")))
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH, scope._kwargs.get("path", ""))
+scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+scope._kwargs.get("retrieval_similarity_threshold", -1))
+
+# Standard span attributes
+scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+# Handle tool calls
+if scope._kwargs.get("tools"):
+scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
+str(scope._choices[0].get("message", {}).get("tool_calls", "")))
+
+# Content attributes
 if capture_message_content:
-
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+# To be removed once the change to span_attributes (from span events) is complete
+scope._span.add_event(
 name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-attributes={
+attributes={
+SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+},
+)
+scope._span.add_event(
+name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+attributes={
+SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+},
 )

-
+scope._span.set_status(Status(StatusCode.OK))
+
+# Metrics
+if not disable_metrics:
+record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+cost, scope._tbt, scope._ttft)
+
+def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
+environment, application_name, metrics, start_time, span, capture_message_content=False,
+disable_metrics=False, version="1.0.0", **kwargs):
+"""
+Process RAG chat request and generate Telemetry
+"""
+
+# Create scope object
+scope = type("GenericScope", (), {})()
+response_dict = response_as_dict(response)
+
+scope._start_time = start_time
+scope._end_time = time.time()
+scope._span = span
+
+# Format input messages and calculate input tokens
+prompt = format_content(kwargs.get("messages", []))
+input_tokens = general_tokens(prompt)
+
+# Process response choices
 choices = response_dict.get("choices", [])
 aggregated_completion = []
+output_tokens = 0
+
 for i in range(kwargs.get("n", 1)):
-# Get the response content from each choice and count tokens.
 content = choices[i].get("content", "")
 aggregated_completion.append(content)
 output_tokens += general_tokens(content)
-if kwargs.get("tools"):
-span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
-str(choices[i].get("message", {}).get("tool_calls")))
-# Set output type based on actual content type.
-if isinstance(content, str):
-span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
-elif content is not None:
-span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "json")
-
-# Concatenate completion responses.
-llmresponse = "".join(aggregated_completion)
-tokens["output_tokens"] = output_tokens
-tokens["total_tokens"] = input_tokens + output_tokens
-
-cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
-span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
-
-span.set_status(Status(StatusCode.OK))

-
-
-
-
-
+scope._llmresponse = "".join(aggregated_completion)
+scope._response_id = response_dict.get("id", "")
+scope._response_model = request_model
+scope._input_tokens = input_tokens
+scope._output_tokens = output_tokens
+scope._timestamps = []
+scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+scope._server_address, scope._server_port = server_address, server_port
+scope._kwargs = kwargs
+scope._finish_reason = ""
+scope._tools = None
+scope._choices = choices
+
+common_chat_rag_logic(scope, pricing_info, environment, application_name, metrics,
+capture_message_content, disable_metrics, version)

-if not disable_metrics:
-record_common_metrics(metrics, application_name, environment, request_model,
-server_address, server_port, start_time, end_time,
-input_tokens, output_tokens, cost, include_tbt=False)
 return response
```
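The `format_content` helper introduced above replaces the removed `extract_and_format_input`/`concatenate_all_contents` pair with a single function that flattens a message list into one prompt string. A small usage sketch, assuming the helper behaves exactly as written in the hunk (the message values are made up for illustration):

```python
# Exercising the new format_content helper from the hunk above.
from openlit.instrumentation.ai21.utils import format_content

messages = [
    {"role": "system", "content": "You are concise."},
    {"role": "user", "content": [{"type": "text", "text": "Summarise the release notes."}]},
]

print(format_content(messages))
# system: You are concise.
# user: text: Summarise the release notes.
```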
openlit/instrumentation/openai/__init__.py
CHANGED

```diff
@@ -12,7 +12,7 @@ from openlit.instrumentation.openai.async_openai import (async_chat_completions,
 from openlit.instrumentation.openai.async_openai import async_image_generate, async_image_variatons
 from openlit.instrumentation.openai.async_openai import async_audio_create, async_responses

-_instruments = ("openai >= 1.
+_instruments = ("openai >= 1.92.0",)

 class OpenAIInstrumentor(BaseInstrumentor):
 """An instrumentor for OpenAI's client library."""
@@ -129,14 +129,14 @@ class OpenAIInstrumentor(BaseInstrumentor):
 )

 wrap_function_wrapper(
-"openai.resources.
+"openai.resources.chat.completions",
 "Completions.parse",
 chat_completions_parse(version, environment, application_name, tracer, pricing_info,
 capture_message_content, metrics, disable_metrics),
 )

 wrap_function_wrapper(
-"openai.resources.
+"openai.resources.chat.completions",
 "AsyncCompletions.parse",
 async_chat_completions_parse(version, environment, application_name, tracer, pricing_info,
 capture_message_content, metrics, disable_metrics),
```
openlit/instrumentation/vllm/__init__.py
CHANGED

```diff
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""

 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)

 class VLLMInstrumentor(BaseInstrumentor):
 """
-An instrumentor for vLLM
+An instrumentor for vLLM client library.
 """

 def instrumentation_dependencies(self) -> Collection[str]:
 return _instruments

 def _instrument(self, **kwargs):
-application_name = kwargs.get("application_name", "
-environment = kwargs.get("environment", "
+application_name = kwargs.get("application_name", "default")
+environment = kwargs.get("environment", "default")
 tracer = kwargs.get("tracer")
 metrics = kwargs.get("metrics_dict")
 pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
 disable_metrics = kwargs.get("disable_metrics")
 version = importlib.metadata.version("vllm")

-#
+# Chat completions
 wrap_function_wrapper(
 "vllm.entrypoints.llm",
 "LLM.generate",
 generate(version, environment, application_name,
-
+tracer, pricing_info, capture_message_content, metrics, disable_metrics),
 )

 def _uninstrument(self, **kwargs):
-# Proper uninstrumentation logic to revert patched methods
 pass
```
openlit/instrumentation/vllm/utils.py
CHANGED

```diff
@@ -1,15 +1,15 @@
 """
-
+vLLM OpenTelemetry instrumentation utility functions
 """
-
 import time
-
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-calculate_tbt,
-get_chat_model_cost,
 general_tokens,
-
+get_chat_model_cost,
+common_span_attributes,
+record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention

@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
 return args[1]
 return None

+def format_content(prompts):
+"""
+Process a list of prompts to extract content.
+"""
+
+if isinstance(prompts, str):
+return prompts
+elif isinstance(prompts, list):
+return "\n".join(str(prompt) for prompt in prompts)
+else:
+return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 capture_message_content, disable_metrics, version, is_stream):
 """
 Process chat request and generate Telemetry
 """

-
-
-
-
-
-
-
-
-
-
-
-
+request_model = scope._request_model
+
+# Extract prompts and completions from vLLM response
+input_tokens = 0
+output_tokens = 0
+prompt = ""
+completion = ""
+
+for output in scope._response:
+prompt += output.prompt + "\n"
+if output.outputs and len(output.outputs) > 0:
+completion += output.outputs[0].text + "\n"
+input_tokens += general_tokens(output.prompt)
+output_tokens += general_tokens(output.outputs[0].text)
+
+cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+# Common Span Attributes
+common_span_attributes(scope,
+SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+scope._server_address, scope._server_port, request_model, request_model,
+environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+# Span Attributes for Request parameters
 inference_config = get_inference_config(scope._args, scope._kwargs)
 if inference_config:
-
-
-
-
-
-
-(
-
-
-
-
-value = getattr(inference_config, key, None)
-if value is not None:
-scope._span.set_attribute(attribute, value)
-
-scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+getattr(inference_config, 'presence_penalty', 0.0))
+scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+getattr(inference_config, 'frequency_penalty', 0.0))
+
+# Span Attributes for Response parameters
 scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")

-#
-scope._span.set_attribute(
-scope._span.set_attribute(
-scope._span.set_attribute(SemanticConvention.
-scope._span.set_attribute(SemanticConvention.
-scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-input_tokens = 0
-output_tokens = 0
-cost = 0
+# Span Attributes for Cost and Tokens
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)

+# Span Attributes for Content
 if capture_message_content:
-prompt
-completion
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)

-
-prompt += output.prompt + "\n"
-if output.outputs and len(output.outputs) > 0:
-completion += output.outputs[0].text + "\n"
-input_tokens += general_tokens(output.prompt)
-output_tokens += general_tokens(output.outputs[0].text)
-
-# Add a single event for prompt
+# To be removed once the change to span_attributes (from span events) is complete
 scope._span.add_event(
 name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
 attributes={
 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
 },
 )
-
-# Add a single event for completion
 scope._span.add_event(
 name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
 attributes={
@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 },
 )

-scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-input_tokens)
-scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-output_tokens)
-scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-input_tokens + output_tokens)
-
-# Calculate cost of the operation
-cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
 scope._span.set_status(Status(StatusCode.OK))

-
-
-
-
-
-
-request_model=scope._request_model,
-server_address=scope._server_address,
-server_port=scope._server_port,
-response_model=scope._request_model,
-)
-metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-metrics['genai_requests'].add(1, metrics_attributes)
-metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-metrics['genai_cost'].record(cost, metrics_attributes)
-metrics['genai_client_usage_tokens'].record(
-input_tokens + output_tokens, metrics_attributes)
+# Metrics
+if not disable_metrics:
+record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+scope._server_address, scope._server_port, request_model, request_model, environment,
+application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+cost, scope._tbt, scope._ttft)

 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
 environment, application_name, metrics, start_time, span, args, kwargs,
@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, serve
 """
 Process chat request and generate Telemetry
 """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+# Create scope object
+scope = type("GenericScope", (), {})()
+
+scope._response = response
+scope._start_time = start_time
+scope._end_time = time.time()
+scope._span = span
+scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+scope._server_address = server_address
+scope._server_port = server_port
+scope._request_model = request_model
+scope._timestamps = []
+scope._args = args
+scope._kwargs = kwargs
+
+common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 capture_message_content, disable_metrics, version, is_stream=False)

 return response
```
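Both new `process_chat_response` implementations in this release (AI21 and vLLM) build their state container with the same `type("GenericScope", (), {})()` idiom before handing it to `common_chat_logic`. A short standalone sketch of what that idiom does; the attribute values here are placeholders, not taken from the package:

```python
import time

# An empty class instance that accepts arbitrary attribute assignment,
# used as a lightweight per-request state container ("scope").
scope = type("GenericScope", (), {})()

scope._start_time = time.time()
scope._end_time = time.time()
scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
scope._server_address, scope._server_port = "127.0.0.1", 443

print(scope._ttft >= 0)  # True
```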
openlit/instrumentation/vllm/vllm.py
CHANGED

```diff
@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention

-
-
-
-def generate(version, environment, application_name,
-tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+capture_message_content, metrics, disable_metrics):
 """
 Generates a telemetry wrapper for GenAI function call
 """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
 """
 Wraps the GenAI function call.
 """
-
 server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
 request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"

@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
 disable_metrics=disable_metrics,
 version=version,
 )
+
 except Exception as e:
 handle_exception(span, e)
-logger.error("Error in trace creation: %s", e)

 return response

```
{openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
```
{openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/RECORD
CHANGED

```diff
@@ -1,4 +1,4 @@
-openlit/__helpers.py,sha256=
+openlit/__helpers.py,sha256=x_HA-B3v0lawXeg3_yASXAzN0P0hChrgWyYXdLGY0Pw,14862
 openlit/__init__.py,sha256=ris6-GY0ePSbK_jvawHTXymGClVF7yeKdIT95IRBl18,24086
 openlit/evals/__init__.py,sha256=nJe99nuLo1b5rf7pt9U9BCdSDedzbVi2Fj96cgl7msM,380
 openlit/evals/all.py,sha256=oWrue3PotE-rB5WePG3MRYSA-ro6WivkclSHjYlAqGs,7154
@@ -14,10 +14,10 @@ openlit/guard/sensitive_topic.py,sha256=RgVw_laFERv0nNdzBsAd2_3yLomMOK-gVq-P7oj1
 openlit/guard/utils.py,sha256=6hE3rCRjFXYjKRQYUo8YsqUSlvod48nOWp8MwoQEYdw,7670
 openlit/instrumentation/ag2/__init__.py,sha256=KgyLJBmwAxRWu7Z0S8FDDK4TZ13EFoAAIalvG5Oq4wc,1839
 openlit/instrumentation/ag2/ag2.py,sha256=eNQziyeZl4396GsIp5qI1Dne2KcnQMmhftW7joKQvNU,6934
-openlit/instrumentation/ai21/__init__.py,sha256=
-openlit/instrumentation/ai21/ai21.py,sha256=
-openlit/instrumentation/ai21/async_ai21.py,sha256=
-openlit/instrumentation/ai21/utils.py,sha256=
+openlit/instrumentation/ai21/__init__.py,sha256=tKX643fwxPWPJq1EXEZd0Xpd6B0jl_ViPFmJ87f5B08,2539
+openlit/instrumentation/ai21/ai21.py,sha256=zyQMfCLcOFG1tQWrZmGeMaVAmj8MtCUeXQtPHmlUAO0,6533
+openlit/instrumentation/ai21/async_ai21.py,sha256=q1Dhxru4tUJu0U1Px3PptNqrSGW0-VfRGcqkLKFR8vQ,6659
+openlit/instrumentation/ai21/utils.py,sha256=5zf69uw_TT8u-q-6R6rBeGm1bX0WpsbrAq-MTTZJ9Bk,14309
 openlit/instrumentation/anthropic/__init__.py,sha256=QEsiwdxcQDzzlVYR4_x7KTdf0-UJDJt8FjwNQMspnxM,1929
 openlit/instrumentation/anthropic/anthropic.py,sha256=NxJJjhsu9sSFIlBp322olGkPlLt9Bn5sndaugYA68dE,5149
 openlit/instrumentation/anthropic/async_anthropic.py,sha256=ivJGygKWVTS2hWWX12_g1tiq-5mpeHXETZsWoFZL3UE,5235
@@ -99,7 +99,7 @@ openlit/instrumentation/ollama/__init__.py,sha256=WxjqjuR8ovMU5dR08OELNqClbuM7ns
 openlit/instrumentation/ollama/async_ollama.py,sha256=ORXwem8lgSrhOcci55NkChIK9SNc3IYIpLjF_ogsGA8,6666
 openlit/instrumentation/ollama/ollama.py,sha256=8mvrWfU1c5h1L7lxWo47YBJ7g2u7QZmSZuuP0URtTDo,6538
 openlit/instrumentation/ollama/utils.py,sha256=TIE3_ur2U-iyCclna7TzwjDIFC9PZjRnZqNDV6NfG-0,11958
-openlit/instrumentation/openai/__init__.py,sha256=
+openlit/instrumentation/openai/__init__.py,sha256=KI3ncllea3VzK0lvBfZXBhs2EClSLe38WEPdIL4_SOo,6311
 openlit/instrumentation/openai/async_openai.py,sha256=JyA8MDxWCM38Te6mJzBdfonRgIIlo2ziLn7HOmzqxxo,81398
 openlit/instrumentation/openai/openai.py,sha256=5fgRyK5dUN2zUdrN0vBSZFnSEAXf2dKS0qnq_85-mQE,81175
 openlit/instrumentation/openai_agents/__init__.py,sha256=tRTSIrUtkXc_lfQnVanXmQLd2Sy9RqBNTHF5FhhZx7o,1530
@@ -131,14 +131,14 @@ openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=
-openlit/instrumentation/vllm/utils.py,sha256=
-openlit/instrumentation/vllm/vllm.py,sha256=
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.
-openlit-1.34.
-openlit-1.34.
-openlit-1.34.
+openlit-1.34.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.13.dist-info/METADATA,sha256=4uHfQSKnuT-yfoNz7kj78yd53TBFDCDYVhOIsz7XF8k,23470
+openlit-1.34.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.13.dist-info/RECORD,,
```

{openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/LICENSE
File without changes

{openlit-1.34.11.dist-info → openlit-1.34.13.dist-info}/WHEEL
File without changes