openlit 1.33.20__py3-none-any.whl → 1.33.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +57 -0
- openlit/instrumentation/ollama/__init__.py +47 -34
- openlit/instrumentation/ollama/async_ollama.py +4 -2
- openlit/instrumentation/ollama/ollama.py +4 -2
- openlit/instrumentation/ollama/utils.py +8 -4
- openlit/instrumentation/transformers/__init__.py +11 -7
- openlit/instrumentation/transformers/transformers.py +30 -166
- openlit/instrumentation/transformers/utils.py +183 -0
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/METADATA +1 -1
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/RECORD +12 -11
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
- {openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0
openlit/__helpers.py
CHANGED
@@ -240,6 +240,11 @@ def extract_and_format_input(messages):
     fixed_roles = ['user', 'assistant', 'system', 'tool', 'developer']
     formatted_messages = {role_key: {'role': '', 'content': ''} for role_key in fixed_roles}
 
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+        return formatted_messages
+
     for message in messages:
         message = response_as_dict(message)
 
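The early return added above lets plain-string input short-circuit before the loop tries to treat it as a list of message dicts. A quick sketch of the resulting behavior, importing the real helper from this release:

from openlit.__helpers import extract_and_format_input

formatted = extract_and_format_input("What is OpenTelemetry?")
# The 'user' slot is filled; the other fixed roles stay empty.
print(formatted["user"])       # {'role': 'user', 'content': 'What is OpenTelemetry?'}
print(formatted["assistant"])  # {'role': '', 'content': ''}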
@@ -276,3 +281,55 @@ def concatenate_all_contents(formatted_messages):
         for message_data in formatted_messages.values()
         if message_data['content']
     )
+
+def format_and_concatenate(messages):
+    """
+    Process a list of messages to extract content, categorize them by role,
+    and concatenate all 'content' fields into a single string with role: content format.
+    """
+
+    formatted_messages = {}
+
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+    elif isinstance(messages, list) and all(isinstance(m, str) for m in messages):
+        # If it's a list of strings, each string is 'user' input
+        user_content = ' '.join(messages)
+        formatted_messages['user'] = {'role': 'user', 'content': user_content}
+    else:
+        for message in messages:
+            message = response_as_dict(message)
+            role = message.get('role', 'unknown')  # Default to 'unknown' if no role is specified
+            content = message.get('content', '')
+
+            # Initialize role in formatted messages if not present
+            if role not in formatted_messages:
+                formatted_messages[role] = {'role': role, 'content': ''}
+
+            # Handle list of dictionaries in content
+            if isinstance(content, list):
+                content_str = []
+                for item in content:
+                    if isinstance(item, dict):
+                        # Collect text or other attributes as needed
+                        text = item.get('text', '')
+                        image_url = item.get('image_url', '')
+                        content_str.append(text)
+                        content_str.append(image_url)
+                content_str = ", ".join(filter(None, content_str))
+            else:
+                content_str = content
+
+            # Concatenate content
+            if formatted_messages[role]['content']:
+                formatted_messages[role]['content'] += ' ' + content_str
+            else:
+                formatted_messages[role]['content'] = content_str
+
+    # Concatenate role and content for all messages
+    return ' '.join(
+        f"{message_data['role']}: {message_data['content']}"
+        for message_data in formatted_messages.values()
+        if message_data['content']
+    )
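A usage sketch of the new helper (message payloads are illustrative): per the code above, roles are joined as "role: content" with single spaces between messages.

from openlit.__helpers import format_and_concatenate

print(format_and_concatenate("Hi there"))
# -> user: Hi there

messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
]
print(format_and_concatenate(messages))
# -> user: What is 2 + 2? assistant: 4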
openlit/instrumentation/ollama/__init__.py
CHANGED
@@ -16,6 +16,29 @@ from openlit.instrumentation.ollama.async_ollama import (
 
 _instruments = ("ollama >= 0.2.0",)
 
+# Dispatch wrapper to route instrumentation to chat or embeddings based on path
+def _dispatch(sync_chat_wrap, sync_emb_wrap):
+    def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return sync_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return sync_emb_wrap(wrapped, instance, args, kwargs)
+        return wrapped(*args, **kwargs)
+    return wrapper
+
+def _dispatch_async(async_chat_wrap, async_emb_wrap):
+    async def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return await async_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return await async_emb_wrap(wrapped, instance, args, kwargs)
+        return await wrapped(*args, **kwargs)
+    return wrapper
+
 class OllamaInstrumentor(BaseInstrumentor):
     """
     An instrumentor for Ollama's client library.
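The dispatchers assume the wrapped `_request` methods receive the endpoint path as their third positional argument; routing is just the last path segment, as this standalone sketch of the same expression shows (paths are illustrative):

def route(path):
    # Same expression as in _dispatch above: take the last path segment.
    return path.rstrip("/").split("/")[-1]

print(route("/api/chat"))        # -> chat        (handled by the chat wrapper)
print(route("/api/embeddings"))  # -> embeddings  (handled by the embeddings wrapper)
print(route("/api/generate"))    # -> generate    (falls through uninstrumented)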
@@ -35,48 +58,38 @@ class OllamaInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("ollama")
 
-        #
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        # Build wrapper factories for chat and embeddings
+        sync_chat_wrap = chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        sync_emb_wrap = embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-            "embeddings",
-            embeddings(version, environment, application_name,
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_chat_wrap = async_chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-
-
-
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_emb_wrap = async_embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
 
-        #
+        # Patch underlying request methods to ensure instrumentation regardless of import order
         wrap_function_wrapper(
-            "ollama",
-            "
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "Client._request",
+            _dispatch(sync_chat_wrap, sync_emb_wrap),
         )
-
-        # async embeddings
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.
-
-                tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "AsyncClient._request",
+            _dispatch_async(async_chat_wrap, async_emb_wrap),
         )
 
     def _uninstrument(self, **kwargs):
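For reference, `wrap_function_wrapper(module, name, wrapper)` is wrapt's standard patch entry point; a minimal self-contained illustration of the pattern used above, with `Greeter.greet` as a made-up stand-in for `ollama._client.Client._request`:

from wrapt import wrap_function_wrapper

class Greeter:
    def greet(self, name):
        return f"hello {name}"

def traced(wrapped, instance, args, kwargs):
    # Telemetry would be emitted around the call here.
    print("span start")
    try:
        return wrapped(*args, **kwargs)
    finally:
        print("span end")

wrap_function_wrapper(__name__, "Greeter.greet", traced)
print(Greeter().greet("ollama"))  # span start / span end / hello ollama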
openlit/instrumentation/ollama/async_ollama.py
CHANGED
@@ -106,7 +106,8 @@ def async_chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def async_embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
openlit/instrumentation/ollama/ollama.py
CHANGED
@@ -106,7 +106,8 @@ def chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
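Because the patch point is now `Client._request` rather than the public `chat()`/`embeddings()` methods, the model name usually arrives inside the `json=` payload; the fallback above (repeated in the utils hunks below) keeps the old kwarg working. A sketch with an illustrative payload:

# Illustrative kwargs as seen by the patched _request call.
kwargs = {"json": {"model": "llama3", "messages": [{"role": "user", "content": "hi"}]}}

json_body = kwargs.get("json", {}) or {}
request_model = json_body.get("model") or kwargs.get("model")
print(request_model)  # -> llama3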
openlit/instrumentation/ollama/utils.py
CHANGED
@@ -57,8 +57,10 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-
-    request_model = scope._kwargs.get("model"
+    json_body = scope._kwargs.get("json", {}) or {}
+    request_model = json_body.get("model") or scope._kwargs.get("model")
+    messages = json_body.get("messages", scope._kwargs.get("messages", ""))
+    formatted_messages = extract_and_format_input(messages)
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
@@ -252,7 +254,9 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     end_time = time.time()
 
     try:
-
+        json_body = kwargs.get("json", {}) or {}
+        prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
+        input_tokens = general_tokens(str(prompt_val))
 
         # Calculate cost of the operation
         cost = get_embed_model_cost(request_model,
@@ -293,7 +297,7 @@ def process_embedding_response(response, request_model, pricing_info, server_por
                 SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
             },
             body={
-                **({"content":
+                **({"content": prompt_val} if capture_message_content else {}),
                 "role": 'user'
             }
         )
openlit/instrumentation/transformers/__init__.py
CHANGED
@@ -1,16 +1,20 @@
-
-
+"""
+Initializer of Auto Instrumentation of HuggingFace Transformer Functions
+"""
+
 from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.transformers.transformers import
+from openlit.instrumentation.transformers.transformers import pipeline_wrapper
 
-_instruments = ("transformers >= 4.
+_instruments = ("transformers >= 4.48.0",)
 
 class TransformersInstrumentor(BaseInstrumentor):
-    """
+    """
+    An instrumentor for HuggingFace Transformer library.
+    """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
@@ -28,10 +32,10 @@ class TransformersInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "transformers",
             "TextGenerationPipeline.__call__",
-
+            pipeline_wrapper(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-    @staticmethod
     def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
         pass
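End to end, this instrumentor is switched on through `openlit.init()`, after which any `TextGenerationPipeline.__call__` goes through `pipeline_wrapper`; a sketch assuming a local OTLP collector, with an illustrative model:

import openlit
from transformers import pipeline

# Patches TextGenerationPipeline.__call__ with pipeline_wrapper.
openlit.init(otlp_endpoint="http://127.0.0.1:4318")

generator = pipeline("text-generation", model="distilgpt2")
print(generator("Hello, world", max_length=20)[0]["generated_text"])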
openlit/instrumentation/transformers/transformers.py
CHANGED
@@ -1,63 +1,31 @@
 """
-Module for monitoring
+Module for monitoring HF Transformers API calls.
 """
 
 import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    general_tokens,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+
+from openlit.instrumentation.transformers.utils import (
+    process_chat_response,
+)
 from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def
-
+def pipeline_wrapper(version, environment, application_name,
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
        """
-
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-          is used for instance methods. For static and classmethods,
-          this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
+        Wraps the GenAI function call.
        """
 
        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
@@ -68,130 +36,26 @@ def text_wrap(version, environment, application_name,
         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            # pylint: disable=protected-access
-            forward_params = instance._forward_params
-
-            try:
-                if args and len(args) > 0:
-                    prompt = args[0]
-                else:
-                    prompt = kwargs.get("args", "")
-
-                input_tokens = general_tokens(prompt[0])
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                    SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                    SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                    forward_params.get("temperature", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                    forward_params.get("top_p", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                    forward_params.get("max_length", -1))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                    input_tokens)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                    request_model)
-
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SERVICE_NAME,
-                                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                    end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                    version)
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-
-                i = 0
-                output_tokens = 0
-                for completion in response:
-                    if len(response) > 1:
-                        attribute_name = f"gen_ai.content.completion.{i}"
-                    else:
-                        attribute_name = SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT
-                    if capture_message_content:
-                        # pylint: disable=bare-except
-                        try:
-                            llm_response = completion.get('generated_text', '')
-                        except:
-                            llm_response = completion[i].get('generated_text', '')
-
-                        span.add_event(
-                            name=attribute_name,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
-                            },
-                        )
-                        output_tokens += general_tokens(llm_response)
-
-                    i=i+1
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    input_tokens + output_tokens)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model,
-                                            pricing_info, input_tokens,
-                                            output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                    cost)
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-
-
+            response = process_chat_response(
+                instance = instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+            )
+
+            return response
 
     return wrapper
openlit/instrumentation/transformers/utils.py
ADDED
@@ -0,0 +1,183 @@
+"""
+HF Transformers OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    format_and_concatenate
+)
+from openlit.semcov import SemanticConvention
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, args, kwargs, is_stream):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    forward_params = scope._instance._forward_params
+    request_model = scope._instance.model.config.name_or_path
+
+    input_tokens = general_tokens(scope._prompt)
+    output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+
+    # List of attributes and their config keys
+    attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_length"),
+    ]
+
+    # Set each attribute if the corresponding value exists and is not None
+    for attribute, key in attributes:
+        value = forward_params.get(key)
+        if value is not None:
+            scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    # To be removed one the change to span_attributes (from span events) is complete
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse,)
+
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: scope._prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=request_model,
+        )
+
+        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
+        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
+        metrics["genai_cost"].record(cost, metrics_attributes)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time,
+    span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._instance = instance
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._args = args
+
+    if self._args and len(self._args) > 0:
+        self._prompt = args[0]
+    else:
+        self._prompt = (
+            kwargs.get("text_inputs") or
+            (kwargs.get("image") and kwargs.get("question") and
+             ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            kwargs.get("fallback") or
+            ""
+        )
+    self._prompt = format_and_concatenate(self._prompt)
+
+    self._llmresponse = []
+    if self._kwargs.get("task", "text-generation") == "text-generation":
+        first_entry = response_dict[0]
+
+        if isinstance(first_entry, dict) and isinstance(first_entry.get("generated_text"), list):
+            last_element = first_entry.get("generated_text")[-1]
+            self._llmresponse = last_element.get("content", last_element)
+        else:
+            def extract_text(entry):
+                if isinstance(entry, dict):
+                    return entry.get("generated_text")
+                if isinstance(entry, list):
+                    return " ".join(
+                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
+                    )
+                return ""
+
+            # Process and collect all generated texts
+            self._llmresponse = [
+                extract_text(entry) for entry in response_dict
+            ]
+
+            # Join all non-empty responses into a single string
+            self._llmresponse = " ".join(filter(None, self._llmresponse))
+
+    elif self._kwargs.get("task", "text-generation") == "automatic-speech-recognition":
+        self._llmresponse = response_dict.get("text", "")
+
+    elif self._kwargs.get("task", "text-generation") == "image-classification":
+        self._llmresponse = str(response_dict[0])
+
+    elif self._kwargs.get("task", "text-generation") == "visual-question-answering":
+        self._llmresponse = str(response_dict[0]).get("answer")
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+
+    return response
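The `type("GenericScope", (), {})()` line above is a small idiom for an anonymous attribute bag, so `common_chat_logic` can read everything off one object; the same trick in isolation:

# An empty class built at runtime; instances accept arbitrary attributes.
scope = type("GenericScope", (), {})()
scope._prompt = "user: hi"
scope._timestamps = []
print(scope._prompt)  # -> user: hi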
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.20
+Version: 1.33.21
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-openlit/__helpers.py,sha256=
+openlit/__helpers.py,sha256=sg0EGJGC_OlZePR84cLK77l_lZRBPJwdjWjq_RuaYS0,11444
 openlit/__init__.py,sha256=1OzJQmiZrTlT3Aze_l8GOf1GXH7dAHztJn0Uzd1LAPc,23924
 openlit/evals/__init__.py,sha256=nJe99nuLo1b5rf7pt9U9BCdSDedzbVi2Fj96cgl7msM,380
 openlit/evals/all.py,sha256=oWrue3PotE-rB5WePG3MRYSA-ro6WivkclSHjYlAqGs,7154
@@ -90,10 +90,10 @@ openlit/instrumentation/mistral/mistral.py,sha256=_2qM8v4RCL-S0Mm1vbW77m5vUm8aPD
 openlit/instrumentation/multion/__init__.py,sha256=Wr3lcDyG_YbOLkCUzBFhraAedF6E113tce8eSWlcz10,3149
 openlit/instrumentation/multion/async_multion.py,sha256=XutZnayCJOZ_NA9bvE1NUoej41KOGR7FRn2tpoGKMEU,6092
 openlit/instrumentation/multion/multion.py,sha256=-WqRAcu5qiEMY9XDmlJTQHuQiWfdwms9JDn127QCNb8,6074
-openlit/instrumentation/ollama/__init__.py,sha256=
-openlit/instrumentation/ollama/async_ollama.py,sha256=
-openlit/instrumentation/ollama/ollama.py,sha256=
-openlit/instrumentation/ollama/utils.py,sha256=
+openlit/instrumentation/ollama/__init__.py,sha256=v7VhVxHw_c6QtMznxe6a7z6QrYHZsH_NSXfiXao83Ns,3707
+openlit/instrumentation/ollama/async_ollama.py,sha256=zJPDr2ROh1nvFGoxgdTbe04Zr1KhmgJUYFPeuRLQGLk,6667
+openlit/instrumentation/ollama/ollama.py,sha256=MNUltiP9XVT4azmO_-E2vjhFaoHQyJ0Z6c-HnB0_jCE,6563
+openlit/instrumentation/ollama/utils.py,sha256=41uvYaYkGwWfRyHYqhOOwrFy6cMzBlG1urJYUat9Q24,14819
 openlit/instrumentation/openai/__init__.py,sha256=FiL4OHDhs957spa3k9sNC_VLt0-txtwbnujQwnevQ5I,5564
 openlit/instrumentation/openai/async_openai.py,sha256=CiyBpn8Evnd_gh3Cm1WbfkN7eUpDmFh4KMvxka-B4og,71764
 openlit/instrumentation/openai/openai.py,sha256=r8ZNVoAFTPuCUf18a5v1Lp48LXwCeT9paEB-3USSiiU,71507
@@ -114,8 +114,9 @@ openlit/instrumentation/reka/reka.py,sha256=L6gH7j94tcYlc_FCkQP6SrxH7yBr4uSgtN8B
 openlit/instrumentation/together/__init__.py,sha256=MLLL2t8FyrytpfMueqcwekiqTKn-JN40HBD_LbZS_jQ,2661
 openlit/instrumentation/together/async_together.py,sha256=ToSeYqE0mCgSsCNSO0pqoyS7WU6YarHxa3I7ZrzH-d8,30634
 openlit/instrumentation/together/together.py,sha256=7Da9fjHaZk_ObXMnSZA79-RktgwHRVYevsZAA-OpcXY,30530
-openlit/instrumentation/transformers/__init__.py,sha256=
-openlit/instrumentation/transformers/transformers.py,sha256=
+openlit/instrumentation/transformers/__init__.py,sha256=9Ubss5nlumcypxprxff8Fv3sst7II27SsvCzqkBX9Kg,1457
+openlit/instrumentation/transformers/transformers.py,sha256=zCAwfXu77HPlhy7vuU-nvNvsmmU4fs4aaFMCBG5AOLA,1993
+openlit/instrumentation/transformers/utils.py,sha256=UP-aB_hP4SVLQ1A0I-PrNXC3mPJkVZnS7UOkQGe6OXc,8087
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
@@ -125,7 +126,7 @@ openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=urpadRfC_BjLCPxWgk5J6NGStECeJA55LFkyTD43Jd4,6837
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=JF9MwflazC8jHOiQdPYshfv1q5Z9bhB4OGa0N_fr9d4,13305
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
+openlit-1.33.21.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.33.21.dist-info/METADATA,sha256=RP9M2CRa9gXnTVN6I_YjyjoGS8C3zUI04w4lbI6q3yE,23470
+openlit-1.33.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.33.21.dist-info/RECORD,,
{openlit-1.33.20.dist-info → openlit-1.33.21.dist-info}/LICENSE
File without changes