openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.7.dist-info/RECORD +0 -122
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
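
The bulk of this release is a rewrite of the per-provider instrumentation toward the OpenTelemetry GenAI semantic conventions, shown below for openlit/instrumentation/cohere/cohere.py. As orientation, here is a minimal sketch of exercising the instrumented Cohere path; it assumes openlit's documented `openlit.init()` entry point and the `cohere` SDK's `ClientV2`, and the model name and OTLP endpoint are illustrative only:

# Minimal sketch: exercising the instrumented Cohere client path.
# Assumes openlit's documented `openlit.init()` entry point and the
# `cohere` SDK; the model name and OTLP endpoint are illustrative.
import openlit
import cohere

# Configures tracers/meters; spans and metrics export via OTLP.
openlit.init(otlp_endpoint="http://127.0.0.1:4318")

client = cohere.ClientV2()  # reads CO_API_KEY from the environment

# Per the diff below, the `chat` wrapper names its span
# "chat {request_model}" and records token usage, cost, and TTFT.
response = client.chat(
    model="command-r-plus-08-2024",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.message.content[0].text)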
openlit/instrumentation/cohere/cohere.py +410 -219

@@ -1,29 +1,37 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Cohere API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def embed(gen_ai_endpoint, version, environment, application_name, tracer,
+def embed(version, environment, application_name, tracer,
           pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
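
The signature change above (dropping `gen_ai_endpoint`) applies to all three factories in this file; spans are now named from the operation type and model rather than a fixed endpoint string. The factories follow the wrapt wrapper-factory pattern: each returns a closure with the `(wrapped, instance, args, kwargs)` signature that openlit's cohere `__init__.py` installs onto the client. A minimal sketch of that shape; `wrap_function_wrapper` is the real wrapt API, but the exact module and method path patched for Cohere is an assumption here:

# Sketch of the wrapper-factory pattern used by `embed`, `chat`, and
# `chat_stream` below; a simplified stand-in, not openlit's actual body.
from wrapt import wrap_function_wrapper

def embed(version, environment, application_name, tracer,
          pricing_info, trace_content, metrics, disable_metrics):
    """Factory: returns the closure that wrapt installs on the client."""
    def wrapper(wrapped, instance, args, kwargs):
        # `wrapped` is the original client method, `instance` the client;
        # telemetry is recorded around this call, as in the diff.
        response = wrapped(*args, **kwargs)
        return response
    return wrapper

# Hypothetical registration, mirroring openlit's instrumentor pattern
# (the module/method path is an assumption):
# wrap_function_wrapper("cohere.client_v2", "ClientV2.embed",
#                       embed(version, environment, application_name, tracer,
#                             pricing_info, trace_content, metrics,
#                             disable_metrics))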
@@ -47,80 +55,85 @@ def embed(gen_ai_endpoint, version, environment, application_name, tracer,
         The response from the original 'embed' method.
         """

-
-
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "mbed-english-v3.0")

-
-        # Get prompt from kwargs and store as a single string
-        prompt = " ".join(kwargs.get("texts", []))
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"

+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()

+            response_dict = response_as_dict(response)
+            try:
+                input_tokens = response_dict.get('meta').get('billed_units').get('input_tokens')
                 # Calculate cost of the operation
                 cost = get_embed_model_cost(kwargs.get("model", "embed-english-v2.0"),
-                                            pricing_info,
-                                            response.meta.billed_units.input_tokens)
+                                            pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                     SemanticConvetion.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                    kwargs.get('embedding_types', ['float']))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                    response_dict.get('response_type'))
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                     environment)
-                span.set_attribute(
+                span.set_attribute(SERVICE_NAME,
                                     application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "embed-english-v2.0"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                    kwargs.get("embedding_types", "float"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                                    kwargs.get("input_type", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                    response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response.meta.billed_units.input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT:
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("texts", "")),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "embed-english-v2.0")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response.meta.billed_units.input_tokens, attributes
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
                     )
-                    metrics["
-
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
                     )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

         # Return original response
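
Two helpers new to openlit/__helpers.py (extended by +83 lines per the file list) do much of the work above: `response_as_dict` normalizes the SDK response before the `billed_units` lookups, and `set_server_address_and_port` resolves the server address/port attributes. Their actual bodies are not part of this diff; the following are illustrative approximations inferred only from the call sites shown here:

# Illustrative approximations of the new helpers, inferred from their
# call sites above; the real bodies in openlit/__helpers.py may differ.
from urllib.parse import urlparse

def response_as_dict(response):
    # Best-effort conversion of an SDK response object to a plain dict.
    if isinstance(response, dict):
        return response
    if hasattr(response, "model_dump"):  # pydantic v2 models
        return response.model_dump()
    if hasattr(response, "dict"):  # pydantic v1 models
        return response.dict()
    return vars(response)

def set_server_address_and_port(instance, default_address, default_port):
    # Prefer the client's configured base URL, falling back to the
    # defaults seen at the call sites ("api.cohere.com", 443).
    base_url = getattr(instance, "base_url", None)  # attribute name assumed
    if not base_url:
        return default_address, default_port
    parsed = urlparse(str(base_url))
    return parsed.hostname or default_address, parsed.port or default_port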
@@ -135,18 +148,17 @@ def embed(gen_ai_endpoint, version, environment, application_name, tracer,

     return wrapper

-def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+def chat(version, environment, application_name, tracer,
          pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
@@ -170,96 +182,145 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
         The response from the original 'chat' method.
         """

-
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "command-r-plus-08-2024")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
+            response_dict = response_as_dict(response)

             try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message["role"]
+                    content = message["content"]
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
+                        )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = response_dict.get('usage').get('billed_units').get('input_tokens')
+                output_tokens = response_dict.get('usage').get('billed_units').get('output_tokens')
+
                 # Calculate cost of the operation
-                cost = get_chat_model_cost(
-
-
-
+                cost = get_chat_model_cost(request_model, pricing_info,
+                                           input_tokens, output_tokens)
+
+                llm_response = response_dict.get('message').get('content')[0].get('text')

-
+                # Set base span attribues (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                     SemanticConvetion.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    kwargs.get("temperature", 0.3))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                    kwargs.get("max_tokens", -1))
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
                                     kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                                     kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                    kwargs.get("max_tokens", -1))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                     kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                    kwargs.get("stop_sequences", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                    kwargs.get("temperature", 0.3))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                    kwargs.get("k", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                    kwargs.get("p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                    response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                if isinstance(llm_response, str):
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "text")
+                else:
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "json")
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                     False)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                    response.generation_id)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                    [response.finish_reason])
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response.meta.billed_units.input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    response.meta.billed_units.output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
-                                    response.meta.billed_units.output_tokens)
+                                    input_tokens + output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)

                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT:
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                         },
                     )
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION:
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "command")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["
-
-                        response.meta.billed_units.output_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(
-                        response.meta.billed_units.output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response.meta.billed_units.input_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

         # Return original response
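
The message-flattening loop added to `chat` reduces the `messages` kwarg to a single newline-joined string for the prompt event. A worked example of that exact logic on a small multimodal input (the sample messages are invented for illustration):

# Worked example of the message-flattening loop added in `chat` above.
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this"},
        {"type": "image_url", "image_url": "https://example.com/cat.png"},
    ]},
]

formatted_messages = []
for message in messages:
    role = message["role"]
    content = message["content"]
    if isinstance(content, list):
        # Multimodal content: render each item as "type: value".
        content_str = ", ".join(
            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
            if "type" in item else f'text: {item["text"]}'
            for item in content
        )
        formatted_messages.append(f"{role}: {content_str}")
    else:
        formatted_messages.append(f"{role}: {content}")

print("\n".join(formatted_messages))
# system: You are terse.
# user: text: Describe this, image_url: https://example.com/cat.png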
@@ -274,18 +335,17 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,

     return wrapper

-def chat_stream(gen_ai_endpoint, version, environment, application_name,
+def chat_stream(version, environment, application_name,
                 tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat_stream to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
@@ -309,111 +369,242 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat_stream' method.
         """

-
-
+        class TracedSyncStream:
+            """
+            Wrapper for streaming responses to collect metrics and trace data.
+            Wraps the 'cohere.AsyncStream' response to collect message IDs and aggregated response.
+
+            This class implements the '__aiter__' and '__anext__' methods that
+            handle asynchronous streaming responses.
+
+            This class also implements '__aenter__' and '__aexit__' methods that
+            handle asynchronous context management protocol.
+            """
+            def __init__(
+                    self,
+                    wrapped,
+                    span,
+                    kwargs,
+                    server_address,
+                    server_port,
+                    **args,
+                ):
+                self.__wrapped__ = wrapped
+                self._span = span
                 # Placeholder for aggregating streaming response
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                self._llmresponse = ""
+                self._response_id = ""
+                self._finish_reason = ""
+                self._input_tokens = ""
+                self._output_tokens = ""
+
+                self._args = args
+                self._kwargs = kwargs
+                self._start_time = time.time()
+                self._end_time = None
+                self._timestamps = []
+                self._ttft = 0
+                self._tbt = 0
+                self._server_address = server_address
+                self._server_port = server_port
+
+            def __enter__(self):
+                self.__wrapped__.__enter__()
+                return self
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+            def __iter__(self):
+                return self
+
+            def __getattr__(self, name):
+                """Delegate attribute access to the wrapped object."""
+                return getattr(self.__wrapped__, name)
+
+            def __next__(self):
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    chunk = self.__wrapped__.__next__()
+                    end_time = time.time()
+                    # Record the timestamp for the current chunk
+                    self._timestamps.append(end_time)
+
+                    if len(self._timestamps) == 1:
+                        # Calculate time to first chunk
+                        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                    chunked = response_as_dict(chunk)
+
+                    if chunked.get('type') == 'message-start':
+                        self._response_id = chunked.get('id')
+
+                    if chunked.get('type') == 'content-delta':
+                        content = chunked.get('delta').get('message').get('text')
+                        if content:
+                            self._llmresponse += content
+
+                    if chunked.get('type') == 'message-end':
+                        self._finish_reason = chunked.get('delta').get('finish_reason')
+                        self._input_tokens = chunked.get('delta').get('usage').get('billed_units').get('input_tokens')
+                        self._output_tokens = chunked.get('delta').get('usage').get('billed_units').get('output_tokens')
+
+                    return chunk
+                except StopIteration:
+                    # Handling exception ensure observability without disrupting operation
+                    try:
+                        self._end_time = time.time()
+                        if len(self._timestamps) > 1:
+                            self._tbt = calculate_tbt(self._timestamps)
+
+                        # Format 'messages' into a single string
+                        message_prompt = self._kwargs.get("messages", "")
+                        formatted_messages = []
+                        for message in message_prompt:
+                            role = message["role"]
+                            content = message["content"]
+
+                            if isinstance(content, list):
+                                content_str_list = []
+                                for item in content:
+                                    if item["type"] == "text":
+                                        content_str_list.append(f'text: {item["text"]}')
+                                    elif (item["type"] == "image_url" and
+                                          not item["image_url"]["url"].startswith("data:")):
+                                        content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                                content_str = ", ".join(content_str_list)
+                                formatted_messages.append(f"{role}: {content_str}")
+                            else:
+                                formatted_messages.append(f"{role}: {content}")
+                        prompt = "\n".join(formatted_messages)
+
+                        request_model = self._kwargs.get("model", "command-r-plus")
+
+                        # Calculate cost of the operation
+                        cost = get_chat_model_cost(request_model,
+                                                   pricing_info, self._input_tokens,
+                                                   self._output_tokens)
+
+                        # Set Span attributes (OTel Semconv)
+                        self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                                 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                                 SemanticConvetion.GEN_AI_SYSTEM_COHERE)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                                 request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                                 self._kwargs.get("seed", ""))
+                        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                                 self._server_port)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                                 self._kwargs.get("frequency_penalty", 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                                 self._kwargs.get("max_tokens", -1))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                                 self._kwargs.get("presence_penalty", 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                                 self._kwargs.get("stop_sequences", []))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                                 self._kwargs.get("temperature", 0.3))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                                 self._kwargs.get("k", 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                                 self._kwargs.get("p", 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                                 [self._finish_reason])
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                                 self._response_id)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                                 request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                                 self._input_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                                 self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                                 self._server_address)
+
+                        if isinstance(self._llmresponse, str):
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                     "text")
+                        else:
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                     "json")
+
+                        # Set Span attributes (Extra)
+                        self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                                 environment)
+                        self._span.set_attribute(SERVICE_NAME,
+                                                 application_name)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                                 True)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                                 self._input_tokens + self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                                 cost)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                                 self._tbt)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                                 self._ttft)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                                 version)
+                        if trace_content:
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                },
+                            )
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                                },
+                            )
+                        self._span.set_status(Status(StatusCode.OK))
+
+                        if disable_metrics is False:
+                            attributes = create_metrics_attributes(
+                                service_name=application_name,
+                                deployment_environment=environment,
+                                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                                system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                                request_model=request_model,
+                                server_address=self._server_address,
+                                server_port=self._server_port,
+                                response_model=request_model,
+                            )
+
+                            metrics["genai_client_usage_tokens"].record(
+                                self._input_tokens + self._output_tokens, attributes
+                            )
+                            metrics["genai_client_operation_duration"].record(
+                                self._end_time - self._start_time, attributes
+                            )
+                            metrics["genai_server_tbt"].record(
+                                self._tbt, attributes
+                            )
+                            metrics["genai_server_ttft"].record(
+                                self._ttft, attributes
+                            )
+                            metrics["genai_requests"].add(1, attributes)
+                            metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                            metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
+                            metrics["genai_cost"].record(cost, attributes)
+
+                    except Exception as e:
+                        handle_exception(self._span, e)
+                        logger.error("Error in trace creation: %s", e)
+                    finally:
+                        self._span.end()
+                    raise
+
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "command-r-plus")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        awaited_wrapped = wrapped(*args, **kwargs)
+        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+        return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

     return wrapper
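
`TracedSyncStream` feeds per-chunk timestamps into `calculate_ttft` and `calculate_tbt`, two more helpers imported from `openlit.__helpers` whose bodies are outside this diff. Consistent with the call sites (`calculate_ttft(timestamps, start_time)` once the first chunk arrives, `calculate_tbt(timestamps)` at stream end), plausible shapes are:

# Plausible shapes for the timing helpers used by TracedSyncStream above;
# inferred from their call sites, not copied from openlit/__helpers.py.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start to the first chunk.
    return timestamps[0] - start_time if timestamps else 0

def calculate_tbt(timestamps):
    # Average time between tokens across consecutive chunk arrivals.
    if len(timestamps) < 2:
        return 0
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)

Recording TTFT as soon as the first chunk arrives and TBT only when more than one chunk was seen matches the `len(self._timestamps)` guards in the wrapper above.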