openlit 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,7 +120,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    True)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -130,7 +130,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   finish_reason)
+                   [finish_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    prompt_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
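Note: both fixes in these two hunks are about OpenTelemetry attribute typing. `max_tokens` now falls back to the integer sentinel `-1` instead of `""`, keeping the attribute numeric, and the finish reason is wrapped in a list so the attribute becomes a string array. A minimal sketch of the idea, assuming OTel GenAI semantic-convention attribute names (the `SemanticConvetion` constants resolve to similar strings; the names below are assumptions, not values read from this package):

```python
from opentelemetry import trace

tracer = trace.get_tracer("demo")
with tracer.start_as_current_span("chat") as span:
    # A numeric field keeps a numeric sentinel instead of an empty string
    span.set_attribute("gen_ai.request.max_tokens", -1)
    # Finish reasons are modeled as a string array, hence [finish_reason]
    span.set_attribute("gen_ai.response.finish_reasons", ["end_turn"])
```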
@@ -140,10 +140,18 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                       llmresponse)
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+        },
+    )

 span.set_status(Status(StatusCode.OK))

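Note: prompt and completion payloads move off span attributes and onto span events, so bulky message content rides as timestamped events instead of attributes. A minimal sketch of the migration, assuming a configured tracer and shortened event/attribute names:

```python
from opentelemetry import trace

tracer = trace.get_tracer("demo")
prompt, llmresponse = "Hi", "Hello!"
with tracer.start_as_current_span("anthropic.messages") as span:
    # Before: span.set_attribute("gen_ai.content.prompt", prompt)
    # After: the content is recorded as span events
    span.add_event(name="gen_ai.content.prompt",
                   attributes={"gen_ai.prompt": prompt})
    span.add_event(name="gen_ai.content.completion",
                   attributes={"gen_ai.completion": llmresponse})
```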
@@ -224,7 +232,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    False)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -234,7 +242,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   response.stop_reason)
+                   [response.stop_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    response.usage.input_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -244,11 +252,21 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
                    response.usage.output_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
+
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    # pylint: disable=line-too-long
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.content[0].text if response.content else "")
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            # pylint: disable=line-too-long
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content[0].text if response.content else "",
+        },
+    )

 span.set_status(Status(StatusCode.OK))

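Note: the non-streaming path keeps its guard against an empty `content` list before indexing into the response. A self-contained illustration with stubbed stand-ins (hypothetical classes, not the Anthropic SDK types):

```python
from dataclasses import dataclass, field

@dataclass
class TextBlock:      # hypothetical stand-in for an Anthropic content block
    text: str

@dataclass
class Message:        # hypothetical stand-in for an Anthropic Message
    content: list = field(default_factory=list)

response = Message(content=[])
# Same guard as the hunk above: fall back to "" when content is empty
completion = response.content[0].text if response.content else ""
assert completion == ""
```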
@@ -120,7 +120,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    True)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -130,7 +130,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   finish_reason)
+                   [finish_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    prompt_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -140,10 +140,18 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                       llmresponse)
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+        },
+    )

 span.set_status(Status(StatusCode.OK))

@@ -224,7 +232,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    False)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -234,7 +242,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   response.stop_reason)
+                   [response.stop_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    response.usage.input_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -245,10 +253,19 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    # pylint: disable=line-too-long
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.content[0].text if response.content else "")
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            # pylint: disable=line-too-long
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content[0].text if response.content else "",
+        },
+    )

 span.set_status(Status(StatusCode.OK))

@@ -6,9 +6,9 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper

-from openlit.instrumentation.bedrock.bedrock import chat
+from openlit.instrumentation.bedrock.bedrock import converse

-_instruments = ("boto3 >= 1.34.93",)
+_instruments = ("boto3 >= 1.34.138",)

 class BedrockInstrumentor(BaseInstrumentor):
     """
@@ -32,7 +32,7 @@ class BedrockInstrumentor(BaseInstrumentor):
 wrap_function_wrapper(
     "botocore.client",
     "ClientCreator.create_client",
-    chat("bedrock.invoke_model", version, environment, application_name,
+    converse("bedrock.converse", version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics),
 )

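Note: the instrumentor now hooks the Bedrock Converse API instead of `invoke_model`, which is why the boto3 floor rises to a release that ships the `converse` operation on `bedrock-runtime` clients. A hedged sketch of the call shape the wrapper intercepts; region and model ID are placeholders:

```python
import boto3

client = boto3.client("bedrock-runtime", region_name="us-east-1")
response = client.converse(
    modelId="amazon.titan-text-express-v1",
    messages=[{"role": "user", "content": [{"text": "Hello"}]}],
)
# The fields the new wrapper reads:
print(response["usage"]["inputTokens"],
      response["usage"]["outputTokens"],
      response["output"]["message"]["content"][0]["text"])
```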
@@ -4,15 +4,14 @@ Module for monitoring Amazon Bedrock API calls.
 """

 import logging
-import json
 from botocore.response import StreamingBody
 from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
 from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
 from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
 from opentelemetry.trace import SpanKind, Status, StatusCode
 from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_image_model_cost
-from openlit.__helpers import handle_exception, general_tokens
+from openlit.__helpers import get_chat_model_cost
+from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
@@ -49,7 +48,7 @@ class CustomStreamWrapper(StreamingBody):
         return data_chunk


-def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+def converse(gen_ai_endpoint, version, environment, application_name, tracer,
          pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
@@ -80,166 +79,23 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
     Returns:
         Response from the original method.
     """
-    def handle_image(span, model, request_body, response_body):
-        cost = 0
-        if "amazon" in model:
-            # pylint: disable=line-too-long
-            size = str(request_body.get("imageGenerationConfig", {}).get("width", 1024)) + "x" + str(request_body.get("imageGenerationConfig", {}).get("height", 1024))
-            quality = request_body.get("imageGenerationConfig", {}).get("quality", "standard")
-            n = request_body.get("imageGenerationConfig", {}).get("numberOfImages", 1)

-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
-                               size)
-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
-                               quality)
-            # Calculate cost of the operation
-            cost = n * get_image_model_cost(model,
-                                            pricing_info, size, quality)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body.get("textToImageParams")["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_embed(span, model, request_body, response_body):
-        prompt_tokens, cost = 0, 0
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens)
-            # Calculate cost of the operation
-            cost = get_embed_model_cost(model,
-                                        pricing_info, prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["inputText"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens, attributes
-            )
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_chat(span, model, request_body, response_body):
-        prompt_tokens, completion_tokens, cost = 0, 0, 0
-
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            completion_tokens = response_body["results"][0]["tokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["results"][0]["completionReason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["inputText"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["results"][0]["outputText"])
-
-        elif "mistral" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["outputs"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["outputs"][0]["stop_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["outputs"][0]["text"])
-
-        elif "anthropic" in model:
-            prompt_tokens = response_body["usage"]["input_tokens"]
-            completion_tokens = response_body["usage"]["output_tokens"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["stop_reason"])
+    def converse_wrapper(original_method, *method_args, **method_kwargs):
+        """
+        Adds instrumentation to the invoke model call.

-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
+        Args:
+            original_method: The original invoke model method.
+            *method_args: Positional arguments for the method.
+            **method_kwargs: Keyword arguments for the method.
+        Returns:
+            The modified response with telemetry.
+        """
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            response = original_method(*method_args, **method_kwargs)

-            if trace_content:
-                # Format 'messages' into a single string
-                message_prompt = request_body["messages"]
+            try:
+                message_prompt = method_kwargs.get("messages", "")
                 formatted_messages = []
                 for message in message_prompt:
                     role = message["role"]
@@ -256,145 +112,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                     else:
                         formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   prompt)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["content"][0]["text"])
-        elif "meta" in model:
-            prompt_tokens = response_body["prompt_token_count"]
-            completion_tokens = response_body["generation_token_count"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["stop_reason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["generation"])
-
-        elif "cohere" in model and "command-r" not in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["generations"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["generations"][0]["finish_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["generations"][0]["text"])
-        elif "ai21" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["completions"][0]["data"]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["completions"][0]["finishReason"]["reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["completions"][0]["data"]["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens + completion_tokens, attributes
-            )
-            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def add_instrumentation(original_method, *method_args, **method_kwargs):
-        """
-        Adds instrumentation to the invoke model call.
-
-        Args:
-            original_method: The original invoke model method.
-            *method_args: Positional arguments for the method.
-            **method_kwargs: Keyword arguments for the method.
-        Returns:
-            The modified response with telemetry.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-            response = original_method(*method_args, **method_kwargs)
-
-            try:
-                # Modify the response body to be reusable
-                response["body"] = CustomStreamWrapper(
-                    response["body"]._raw_stream, response["body"]._content_length
-                )
-                request_body = json.loads(method_kwargs.get("body"))
-                response_body = json.loads(response.get("body").read())

                 model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
-                if ("stability" in model or "image" in model) and "embed-image" not in model:
-                    generation = "image"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                elif "embed" in model and "embed-image" not in model:
-                    generation = "embeddings"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                else:
-                    generation = "chat"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
+                input_tokens = response["usage"]["inputTokens"]
+                output_tokens = response["usage"]["outputTokens"]

                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
@@ -407,12 +128,60 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                    model)
-                if generation == "chat":
-                    handle_chat(span, model, request_body, response_body)
-                elif generation == "embeddings":
-                    handle_embed(span, model, request_body, response_body)
-                elif generation == "image":
-                    handle_image(span, model, request_body, response_body)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response["output"]["message"]["content"][0]["text"],
+                        },
+                    )
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)

             return response

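Note: the payoff of the Converse migration is visible above — token usage arrives structured in the response, so the per-provider `general_tokens` re-tokenization and `json.loads` body parsing could be deleted. A stubbed sketch of the fields the wrapper reads (`get_chat_model_cost` is openlit-internal, so the cost math is elided; values are made up):

```python
# Response shape taken from the hunk above; not a live Bedrock response.
response = {
    "usage": {"inputTokens": 120, "outputTokens": 80},
    "output": {"message": {"content": [{"text": "Hi there"}]}},
}

input_tokens = response["usage"]["inputTokens"]
output_tokens = response["usage"]["outputTokens"]
total_tokens = input_tokens + output_tokens  # recorded as total tokens
completion = response["output"]["message"]["content"][0]["text"]
print(total_tokens, completion)
```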
@@ -427,9 +196,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
         client = wrapped(*args, **kwargs)

         # Replace the original method with the instrumented one
-        original_invoke_model = client.invoke_model
-        client.invoke_model = lambda *args, **kwargs: add_instrumentation(original_invoke_model,
-                                                                          *args, **kwargs)
+        if kwargs.get("service_name") == "bedrock-runtime":
+            original_invoke_model = client.converse
+            client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model,
+                                                                       *args, **kwargs)

         return client

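Note: instrumentation now attaches only when the created client is for the `bedrock-runtime` service, by rebinding `client.converse` to a lambda that closes over the original bound method. A self-contained illustration of that patching pattern with a toy client (not botocore):

```python
class ToyClient:                      # stand-in for a botocore client
    def converse(self, **kwargs):
        return {"usage": {"inputTokens": 3, "outputTokens": 5}}

def converse_wrapper(original_method, *args, **kwargs):
    response = original_method(*args, **kwargs)  # call through first
    print("traced:", response["usage"])          # telemetry would go here
    return response

client = ToyClient()
original = client.converse                       # capture the bound method
client.converse = lambda *a, **kw: converse_wrapper(original, *a, **kw)
client.converse(modelId="x")                     # now instrumented
```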