openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,15 +1,19 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes, inconsistent-return-statements
 """
 Module for monitoring AI21 calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port,
     general_tokens
 )
 from openlit.semcov import SemanticConvetion
@@ -17,13 +21,12 @@ from openlit.semcov import SemanticConvetion
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
+def async_chat(version, environment, application_name,
                tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
@@ -38,6 +41,7 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'ai21.AsyncStream' response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             wrapped,
             span,
             kwargs,
+            server_address,
+            server_port,
             **args,
         ):
             self.__wrapped__ = wrapped
@@ -57,11 +63,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self._prompt_tokens = 0
-            self._completion_tokens = 0
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -80,8 +94,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
                     'content' in chunked.get('choices')[0].get('delta'))):
 
@@ -90,14 +111,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     self._llmresponse += content
 
                 if chunked.get('usage'):
-                    self._prompt_tokens = chunked.get('usage').get("prompt_tokens")
-                    self._completion_tokens = chunked.get('usage').get("completion_tokens")
+                    self._input_tokens = chunked.get('usage').get("prompt_tokens")
+                    self._output_tokens = chunked.get('usage').get("completion_tokens")
 
                 self._response_id = chunked.get('id')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -107,7 +133,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -117,43 +142,74 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "jamba-1.5-mini"),
-                                                pricing_info, self._prompt_tokens,
-                                                self._completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, self._input_tokens,
+                                                self._output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                             self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                             application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "jamba-1.5-mini"))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                              self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature",
+                                             self._kwargs.get("temperature", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                             application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self._prompt_tokens + self._completion_tokens)
+                                             self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -167,31 +223,35 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=request_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            self._prompt_tokens + self._completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                        )
-                        metrics["genai_completion_tokens"].add(self._completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -220,19 +280,25 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
-           span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+           span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-           return TracedAsyncStream(awaited_wrapped, span, kwargs)
+           return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-           with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-
+           with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+               start_time = time.time()
                response = await wrapped(*args, **kwargs)
+               end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -246,7 +312,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                        if isinstance(content, list):
                            content_str = ", ".join(
-                               # pylint: disable=line-too-long
                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                if "type" in item else f'text: {item["text"]}'
                                for item in content
@@ -256,30 +321,64 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                        formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)
 
-
+                   input_tokens = response_dict.get('usage').get('prompt_tokens')
+                   output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                   # Calculate cost of the operation
+                   cost = get_chat_model_cost(request_model,
+                                              pricing_info, input_tokens,
+                                              output_tokens)
+
+                   # Set base span attribues (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                   span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                      SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                   span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                      SemanticConvetion.GEN_AI_TYPE_CHAT)
-                   span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                      gen_ai_endpoint)
-                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                      response_dict.get("id"))
-                   span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                      environment)
-                   span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                      application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                      kwargs.get("model", "jamba-1.5-mini"))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                      kwargs.get("top_p", 1.0))
+                                      request_model)
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                      kwargs.get("seed", ""))
+                   span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                      server_port)
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                      kwargs.get("frequency_penalty", 0.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                       kwargs.get("max_tokens", -1))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                      kwargs.get("presence_penalty", 0.0))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                      kwargs.get("stop", []))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                      kwargs.get("temperature",
+                                      kwargs.get("temperature", 0.4))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                      kwargs.get("top_p", 1.0))
+                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                      response_dict.get("id"))
+                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                      request_model)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                      input_tokens)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                      output_tokens)
+                   span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                      server_address)
+
+                   # Set base span attribues (Extras)
+                   span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                      environment)
+                   span.set_attribute(SERVICE_NAME,
+                                      application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                      input_tokens + output_tokens)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                      cost)
+                   span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                      end_time - start_time)
+                   span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                      version)
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -288,93 +387,54 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                            },
                        )
 
-
-               if "tools" not in kwargs:
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                               pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                               response_dict.get('usage', {}).get('completion_tokens', None))
-
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                      response_dict.get('usage', {}).get('prompt_tokens', None))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                      response_dict.get('usage', {}).get('completion_tokens', None))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                      response_dict.get('usage', {}).get('total_tokens', None))
+               for i in range(kwargs.get('n',1)):
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                      [response_dict.get('choices')[0].get('finish_reason')])
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                       span.add_event(
-                           name=attribute_name,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                           },
-                       )
-                       i += 1
-
-                   # Return original response
-                   return response
-
-               # Set span attributes when tools is passed to the function call
-               elif "tools" in kwargs:
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                      pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                      response_dict.get('usage').get('completion_tokens'))
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                       },
-                   )
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                      response_dict.get('usage').get('prompt_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                      response_dict.get('usage').get('completion_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                      response_dict.get('usage').get('total_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                      cost)
+                                      [response_dict.get('choices')[i].get('finish_reason')])
+                   if trace_content:
+                       span.add_event(
+                           name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                           attributes={
+                               # pylint: disable=line-too-long
+                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                           },
+                       )
+                   if kwargs.get('tools'):
+                       span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                          str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                   if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                          "text")
+                   elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                          "json")
 
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                   attributes = {
-                       TELEMETRY_SDK_NAME:
-                           "openlit",
-                       SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                           application_name,
-                       SemanticConvetion.GEN_AI_SYSTEM:
-                           SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                       SemanticConvetion.GEN_AI_ENVIRONMENT:
-                           environment,
-                       SemanticConvetion.GEN_AI_TYPE:
-                           SemanticConvetion.GEN_AI_TYPE_CHAT,
-                       SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                           kwargs.get("model", "jamba-1.5-mini")
-                   }
+                   attributes = create_metrics_attributes(
+                       service_name=application_name,
+                       deployment_environment=environment,
+                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                       system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                       request_model=request_model,
+                       server_address=server_address,
+                       server_port=server_port,
+                       response_model=request_model,
+                   )
 
+                   metrics["genai_client_usage_tokens"].record(
+                       input_tokens + output_tokens, attributes
+                   )
+                   metrics["genai_client_operation_duration"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_server_ttft"].record(
+                       end_time - start_time, attributes
+                   )
                    metrics["genai_requests"].add(1, attributes)
-                   metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                   metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                   metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
                # Return original response
@@ -389,13 +449,12 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
    return wrapper
 
-def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
+def async_chat_rag(version, environment, application_name,
                   tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for chat completions to collect metrics.
 
    Args:
-       gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the AI21 SDK.
@@ -424,180 +483,173 @@ def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
        The response from the original 'chat.completions' method.
        """
 
-
-
-
-       # pylint: disable=no-else-return
-       if streaming:
-           # # Special handling for streaming response to accommodate the nature of data flow
-           # awaited_wrapped = wrapped(*args, **kwargs)
-           # span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
-
-           # return TracedSyncStream(awaited_wrapped, span, kwargs)
+       server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+       request_model = kwargs.get("model", "jamba-1.5-mini")
 
-
+       span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-
-
-
-
-           response = await wrapped(*args, **kwargs)
+       with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+           start_time = time.time()
+           response = await wrapped(*args, **kwargs)
+           end_time = time.time()
 
-
+           response_dict = response_as_dict(response)
 
-           try:
-               # Format 'messages' into a single string
-               message_prompt = kwargs.get("messages", "")
-               formatted_messages = []
-               for message in message_prompt:
-                   role = message.role
-                   content = message.content
-
-                   if isinstance(content, list):
-                       content_str = ", ".join(
-                           # pylint: disable=line-too-long
-                           f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                           if "type" in item else f'text: {item["text"]}'
-                           for item in content
-                       )
-                       formatted_messages.append(f"{role}: {content_str}")
-                   else:
-                       formatted_messages.append(f"{role}: {content}")
-               prompt = "\n".join(formatted_messages)
-
-               # Set base span attribues
-               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                  SemanticConvetion.GEN_AI_SYSTEM_AI21)
-               span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                  SemanticConvetion.GEN_AI_TYPE_CHAT)
-               span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                  gen_ai_endpoint)
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                  response_dict.get("id"))
-               span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                  environment)
-               span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                  application_name)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                  kwargs.get("model", "jamba-1.5-mini"))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                  False)
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                                  kwargs.get("max_segments", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                                  kwargs.get("retrieval_strategy", "segments"))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                                  kwargs.get("retrieval_similarity_threshold", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                                  kwargs.get("max_neighbors", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                                  str(kwargs.get("file_ids", "")))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                                  kwargs.get("path", ""))
-
-               if trace_content:
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                       },
+           try:
+               # Format 'messages' into a single string
+               message_prompt = kwargs.get("messages", "")
+               formatted_messages = []
+               for message in message_prompt:
+                   role = message.role
+                   content = message.content
+
+                   if isinstance(content, list):
+                       content_str = ", ".join(
+                           f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                           if "type" in item else f'text: {item["text"]}'
+                           for item in content
                    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                       formatted_messages.append(f"{role}: {content_str}")
+                   else:
+                       formatted_messages.append(f"{role}: {content}")
+               prompt = "\n".join(formatted_messages)
+
+               input_tokens = general_tokens(prompt)
+
+               # Set base span attribues (OTel Semconv)
+               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+               span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                  SemanticConvetion.GEN_AI_SYSTEM_AI21)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                  request_model)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                  kwargs.get("seed", ""))
+               span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                  server_port)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                  kwargs.get("frequency_penalty", 0.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                  kwargs.get("max_tokens", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                  kwargs.get("presence_penalty", 0.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                  kwargs.get("stop", []))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                  kwargs.get("temperature", 0.4))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                  kwargs.get("top_p", 1.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                  response_dict.get("id"))
+               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                  request_model)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                  input_tokens)
+               span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                  server_address)
+
+               # Set base span attribues (Extras)
+               span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                  environment)
+               span.set_attribute(SERVICE_NAME,
+                                  application_name)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                  False)
+               span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                  end_time - start_time)
+               span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                  version)
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
+                                  kwargs.get("max_segments", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
+                                  kwargs.get("retrieval_strategy", "segments"))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+                                  kwargs.get("retrieval_similarity_threshold", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
+                                  kwargs.get("max_neighbors", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
+                                  str(kwargs.get("file_ids", "")))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
+                                  kwargs.get("path", ""))
+               if trace_content:
+                   span.add_event(
+                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                       attributes={
+                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                       },
+                   )
+
+               output_tokens = 0
+               for i in range(kwargs.get('n',1)):
+                   output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
 
-
-               else:
-                   i = 0
-                   completion_tokens = 0
-                   while i < kwargs["n"] and trace_content is True:
-                       completion_tokens += general_tokens(response_dict.get('choices')[i].get("message").get("content"))
-                       attribute_name = f"gen_ai.content.completion.{i}"
-                       span.add_event(
-                           name=attribute_name,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                           },
-                       )
-                       i += 1
-
-               # Return original response
-               return response
-
-               # Set span attributes when tools is passed to the function call
-               elif "tools" in kwargs:
-                   completion_tokens = -1
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                      pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                      response_dict.get('usage').get('completion_tokens'))
+               if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                        attributes={
-
+                           # pylint: disable=line-too-long
+                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
                        },
                    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+               if kwargs.get('tools'):
+                   span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                      str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+               if isinstance(response_dict.get('choices')[i].get('content'), str):
+                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                      "text")
+               elif response_dict.get('choices')[i].get('content') is not None:
+                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                      "json")
+
+               # Calculate cost of the operation
+               cost = get_chat_model_cost(request_model,
+                                          pricing_info, input_tokens,
+                                          output_tokens)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                  cost)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                  output_tokens)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                  input_tokens + output_tokens)
+
+               span.set_status(Status(StatusCode.OK))
+
+               if disable_metrics is False:
+                   attributes = create_metrics_attributes(
+                       service_name=application_name,
+                       deployment_environment=environment,
+                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                       system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                       request_model=request_model,
+                       server_address=server_address,
+                       server_port=server_port,
+                       response_model=request_model,
+                   )
+
+                   metrics["genai_client_usage_tokens"].record(
+                       input_tokens + output_tokens, attributes
+                   )
+                   metrics["genai_client_operation_duration"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_server_ttft"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_requests"].add(1, attributes)
+                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                   metrics["genai_cost"].record(cost, attributes)
+
+               # Return original response
+               return response
+
+           except Exception as e:
+               handle_exception(span, e)
+               logger.error("Error in trace creation: %s", e)
+
+               # Return original response
+               return response
 
    return wrapper