openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
openlit/instrumentation/litellm/async_litellm.py

```diff
@@ -1,30 +1,32 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring LiteLLM calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
 get_chat_model_cost,
 get_embed_model_cost,
-
+general_tokens,
 handle_exception,
 response_as_dict,
+calculate_ttft,
+calculate_tbt,
+create_metrics_attributes,
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def acompletion(
-
+def acompletion(version, environment, application_name,
+tracer, pricing_info, trace_content, metrics, disable_metrics):
 """
 Generates a telemetry wrapper for chat completions to collect metrics.
 
 Args:
-gen_ai_endpoint: Endpoint identifier for logging and tracing.
 version: Version of the monitoring package.
 environment: Deployment environment (e.g., production, staging).
 application_name: Name of the application using the LiteLLM SDK.
```
```diff
@@ -51,16 +53,27 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 wrapped,
 span,
 kwargs,
+server_address,
+server_port,
 **args,
 ):
 self.__wrapped__ = wrapped
 self._span = span
-
-self.
-self.
+self._llmresponse = ''
+self._response_id = ''
+self._response_model = ''
+self._finish_reason = ''
+self._response_service_tier = ''
 
 self._args = args
 self._kwargs = kwargs
+self._start_time = time.time()
+self._end_time = None
+self._timestamps = []
+self._ttft = 0
+self._tbt = 0
+self._server_address = server_address
+self._server_port = server_port
 
 async def __aenter__(self):
 await self.__wrapped__.__aenter__()
```
```diff
@@ -79,6 +92,14 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 async def __anext__(self):
 try:
 chunk = await self.__wrapped__.__anext__()
+end_time = time.time()
+# Record the timestamp for the current chunk
+self._timestamps.append(end_time)
+
+if len(self._timestamps) == 1:
+# Calculate time to first chunk
+self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
 chunked = response_as_dict(chunk)
 # Collect message IDs and aggregated response from events
 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
```
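The streaming wrapper now timestamps every chunk in `__anext__` and derives time-to-first-token from the first entry; time-between-tokens is computed later from the full list. The helpers `calculate_ttft` and `calculate_tbt` live in `openlit/__helpers.py` and are not shown in this diff, so the sketch below is only a plausible reading of how they are used here (first-chunk latency and average inter-chunk gap), not their actual implementation.

```python
# Illustrative stand-ins for calculate_ttft / calculate_tbt as used above;
# the real helpers in openlit/__helpers.py may differ.
from typing import List

def calculate_ttft(timestamps: List[float], start_time: float) -> float:
    # Time to first token: gap between request start and the first streamed chunk.
    return timestamps[0] - start_time if timestamps else 0.0

def calculate_tbt(timestamps: List[float]) -> float:
    # Time between tokens: average gap between consecutive streamed chunks.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0.0
```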
```diff
@@ -88,80 +109,114 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 if content:
 self._llmresponse += content
 self._response_id = chunked.get('id')
+self._response_model = chunked.get('model')
+self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+self._response_service_tier = str(chunked.get('system_fingerprint'))
 return chunk
 except StopAsyncIteration:
 # Handling exception ensure observability without disrupting operation
 try:
+self._end_time = time.time()
+if len(self._timestamps) > 1:
+self._tbt = calculate_tbt(self._timestamps)
+
 # Format 'messages' into a single string
-message_prompt = self._kwargs.get(
+message_prompt = self._kwargs.get('messages', '')
 formatted_messages = []
 for message in message_prompt:
-role = message[
-content = message[
+role = message['role']
+content = message['content']
 
 if isinstance(content, list):
 content_str = ", ".join(
-# pylint: disable=line-too-long
 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
 if "type" in item else f'text: {item["text"]}'
 for item in content
 )
-formatted_messages.append(f
+formatted_messages.append(f'{role}: {content_str}')
 else:
-formatted_messages.append(f
-prompt =
+formatted_messages.append(f'{role}: {content}')
+prompt = '\n'.join(formatted_messages)
+
+request_model = self._kwargs.get('model', 'openai/gpt-4o')
 
 # Calculate tokens using input prompt and aggregated response
-
-
-completion_tokens = openai_tokens(self._llmresponse,
-self._kwargs.get("model", "gpt-3.5-turbo"))
+input_tokens = general_tokens(prompt)
+output_tokens = general_tokens(self._llmresponse)
 
 # Calculate cost of the operation
-cost = get_chat_model_cost(
-pricing_info,
-
+cost = get_chat_model_cost(request_model,
+pricing_info, input_tokens,
+output_tokens)
 
-# Set Span attributes
-self._span.set_attribute(TELEMETRY_SDK_NAME,
-self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+# Set Span attributes (OTel Semconv)
+self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
 self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-self._span.set_attribute(SemanticConvetion.
-
+self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+request_model)
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+self._kwargs.get('seed', ''))
+self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+self._server_port)
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+self._kwargs.get('frequency_penalty', 0.0))
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+self._kwargs.get('max_tokens', -1))
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+self._kwargs.get('presence_penalty', 0.0))
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+self._kwargs.get('stop', []))
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+self._kwargs.get('temperature', 1.0))
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+self._kwargs.get('top_p', 1.0))
+self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+[self._finish_reason])
 self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
 self._response_id)
-self._span.set_attribute(SemanticConvetion.
+self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+self._response_model)
+self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+input_tokens)
+self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+output_tokens)
+self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+self._server_address)
+self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+self._kwargs.get('service_tier', 'auto'))
+self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+self._response_service_tier)
+self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+self._response_service_tier)
+if isinstance(self._llmresponse, str):
+self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+'text')
+else:
+self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+'json')
+
+# Set Span attributes (Extra)
+self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
 environment)
-self._span.set_attribute(
+self._span.set_attribute(SERVICE_NAME,
 application_name)
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-self._kwargs.get("model", "gpt-3.5-turbo"))
 self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-self._kwargs.get(
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-self._kwargs.get("top_p", 1.0))
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-self._kwargs.get("max_tokens", -1))
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-self._kwargs.get("temperature", 1.0))
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-self._kwargs.get("presence_penalty", 0.0))
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-self._kwargs.get("frequency_penalty", 0.0))
-self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-self._kwargs.get("seed", ""))
+self._kwargs.get('user', ''))
 self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
 True)
-self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-prompt_tokens)
-self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-completion_tokens)
 self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+input_tokens + output_tokens)
 self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
 cost)
+self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+self._tbt)
+self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+self._ttft)
+self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+version)
 if trace_content:
 self._span.add_event(
 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
```
```diff
@@ -175,36 +230,40 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
 },
 )
-
 self._span.set_status(Status(StatusCode.OK))
 
 if disable_metrics is False:
-attributes =
-
-
-SemanticConvetion.
-
-
-
-
-
-SemanticConvetion.GEN_AI_OPERATION:
-SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-SemanticConvetion.GEN_AI_REQUEST_MODEL:
-self._kwargs.get("model", "gpt-3.5-turbo")
-}
-
-metrics["genai_requests"].add(1, attributes)
-metrics["genai_total_tokens"].add(
-prompt_tokens + completion_tokens, attributes
+attributes = create_metrics_attributes(
+service_name=application_name,
+deployment_environment=environment,
+operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+request_model=request_model,
+server_address=self._server_address,
+server_port=self._server_port,
+response_model=self._response_model,
 )
-
-metrics[
-
+
+metrics['genai_client_usage_tokens'].record(
+input_tokens + output_tokens, attributes
+)
+metrics['genai_client_operation_duration'].record(
+self._end_time - self._start_time, attributes
+)
+metrics['genai_server_tbt'].record(
+self._tbt, attributes
+)
+metrics['genai_server_ttft'].record(
+self._ttft, attributes
+)
+metrics['genai_requests'].add(1, attributes)
+metrics['genai_completion_tokens'].add(output_tokens, attributes)
+metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+metrics['genai_cost'].record(cost, attributes)
 
 except Exception as e:
 handle_exception(self._span, e)
-logger.error(
+logger.error('Error in trace creation: %s', e)
 finally:
 self._span.end()
 raise
```
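The hunk above replaces the hand-built metrics attribute dictionary with a single `create_metrics_attributes(...)` call and records the new TTFT/TBT histograms alongside the existing counters. The helper is defined in `openlit/__helpers.py` (also touched in this release) and its body is not part of this diff; a rough sketch of what such a factory might return, assuming OTel GenAI-style attribute keys, is:

```python
# Rough sketch based only on the keyword arguments visible in this diff;
# the actual helper in openlit/__helpers.py may use different key names.
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    return {
        TELEMETRY_SDK_NAME: 'openlit',
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        'gen_ai.operation.name': operation,      # illustrative key names
        'gen_ai.system': system,
        'gen_ai.request.model': request_model,
        'gen_ai.response.model': response_model,
        'server.address': server_address,
        'server.port': server_port,
    }
```

Recording the same dimension set on every counter and histogram keeps `genai_requests`, `genai_client_usage_tokens`, and the duration histograms aggregable by model, system, and deployment.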
```diff
@@ -227,76 +286,113 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 """
 
 # Check if streaming is enabled for the API call
-streaming = kwargs.get(
+streaming = kwargs.get('stream', False)
+server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
+request_model = kwargs.get('model', 'openai/gpt-4o')
+
+span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
 
 # pylint: disable=no-else-return
 if streaming:
 # Special handling for streaming response to accommodate the nature of data flow
 awaited_wrapped = await wrapped(*args, **kwargs)
-span = tracer.start_span(
+span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-return TracedAsyncStream(awaited_wrapped, span, kwargs)
+return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
+# Handling for non-streaming responses
 # Handling for non-streaming responses
 else:
-
-
+with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+start_time = time.time()
 response = await wrapped(*args, **kwargs)
+end_time = time.time()
 
 response_dict = response_as_dict(response)
 
 try:
 # Format 'messages' into a single string
-message_prompt = kwargs.get(
+message_prompt = kwargs.get('messages', '')
 formatted_messages = []
 for message in message_prompt:
-role = message[
-content = message[
+role = message['role']
+content = message['content']
 
 if isinstance(content, list):
 content_str = ", ".join(
-# pylint: disable=line-too-long
 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
 if "type" in item else f'text: {item["text"]}'
 for item in content
 )
-formatted_messages.append(f
+formatted_messages.append(f'{role}: {content_str}')
 else:
-formatted_messages.append(f
-prompt =
+formatted_messages.append(f'{role}: {content}')
+prompt = '\n'.join(formatted_messages)
 
-
-
-
-
+input_tokens = response_dict.get('usage').get('prompt_tokens')
+output_tokens = response_dict.get('usage').get('completion_tokens')
+
+# Calculate cost of the operation
+cost = get_chat_model_cost(request_model,
+pricing_info, input_tokens,
+output_tokens)
+
+# Set base span attribues (OTel Semconv)
+span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-span.set_attribute(SemanticConvetion.
-
+span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+request_model)
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+kwargs.get('seed', ''))
+span.set_attribute(SemanticConvetion.SERVER_PORT,
+server_port)
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+kwargs.get('frequency_penalty', 0.0))
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+kwargs.get('max_tokens', -1))
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+kwargs.get('presence_penalty', 0.0))
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+kwargs.get('stop', []))
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+kwargs.get('temperature', 1.0))
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+kwargs.get('top_p', 1.0))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-response_dict.get(
-span.set_attribute(SemanticConvetion.
+response_dict.get('id'))
+span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+response_dict.get('model'))
+span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+input_tokens)
+span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+output_tokens)
+span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+server_address)
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+kwargs.get('service_tier', 'auto'))
+span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+str(response_dict.get('system_fingerprint')))
+
+# Set base span attribues (Extras)
+span.set_attribute(DEPLOYMENT_ENVIRONMENT,
 environment)
-span.set_attribute(
+span.set_attribute(SERVICE_NAME,
 application_name)
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-kwargs.get("model", "gpt-3.5-turbo"))
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-kwargs.get("top_p", 1.0))
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-kwargs.get(
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-kwargs.get("temperature", 1.0))
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-kwargs.get("presence_penalty", 0.0))
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-kwargs.get("frequency_penalty", 0.0))
-span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-kwargs.get("seed", ""))
+kwargs.get('user', ''))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
 False)
+span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+input_tokens + output_tokens)
+span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+cost)
+span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+end_time - start_time)
+span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+version)
 if trace_content:
 span.add_event(
 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
```
```diff
@@ -305,121 +401,81 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
 },
 )
 
-
-if "tools" not in kwargs:
-# Calculate cost of the operation
-cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-response_dict.get('usage', {}).get('completion_tokens', None))
-
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-response_dict.get('usage', {}).get('prompt_tokens', None))
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-response_dict.get('usage', {}).get('completion_tokens', None))
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-response_dict.get('usage', {}).get('total_tokens', None))
+for i in range(kwargs.get('n',1)):
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-[response_dict.get('choices'
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-span.add_event(
-name=attribute_name,
-attributes={
-SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-},
-)
-i += 1
-
-# Return original response
-return response
-
-# Set span attributes when tools is passed to the function call
-elif "tools" in kwargs:
-# Calculate cost of the operation
-cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-pricing_info, response_dict.get('usage').get('prompt_tokens'),
-response_dict.get('usage').get('completion_tokens'))
-span.add_event(
-name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-attributes={
-SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-},
-)
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-response_dict.get('usage').get('prompt_tokens'))
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-response_dict.get('usage').get('completion_tokens'))
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-response_dict.get('usage').get('total_tokens'))
-span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-cost)
+[response_dict.get('choices')[i].get('finish_reason')])
+if trace_content:
+span.add_event(
+name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+attributes={
+# pylint: disable=line-too-long
+SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+},
+)
+if kwargs.get('tools'):
+span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+'text')
+elif response_dict.get('choices')[i].get('message').get('content') is not None:
+span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+'json')
 
 span.set_status(Status(StatusCode.OK))
 
 if disable_metrics is False:
-attributes =
-
-
-SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
-
-
-metrics[
-
-
+attributes = create_metrics_attributes(
+service_name=application_name,
+deployment_environment=environment,
+operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+request_model=request_model,
+server_address=server_address,
+server_port=server_port,
+response_model=response_dict.get('model'),
+)
+
+metrics['genai_client_usage_tokens'].record(
+input_tokens + output_tokens, attributes
+)
+metrics['genai_client_operation_duration'].record(
+end_time - start_time, attributes
+)
+metrics['genai_server_ttft'].record(
+end_time - start_time, attributes
+)
+metrics['genai_requests'].add(1, attributes)
+metrics['genai_completion_tokens'].add(output_tokens, attributes)
+metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+metrics['genai_cost'].record(cost, attributes)
 
 # Return original response
 return response
 
 except Exception as e:
 handle_exception(span, e)
-logger.error(
+logger.error('Error in trace creation: %s', e)
 
 # Return original response
 return response
 
 return wrapper
 
-def aembedding(
-
+def aembedding(version, environment, application_name,
+tracer, pricing_info, trace_content, metrics, disable_metrics):
 """
 Generates a telemetry wrapper for embeddings to collect metrics.
-
+
 Args:
-gen_ai_endpoint: Endpoint identifier for logging and tracing.
 version: Version of the monitoring package.
 environment: Deployment environment (e.g., production, staging).
-application_name: Name of the application using the
+application_name: Name of the application using the LiteLLM API.
 tracer: OpenTelemetry tracer for creating spans.
-pricing_info: Information used for calculating the cost of
+pricing_info: Information used for calculating the cost of LiteLLM usage.
 trace_content: Flag indicating whether to trace the actual content.
-
+
 Returns:
 A function that wraps the embeddings method to add telemetry.
 """
```
```diff
@@ -441,79 +497,94 @@ def aembedding(gen_ai_endpoint, version, environment, application_name,
 The response from the original 'embeddings' method.
 """
 
-
+server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
+request_model = kwargs.get('model', 'text-embedding-ada-002')
+
+span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+start_time = time.time()
 response = await wrapped(*args, **kwargs)
+end_time = time.time()
+
 response_dict = response_as_dict(response)
 try:
+input_tokens = response_dict.get('usage').get('prompt_tokens')
+
 # Calculate cost of the operation
-cost = get_embed_model_cost(
-
+cost = get_embed_model_cost(request_model,
+pricing_info, input_tokens)
 
-# Set Span attributes
-span.set_attribute(TELEMETRY_SDK_NAME,
-span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
+# Set Span attributes (OTel Semconv)
+span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
 SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-span.set_attribute(SemanticConvetion.
-
-span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-environment)
-span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-application_name)
+span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
+request_model)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-kwargs.get(
-
-
-span.set_attribute(SemanticConvetion.
-
+[kwargs.get('encoding_format', 'float')])
+span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+response_dict.get('model'))
+span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+server_address)
+span.set_attribute(SemanticConvetion.SERVER_PORT,
+server_port)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-
+input_tokens)
+
+# Set Span attributes (Extras)
+span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+environment)
+span.set_attribute(SERVICE_NAME,
+application_name)
+span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+kwargs.get('user', ''))
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+input_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
 cost)
+span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+version)
+
 if trace_content:
 span.add_event(
 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
 attributes={
-SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get(
+SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('input', '')),
 },
 )
 
 span.set_status(Status(StatusCode.OK))
 
 if disable_metrics is False:
-attributes =
-
-
-SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
-
-metrics[
-
-metrics[
-response_dict.get('usage').get('prompt_tokens'), attributes)
-metrics["genai_cost"].record(cost, attributes)
+attributes = create_metrics_attributes(
+service_name=application_name,
+deployment_environment=environment,
+operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+request_model=request_model,
+server_address=server_address,
+server_port=server_port,
+response_model=response_dict.get('model'),
+)
+metrics['genai_client_usage_tokens'].record(
+input_tokens, attributes
+)
+metrics['genai_client_operation_duration'].record(
+end_time - start_time, attributes
+)
+metrics['genai_requests'].add(1, attributes)
+metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+metrics['genai_cost'].record(cost, attributes)
 
 # Return original response
 return response
 
 except Exception as e:
 handle_exception(span, e)
-logger.error(
+logger.error('Error in trace creation: %s', e)
 
 # Return original response
 return response
```