openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
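The recurring change across these modules is visible in the premai.py diff below: the `gen_ai_endpoint` wrapper argument is gone, spans are now named `{operation} {model}`, resource attributes move to the standard `SERVICE_NAME`/`DEPLOYMENT_ENVIRONMENT` constants, and streaming wrappers record per-chunk timestamps to report time-to-first-token (TTFT) and time-between-tokens (TBT). The diff only shows the call sites of the new `calculate_ttft`/`calculate_tbt` helpers; here is a minimal sketch of what they plausibly compute, assuming the real implementations in `openlit/__helpers.py` follow the usual definitions:

```python
# Sketch only -- inferred from the call sites in the diff below:
# calculate_ttft(timestamps, start_time) and calculate_tbt(timestamps).
# The shipped implementations live in openlit/__helpers.py.

def calculate_ttft(timestamps: list, start_time: float) -> float:
    # Time to first token: delay between sending the request
    # and receiving the first streamed chunk.
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: list) -> float:
    # Time between tokens: average gap between consecutive chunks.
    if len(timestamps) > 1:
        gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0.0
```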
openlit/instrumentation/premai/premai.py

```diff
@@ -1,30 +1,32 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, too-many-branches
 """
 Module for monitoring Prem AI API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
-    general_tokens,
     get_chat_model_cost,
     get_embed_model_cost,
-    response_as_dict,
+    general_tokens,
+    handle_exception,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def chat(version, environment, application_name,
+         tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the PremAI API.
@@ -42,13 +44,22 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         Wraps the response to collect message IDs and aggregated response.
         """

-        def __init__(self, wrapped, span, kwargs, **args):
+        def __init__(self, wrapped, span, kwargs, server_address, server_port,**args):
             self.__wrapped__ = wrapped
             self._span = span
             self._llmresponse = ""
             self._response_id = ""
             self._args = args
             self._kwargs = kwargs
+            self._server_address = server_address
+            self._server_port = server_port
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._response_model = ''
+            self._finish_reason = ''

         def __enter__(self):
             # Using context management protocols (if needed)
@@ -64,6 +75,14 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         def __iter__(self):
             try:
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 for chunk in self.__wrapped__:
                     # Assuming `chunk` has similar structure as 'ChatCompletionResponseStream'
                     if chunk.choices:
@@ -72,7 +91,11 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         if first_choice.delta.get('content'):
                             self._llmresponse += first_choice.delta.get('content')

-                    self._response_id = chunk.id
+                    if chunk.choices[0].finish_reason:
+                        self._finish_reason = chunk.choices[0].finish_reason
+                        self._response_id = chunk.id
+                        self._response_model = chunk.model
+
                     if not chunk:
                         # pylint: disable= stop-iteration-return
                         raise StopIteration
@@ -81,6 +104,10 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             finally:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -90,7 +117,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -100,55 +126,79 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

+                    request_model = self._kwargs.get("model", "gpt-4o-mini")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = general_tokens(self._llmresponse)
+                    input_tokens = general_tokens(prompt)
+                    output_tokens = general_tokens(self._llmresponse)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-4o-mini"),
-                                                pricing_info, prompt_tokens,
-                                                completion_tokens)
-
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, input_tokens,
+                                                output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                                 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                                gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                                request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                                self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                                self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                                self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                                self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                                self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                                self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                                self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                                self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                                [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                                 self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                                self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                                input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                                output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                                self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                                 environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                                 application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                                self._kwargs.get("model", "gpt-4o-mini"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                                 self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                                self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                                self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                                self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                                self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                                self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                                self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                                 True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                                prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                                completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                                prompt_tokens + completion_tokens)
+                                                input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                                 cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                                self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                                self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                                version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -162,31 +212,35 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 except Exception as e:
@@ -214,22 +268,25 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "gpt-4o-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
-
-                response_dict = response_as_dict(response)
+                end_time = time.time()

                 try:
                     # Format 'messages' into a single string
@@ -241,7 +298,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -251,38 +307,62 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

-                    # Set base span attribues
+                    input_tokens = response.usage.prompt_tokens
+                    output_tokens = response.usage.completion_tokens
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, input_tokens,
+                                                output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                        response_dict.additional_properties["id"])
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                        application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                        kwargs.get("model", "gpt-4o-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                        kwargs.get("top_p", 1.0))
+                                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                        server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                         kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                        kwargs.get("temperature", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                         kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                        kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                        kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.additional_properties.get('id'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                        response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                        input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                        output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                        server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                        environment)
+                    span.set_attribute(SERVICE_NAME,
+                                        application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                         False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                        end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                        version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -290,94 +370,50 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                             },
                         )
-
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-4o-mini"),
-                                                    pricing_info, response_dict.usage.prompt_tokens,
-                                                    response_dict.usage.completion_tokens)
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                            [response_dict.choices[0].finish_reason])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[0].message.content,
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[i].message.content,
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                    pricing_info, response_dict.usage.prompt_tokens,
-                                                    response_dict.usage.completion_tokens)
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response.choices[0].message.content),
                             },
                         )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
+
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                        str(response.choices[0].message.tool_calls))
+
+                    if kwargs.get('response_format', '') != '':
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "json")
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "text")

                     span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response.model,
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.usage.total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.usage.completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.usage.prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 # Return original response
```
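Both chat paths above (and the embedding path below) replace the hand-built metrics attribute dict with the shared `create_metrics_attributes` helper. Its return shape is not shown in this diff; here is a sketch of what it plausibly produces, with plain-string keys standing in for the `SemanticConvetion` and OTel resource constants:

```python
# Illustrative sketch -- the real helper ships in openlit/__helpers.py and
# uses SemanticConvetion/OTel constants rather than these literal keys.
def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address,
                              server_port, response_model):
    return {
        "telemetry.sdk.name": "openlit",
        "service.name": service_name,
        "deployment.environment": deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }
```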
```diff
@@ -392,18 +428,17 @@ def chat(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the PremAI API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of PremAI usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
@@ -427,71 +462,85 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'embeddings' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
-            response_dict = response_as_dict(response)
+            end_time = time.time()
+
             try:
+                input_tokens = response.usage.prompt_tokens
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.usage.prompt_tokens)
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                     SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "text-embedding-ada-002"))
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                    kwargs.get("encoding_format", "float"))
-
-
+                                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    response.model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                     kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                    response_dict.usage.prompt_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response_dict.usage.total_tokens)
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.usage.total_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.usageprompt_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

                 # Return original response
```