openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
openlit/instrumentation/openai/openai.py (+415 -393):

@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat_completions(gen_ai_endpoint, version, environment, application_name,
+def chat_completions(version, environment, application_name,
                      tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -54,6 +57,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -82,6 +98,14 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
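The `calculate_ttft` / `calculate_tbt` helpers used above come from `openlit/__helpers.py` (+83 lines in this release), which is not part of this diff. From the call sites — `calculate_ttft(self._timestamps, self._start_time)` on the first chunk, and `calculate_tbt(self._timestamps)` once more than one chunk has arrived — a minimal sketch of what they compute would be:

```python
# Minimal sketch, inferred from the call sites above; the real
# implementations live in openlit/__helpers.py and are not shown here.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start to the first streamed chunk.
    return timestamps[0] - start_time

def calculate_tbt(timestamps):
    # Time between tokens: average gap between consecutive chunk arrivals.
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)
```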
@@ -91,10 +115,18 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -109,7 +141,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                     content_str_list.append(f'text: {item["text"]}')
                                 elif (item["type"] == "image_url" and
                                       not item["image_url"]["url"].startswith("data:")):
-                                    # pylint: disable=line-too-long
                                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
                             content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
@@ -117,57 +148,87 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)
 
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,31 +242,35 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
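Both the streaming path above and the non-streaming path below now build their metric attributes through the new shared `create_metrics_attributes` helper instead of a hand-built dict. Its body is in `openlit/__helpers.py` (not shown in this diff); judging only by the keyword arguments at the call sites, it plausibly returns something like:

```python
# Assumed shape only — the attribute keys below are guesses in the OTel
# semantic-convention style; the actual helper is in openlit/__helpers.py.
def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    return {
        "telemetry.sdk.name": "openlit",
        "service.name": service_name,
        "deployment.environment": deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }
```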
@@ -234,20 +299,25 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
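Span naming changes with this hunk: 1.33.8 named spans after the wrapped endpoint (the removed `gen_ai_endpoint` argument), while 1.33.9 uses the `"{operation} {model}"` pattern from the OTel GenAI conventions. A quick check of what the f-string produces, assuming `GEN_AI_OPERATION_TYPE_CHAT` resolves to `"chat"` (the constant lives in `openlit/semcov/__init__.py`, also updated in this release but not shown here):

```python
# Assumes SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT == "chat".
operation = "chat"
request_model = "gpt-4o"
span_name = f"{operation} {request_model}"
print(span_name)  # -> "chat gpt-4o"
```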
@@ -261,7 +331,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -271,38 +340,72 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                       kwargs.get("service_tier", "auto"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                       response_dict.get('service_tier'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -311,93 +414,54 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                                   response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[0].get('finish_reason')])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                                   response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
@@ -412,13 +476,12 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -447,40 +510,56 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            response_dict = response_as_dict(response)
            try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
 
                 if trace_content:
                     span.add_event(
@@ -493,26 +572,24 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
@@ -527,118 +604,12 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def finetune(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                                   kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                                   kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                                   kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                                   kwargs.get("hyperparameters.learning_rate_multiplier",
-                                              "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                                   kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                                   kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                                   response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -667,8 +638,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.generate' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -678,28 +657,38 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                 else:
                     image = "url"
 
+                request_model = kwargs.get("model", "dall-e-2")
+
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                                             pricing_info, kwargs.get("size", "1024x1024"),
                                             kwargs.get("quality", "standard"))
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -710,6 +699,9 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                        items.revised_prompt if items.revised_prompt else "")
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -717,7 +709,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                             },
                         )
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -732,21 +724,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -762,13 +753,12 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def image_variatons(gen_ai_endpoint, version, environment, application_name,
+def image_variatons(version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -797,8 +787,16 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.create.variations' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -809,34 +807,45 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                                             kwargs.get("size", "1024x1024"), "standard")
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                        "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -851,21 +860,20 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -881,13 +889,12 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def audio_create(gen_ai_endpoint, version, environment, application_name,
+def audio_create(version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
 
    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -916,28 +923,42 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'audio.speech.create' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                             pricing_info, kwargs.get("input", ""))
 
                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                    kwargs.get("voice", "alloy"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -946,6 +967,8 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                                    kwargs.get("speed", 1))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -957,21 +980,20 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
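Taken together, each factory in this file now derives everything it needs (server address, model, span name) from the wrapped call itself instead of a per-endpoint `gen_ai_endpoint` string, which is what let `openlit/instrumentation/openai/__init__.py` shed its per-endpoint wiring (+11 −230 per the file list above). A hypothetical registration following the `wrapt` pattern openlit uses for its instrumentations — the module and attribute paths are assumptions based on the OpenAI v1 SDK layout:

```python
# Hypothetical wiring, for illustration only; the real registration lives in
# openlit/instrumentation/openai/__init__.py and may differ in detail.
from wrapt import wrap_function_wrapper
from openlit.instrumentation.openai.openai import chat_completions

def instrument_chat(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
    wrap_function_wrapper(
        "openai.resources.chat.completions",  # assumed module path (OpenAI v1 SDK)
        "Completions.create",                 # assumed attribute path
        chat_completions(version, environment, application_name,
                         tracer, pricing_info, trace_content, metrics, disable_metrics),
    )
```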