openlit-1.33.9-py3-none-any.whl → openlit-1.33.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
openlit/instrumentation/together/together.py

@@ -1,32 +1,35 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes
 """
 Module for monitoring Together calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_image_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def completion(gen_ai_endpoint, version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+def completion(version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together AI
+        application_name: Name of the application using the Together AI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Together AI usage.
         trace_content: Flag indicating whether to trace the actual content.
@@ -38,6 +41,7 @@ def completion(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,12 +63,20 @@ def completion(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self.
-            self.
-            self.
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -81,6 +95,14 @@ def completion(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -89,15 +111,22 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                     content = chunked.get('choices')[0].get('delta').get('content')
                     if content:
                         self._llmresponse += content
-
-
-                self.
-                self.
-
+
+                if chunked.get('usage'):
+                    self._response_id = chunked.get('id')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = str(chunked.get('choices')[0].get('finish_reason'))
+                    self._input_tokens = chunked.get('usage').get('prompt_tokens')
+                    self._output_tokens = chunked.get('usage').get('completion_tokens')
+
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -106,68 +135,89 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                         content = message["content"]
 
                         if isinstance(content, list):
-
-
-
-
-
-
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(
-
-
-
-
-                        self._completion_tokens)
-
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, self._input_tokens,
+                                               self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                             SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                              SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.
-
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get(
-                                                 "model",
-                                                 "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                                             ))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                             self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self.
+                                             self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,36 +231,35 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                        self._kwargs.get("model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["
-
-                        self._total_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
                         )
-                        metrics["
-                        self.
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["
-                        self.
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -219,7 +268,6 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                 finally:
                     self._span.end()
                 raise
-
     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'chat.completions' API call to add telemetry.
@@ -239,20 +287,25 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -266,7 +319,6 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
                        if isinstance(content, list):
                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                if "type" in item else f'text: {item["text"]}'
                                for item in content
@@ -276,39 +328,66 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                            formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)
 
-
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model",
-                                       "meta-llama/Llama-3.3-70B-Instruct-Turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -317,103 +396,54 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                            },
                        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage', {}).get('total_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    # Set span attributes for when n = 1 (default)
-                    if "n" not in kwargs or kwargs["n"] == 1:
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                },
-                            )
-
-                    # Set span attributes for when n > 0
-                    else:
-                        i = 0
-                        while i < kwargs["n"] and trace_content is True:
-                            attribute_name = f"gen_ai.content.completion.{i}"
-                            span.add_event(
-                                name=attribute_name,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                },
-                            )
-                            i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get(
-                        "model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                    ),
-                    pricing_info,
-                    response_dict.get('usage').get('prompt_tokens'),
-                    response_dict.get('usage').get('completion_tokens'))
+                    for i in range(kwargs.get('n',1)):
+                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                           [str(response_dict.get('choices')[i].get('finish_reason'))])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
 
-
-
-
-
-
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                    span.set_status(Status(StatusCode.OK))
 
                    if disable_metrics is False:
-                        attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                        kwargs.get("model", "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                        metrics["genai_requests"].add(1, attributes)
-                        metrics["
-
-                        metrics["genai_completion_tokens"].add(
-                            response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(
-                            response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)
 
                    # Return original response
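The streaming path above now derives two latency signals from a list of per-chunk timestamps: time to first token (TTFT, computed as soon as the first chunk arrives) and time between tokens (TBT, computed once the stream is exhausted and only when more than one chunk was received). The `calculate_ttft` and `calculate_tbt` helpers come from `openlit/__helpers.py` and their bodies are not part of this diff, so the following is a minimal sketch of the assumed semantics, not openlit's actual implementation:

```python
import time

# Hypothetical stand-ins for the real helpers in openlit/__helpers.py,
# whose internals are not shown in this diff. The arithmetic below is an
# assumption about their semantics, kept only for illustration.
def calculate_ttft(timestamps, start_time):
    # Assumed: delay from request start to the first streamed chunk.
    return timestamps[0] - start_time if timestamps else 0

def calculate_tbt(timestamps):
    # Assumed: average gap between consecutive chunks.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0

# Usage mirroring TracedSyncStream: one timestamp per chunk, TTFT once the
# first chunk arrives, TBT only after the stream ends.
start_time = time.time()
timestamps = []
for chunk in ("Hello", ",", " world"):  # stand-in for a streamed response
    time.sleep(0.01)
    timestamps.append(time.time())
    if len(timestamps) == 1:
        ttft = calculate_ttft(timestamps, start_time)
tbt = calculate_tbt(timestamps) if len(timestamps) > 1 else 0
```

Guarding TBT behind `len(self._timestamps) > 1`, as the wrapper does, avoids a degenerate average when the whole response arrives as a single chunk.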
@@ -428,18 +458,17 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together API.
+        application_name: Name of the application using the Together AI API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Together image generation.
+        pricing_info: Information used for calculating the cost of Together AI image generation.
         trace_content: Flag indicating whether to trace the input prompt and generated images.
 
     Returns:
@@ -463,8 +492,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'images.generate' method.
        """
 
-
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0
 
            try:
@@ -474,37 +511,43 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                else:
                    image = "url"
 
+                image_size = str(kwargs.get('width')) + 'x' + str(kwargs.get('height'))
+
                # Calculate cost of the operation
-
-
-                    "model", "black-forest-labs/FLUX.1-dev"
-                ),
-                    pricing_info, "1000000",
+                cost = get_image_model_cost(request_model,
+                                            pricing_info, image_size,
                                            kwargs.get("quality", "standard"))
-                pixels = kwargs.get("width", 1024) * kwargs.get("height", 1024)
-                cost = pixels / 1_000_000 * cost_per_million
 
                for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.id)
-                    span.set_attribute(SemanticConvetion.
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "black-forest-labs/FLUX.1-dev"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       image_size)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -512,7 +555,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
-                        attribute_name = f"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
@@ -527,21 +570,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                    attributes =
-
-
-                    SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
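Both `completion` and `image_generate` now resolve the span's `server.address` and `server.port` through `set_server_address_and_port(instance, "api.together.xyz", 443)` and feed the shared dimensions into `create_metrics_attributes` for every recorded metric. The helper's implementation lives in `openlit/__helpers.py` (extended by `+5 -0` in this release) and is outside this diff; a plausible sketch, assuming it reads the client's configured `base_url` and falls back to the supplied defaults (the attribute name and the `FakeTogetherClient` class below are illustrative assumptions):

```python
from urllib.parse import urlparse

# Illustrative sketch only: the real helper is in openlit/__helpers.py and
# its exact lookup logic is not shown in this diff.
def set_server_address_and_port(instance, default_address, default_port):
    # Assumed: prefer the client's configured base_url, else the defaults.
    base_url = getattr(instance, "base_url", None)  # attribute name assumed
    if not base_url:
        return default_address, default_port
    parsed = urlparse(str(base_url))
    return parsed.hostname or default_address, parsed.port or default_port

class FakeTogetherClient:  # hypothetical stand-in for a Together SDK client
    base_url = "https://api.together.xyz/v1"

print(set_server_address_and_port(FakeTogetherClient(), "api.together.xyz", 443))
# ('api.together.xyz', 443) -- no explicit port in the URL, so the default applies
```

Resolving the address once per call and passing it into both the span attributes and the metric dimensions keeps traces and metrics joinable on the same `server.address`/`server.port` pair.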