openlit 1.34.17__py3-none-any.whl → 1.34.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/instrumentation/anthropic/__init__.py +20 -22
- openlit/instrumentation/anthropic/anthropic.py +39 -46
- openlit/instrumentation/anthropic/async_anthropic.py +40 -47
- openlit/instrumentation/anthropic/utils.py +144 -170
- openlit/instrumentation/cohere/__init__.py +42 -28
- openlit/instrumentation/cohere/async_cohere.py +148 -557
- openlit/instrumentation/cohere/cohere.py +147 -556
- openlit/instrumentation/cohere/utils.py +330 -0
- {openlit-1.34.17.dist-info → openlit-1.34.19.dist-info}/METADATA +1 -1
- {openlit-1.34.17.dist-info → openlit-1.34.19.dist-info}/RECORD +12 -11
- {openlit-1.34.17.dist-info → openlit-1.34.19.dist-info}/LICENSE +0 -0
- {openlit-1.34.17.dist-info → openlit-1.34.19.dist-info}/WHEEL +0 -0
openlit/instrumentation/anthropic/utils.py

@@ -3,63 +3,92 @@ Anthropic OpenTelemetry instrumentation utility functions
 """
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
     calculate_ttft,
     response_as_dict,
     calculate_tbt,
-    extract_and_format_input,
     get_chat_model_cost,
-    create_metrics_attributes,
-    otel_event,
-    concatenate_all_contents
+    record_completion_metrics,
+    common_span_attributes,
 )
 from openlit.semcov import SemanticConvention
 
-def …
+def format_content(messages):
+    """
+    Format the messages into a string for span events.
+    """
+
+    if not messages:
+        return ""
+
+    formatted_messages = []
+    for message in messages:
+        if isinstance(message, dict):
+            role = message.get("role", "user")
+            content = message.get("content", "")
+        else:
+            # Handle Anthropic object format
+            role = getattr(message, "role", "user")
+            content = getattr(message, "content", "")
+
+        if isinstance(content, list):
+            # Handle structured content (e.g., text + images)
+            text_parts = []
+            for part in content:
+                if isinstance(part, dict) and part.get("type") == "text":
+                    text_parts.append(part.get("text", ""))
+            content = " ".join(text_parts)
+        elif not isinstance(content, str):
+            content = str(content)
+
+        formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
+
+def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
     """
 
     end_time = time.time()
     # Record the timestamp for the current chunk
-    …
+    scope._timestamps.append(end_time)
 
-    if len(…
+    if len(scope._timestamps) == 1:
         # Calculate time to first chunk
-        …
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
 
     chunked = response_as_dict(chunk)
 
     # Collect message IDs and input token from events
-    if chunked.get(…
-    …
+    if chunked.get("type") == "message_start":
+        scope._response_id = chunked.get("message").get("id")
+        scope._input_tokens = chunked.get("message").get("usage").get("input_tokens")
+        scope._response_model = chunked.get("message").get("model")
+        scope._response_role = chunked.get("message").get("role")
 
     # Collect message IDs and aggregated response from events
-    if chunked.get(…
-    …
+    if chunked.get("type") == "content_block_delta":
+        if chunked.get("delta").get("text"):
+            scope._llmresponse += chunked.get("delta").get("text")
+        elif chunked.get("delta").get("partial_json"):
+            scope._tool_arguments += chunked.get("delta").get("partial_json")
+
+    if chunked.get("type") == "content_block_start":
+        if chunked.get("content_block").get("id"):
+            scope._tool_id = chunked.get("content_block").get("id")
+        if chunked.get("content_block").get("name"):
+            scope._tool_name = chunked.get("content_block").get("name")
 
     # Collect output tokens and stop reason from events
-    if chunked.get(…
-    …
+    if chunked.get("type") == "message_delta":
+        scope._output_tokens = chunked.get("usage").get("output_tokens")
+        scope._finish_reason = chunked.get("delta").get("stop_reason")
 
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    …
+                      capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """
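The format_content helper added above replaces the removed extract_and_format_input / concatenate_all_contents pair with a single pass that flattens each message to "role: content". Reproduced below as a standalone snippet so its output is visible; the trimmed body follows the logic in the hunk, and the sample messages are invented for illustration.

def format_content(messages):
    # Trimmed copy of the helper added in the hunk above.
    if not messages:
        return ""
    formatted_messages = []
    for message in messages:
        if isinstance(message, dict):
            role = message.get("role", "user")
            content = message.get("content", "")
        else:
            # Anthropic object format
            role = getattr(message, "role", "user")
            content = getattr(message, "content", "")
        if isinstance(content, list):
            # Structured content: keep only the text parts
            content = " ".join(part.get("text", "") for part in content
                               if isinstance(part, dict) and part.get("type") == "text")
        elif not isinstance(content, str):
            content = str(content)
        formatted_messages.append(f"{role}: {content}")
    return "\n".join(formatted_messages)

# Invented sample: one plain message, one structured text+image message.
print(format_content([
    {"role": "system", "content": "Answer briefly."},
    {"role": "user", "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image", "source": {"type": "base64", "data": "..."}},
    ]},
]))
# system: Answer briefly.
# user: What is in this image?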
@@ -68,48 +97,56 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    formatted_messages = …
-    request_model = scope._kwargs.get(…
+    formatted_messages = format_content(scope._kwargs.get("messages", []))
+    request_model = scope._kwargs.get("model", "claude-3-5-sonnet-latest")
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
-    # …
-    scope…
-    …
-    scope._span.set_attribute(SemanticConvention.…
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_ANTHROPIC,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop_sequences", []))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("top_k", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
-    scope._span.set_attribute(SemanticConvention.…
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-    …
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-        'text' if isinstance(scope._llmresponse, str) else 'json')
-
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
-    # …
-    …
+    # Handle tool calls if present
+    if scope._tool_calls:
+        # Optimized tool handling - extract name, id, and arguments
+        tool_name = scope._tool_calls.get("name", "")
+        tool_id = scope._tool_calls.get("id", "")
+        tool_args = scope._tool_calls.get("input", "")
+
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, tool_name)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, tool_id)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(tool_args))
+
+    # Span Attributes for Content
     if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_messages)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: …
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: formatted_messages,
             },
         )
         scope._span.add_event(
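This hunk folds the hand-set resource and span attributes (SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, TBT/TTFT, SDK version) into one common_span_attributes call and records every request parameter with an explicit default, so absent parameters still appear on the span. Below is a minimal sketch of that recording pattern on a plain OpenTelemetry span, assuming the SemanticConvention constants resolve to the standard gen_ai.request.* attribute strings; the exact values live in openlit.semcov and are not part of this diff.

from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

provider = TracerProvider()
exporter = InMemorySpanExporter()
provider.add_span_processor(SimpleSpanProcessor(exporter))
tracer = provider.get_tracer("demo")

kwargs = {"model": "claude-3-5-sonnet-latest", "max_tokens": 1024}  # no sampling params given

with tracer.start_as_current_span("chat") as span:
    # Same .get(key, default) pattern as the hunk: absent parameters are
    # recorded with a sentinel/default instead of being omitted from the span.
    span.set_attribute("gen_ai.request.max_tokens", kwargs.get("max_tokens", -1))
    span.set_attribute("gen_ai.request.temperature", kwargs.get("temperature", 1.0))
    span.set_attribute("gen_ai.request.top_p", kwargs.get("top_p", 1.0))

print(dict(exporter.get_finished_spans()[0].attributes))
# {'gen_ai.request.max_tokens': 1024, 'gen_ai.request.temperature': 1.0, 'gen_ai.request.top_p': 1.0}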
@@ -119,133 +156,70 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
             },
         )
 
-    choice_event_body = {
-        'finish_reason': scope._finish_reason,
-        'index': 0,
-        'message': {
-            **({'content': scope._llmresponse} if capture_message_content else {}),
-            'role': scope._response_role
-        }
-    }
-
-    if scope._tool_calls:
-        choice_event_body['message'].update({
-            'tool_calls': {
-                'function': {
-                    'name': scope._tool_calls.get('name', ''),
-                    'arguments': scope._tool_calls.get('input', '')
-                },
-                'id': scope._tool_calls.get('id', ''),
-                'type': 'function'
-            }
-        })
-
-    # Emit events
-    for role in ['user', 'system', 'assistant', 'tool']:
-        if formatted_messages.get(role, {}).get('content', ''):
-            event = otel_event(
-                name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
-                attributes={
-                    SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_ANTHROPIC
-                },
-                body = {
-                    # pylint: disable=line-too-long
-                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
-                    'role': formatted_messages.get(role, {}).get('role', []),
-                    **({
-                        'tool_calls': {
-                            'function': {
-                                # pylint: disable=line-too-long
-                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
-                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
-                            },
-                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
-                            'type': 'function'
-                        }
-                    } if role == 'assistant' else {}),
-                    **({
-                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
-                    } if role == 'tool' else {})
-                }
-            )
-            event_provider.emit(event)
-
-    choice_event = otel_event(
-        name=SemanticConvention.GEN_AI_CHOICE,
-        attributes={
-            SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_ANTHROPIC
-        },
-        body=choice_event_body
-    )
-    event_provider.emit(choice_event)
-
     scope._span.set_status(Status(StatusCode.OK))
 
+    # Record metrics
     if not disable_metrics:
-        metrics_attributes = create_metrics_attributes(
-            …
-            system=SemanticConvention.GEN_AI_SYSTEM_ANTHROPIC,
-            request_model=request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._response_model,
-        )
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_ANTHROPIC,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
 
-        …
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-
-def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+                                    capture_message_content=False, disable_metrics=False, version=""):
     """
-    Process chat …
+    Process streaming chat response and generate telemetry.
     """
-    …
+
+    if scope._tool_id != "":
+        scope._tool_calls = {
+            "id": scope._tool_id,
+            "name": scope._tool_name,
+            "input": scope._tool_arguments
         }
 
-    common_chat_logic(…
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=True)
 
 def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, …
-    span, capture_message_content=False, disable_metrics=False, version=…
+    environment, application_name, metrics, start_time,
+    span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
     """
-    Process chat …
+    Process non-streaming chat response and generate telemetry.
     """
 
-    …
+    scope = type("GenericScope", (), {})()
     response_dict = response_as_dict(response)
 
     # pylint: disable = no-member
-    …
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._llmresponse = response_dict.get("content", [{}])[0].get("text", "")
+    scope._response_role = response_dict.get("role", "assistant")
+    scope._input_tokens = response_dict.get("usage").get("input_tokens")
+    scope._output_tokens = response_dict.get("usage").get("output_tokens")
+    scope._response_model = response_dict.get("model", "")
+    scope._finish_reason = response_dict.get("stop_reason", "")
+    scope._response_id = response_dict.get("id", "")
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    # Handle tool calls if present
+    content_blocks = response_dict.get("content", [])
+    scope._tool_calls = None
+    for block in content_blocks:
+        if block.get("type") == "tool_use":
+            scope._tool_calls = {
+                "id": block.get("id", ""),
+                "name": block.get("name", ""),
+                "input": block.get("input", "")
+            }
+            break
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
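The rewritten process_chat_response now builds the same scope object that the streaming path accumulates chunk by chunk, so both paths end in common_chat_logic. The type("GenericScope", (), {})() expression is simply an anonymous attribute bag; a minimal illustration with invented token counts:

from types import SimpleNamespace

# The ad-hoc container from the hunk: an anonymous class with one instance,
# used purely as a bag of underscore-prefixed attributes.
scope = type("GenericScope", (), {})()
scope._input_tokens = 12
scope._output_tokens = 48
print(scope._input_tokens + scope._output_tokens)  # 60

# Functionally equivalent for this purpose:
scope2 = SimpleNamespace(_input_tokens=12, _output_tokens=48)
print(scope2._input_tokens + scope2._output_tokens)  # 60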
openlit/instrumentation/cohere/__init__.py

@@ -1,74 +1,88 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of Cohere Functions"""
+
 from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.cohere.cohere import …
-…
+from openlit.instrumentation.cohere.cohere import (
+    chat,
+    chat_stream,
+    embed
+)
+from openlit.instrumentation.cohere.async_cohere import (
+    async_chat,
+    async_chat_stream,
+    async_embed
+)
 
 _instruments = ("cohere >= 5.14.0",)
 
 class CohereInstrumentor(BaseInstrumentor):
-    """…
+    """
+    An instrumentor for Cohere client library.
+    """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name")
-        environment = kwargs.get("environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
-        pricing_info = kwargs.get("pricing_info")
-        capture_message_content = kwargs.get("capture_message_content")
+        pricing_info = kwargs.get("pricing_info", {})
+        capture_message_content = kwargs.get("capture_message_content", False)
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("cohere")
 
-        # …
+        # sync chat completions
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "ClientV2.chat",
+            "cohere.client_v2",
+            "ClientV2.chat",
             chat(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
+
+        # sync chat streaming
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "ClientV2.chat_stream",
+            "cohere.client_v2",
+            "ClientV2.chat_stream",
             chat_stream(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
+
+        # sync embeddings
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "ClientV2.embed",
+            "cohere.client_v2",
+            "ClientV2.embed",
             embed(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-        # …
+        # async chat completions
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "AsyncClientV2.chat",
+            "cohere.client_v2",
+            "AsyncClientV2.chat",
             async_chat(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
+        # async chat streaming
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "AsyncClientV2.chat_stream",
+            "cohere.client_v2",
+            "AsyncClientV2.chat_stream",
             async_chat_stream(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
+        # async embeddings
         wrap_function_wrapper(
-            "cohere.client_v2",
-            "AsyncClientV2.embed",
+            "cohere.client_v2",
+            "AsyncClientV2.embed",
             async_embed(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-    @staticmethod
     def _uninstrument(self, **kwargs):
         pass