openlit 1.16.2__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. openlit/__init__.py +3 -0
  2. openlit/instrumentation/anthropic/anthropic.py +28 -10
  3. openlit/instrumentation/anthropic/async_anthropic.py +27 -10
  4. openlit/instrumentation/bedrock/bedrock.py +13 -5
  5. openlit/instrumentation/cohere/cohere.py +33 -12
  6. openlit/instrumentation/elevenlabs/async_elevenlabs.py +6 -2
  7. openlit/instrumentation/elevenlabs/elevenlabs.py +6 -2
  8. openlit/instrumentation/gpt4all/gpt4all.py +30 -10
  9. openlit/instrumentation/groq/async_groq.py +31 -11
  10. openlit/instrumentation/groq/groq.py +31 -11
  11. openlit/instrumentation/mistral/async_mistral.py +33 -12
  12. openlit/instrumentation/mistral/mistral.py +33 -12
  13. openlit/instrumentation/ollama/async_ollama.py +57 -20
  14. openlit/instrumentation/ollama/ollama.py +57 -20
  15. openlit/instrumentation/openai/async_azure_openai.py +94 -35
  16. openlit/instrumentation/openai/async_openai.py +68 -27
  17. openlit/instrumentation/openai/azure_openai.py +89 -31
  18. openlit/instrumentation/openai/openai.py +68 -29
  19. openlit/instrumentation/transformers/transformers.py +20 -16
  20. openlit/instrumentation/vertexai/async_vertexai.py +104 -35
  21. openlit/instrumentation/vertexai/vertexai.py +104 -35
  22. openlit/instrumentation/vllm/__init__.py +43 -0
  23. openlit/instrumentation/vllm/vllm.py +143 -0
  24. openlit/semcov/__init__.py +4 -1
  25. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA +3 -1
  26. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/RECORD +28 -26
  27. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/LICENSE +0 -0
  28. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/WHEEL +0 -0
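The recurring change across the instrumentation modules below: prompt and completion text is no longer written onto spans as the `gen_ai.prompt` / `gen_ai.completion` attributes, but attached as span events named `gen_ai.content.prompt` and `gen_ai.content.completion` (see the new `*_EVENT` constants in `openlit/semcov/__init__.py`). Backends that queried those span attributes will need to read span events instead. A minimal sketch of the new pattern against the OpenTelemetry Python API — the tracer setup and literal strings here are illustrative, not openlit's own code:

```python
from opentelemetry import trace

tracer = trace.get_tracer("example")

with tracer.start_as_current_span("chat") as span:
    # Old pattern (1.16.2): content stored directly as span attributes
    # span.set_attribute("gen_ai.prompt", prompt)

    # New pattern: content attached as span events, keeping large
    # payloads out of the span's attribute set
    span.add_event(
        name="gen_ai.content.prompt",
        attributes={"gen_ai.prompt": "What is OpenTelemetry?"},
    )
    span.add_event(
        name="gen_ai.content.completion",
        attributes={"gen_ai.completion": "An observability framework..."},
    )
```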
openlit/instrumentation/vertexai/async_vertexai.py
@@ -103,10 +103,18 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict_async(gen_ai_endpoint, version, environment, application_name, trace
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming_async(gen_ai_endpoint, version, environment, application_n
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming_async(gen_ai_endpoint, version, environment, applicatio
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
openlit/instrumentation/vertexai/vertexai.py
@@ -103,10 +103,18 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming(gen_ai_endpoint, version, environment, application_name, t
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming(gen_ai_endpoint, version, environment, application_name
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
openlit/instrumentation/vllm/__init__.py
@@ -0,0 +1,43 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+"""Initializer of Auto Instrumentation of vLLM Functions"""
+
+from typing import Collection
+import importlib.metadata
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from wrapt import wrap_function_wrapper
+
+from openlit.instrumentation.vllm.vllm import (
+    generate
+)
+
+_instruments = ("vllm >= 0.5.4",)
+
+class VLLMInstrumentor(BaseInstrumentor):
+    """
+    An instrumentor for vLLM's client library.
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        application_name = kwargs.get("application_name", "default_application")
+        environment = kwargs.get("environment", "default_environment")
+        tracer = kwargs.get("tracer")
+        metrics = kwargs.get("metrics_dict")
+        pricing_info = kwargs.get("pricing_info", {})
+        trace_content = kwargs.get("trace_content", False)
+        disable_metrics = kwargs.get("disable_metrics")
+        version = importlib.metadata.version("vllm")
+
+        # sync chat
+        wrap_function_wrapper(
+            "vllm",
+            "LLM.generate",
+            generate("vllm.generate", version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+    def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
+        pass
openlit/instrumentation/vllm/vllm.py
@@ -0,0 +1,143 @@
+# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
+"""
+Module for monitoring vLLM API calls.
+"""
+
+import logging
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from openlit.__helpers import handle_exception, general_tokens
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def generate(gen_ai_endpoint, version, environment, application_name,
+             tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for generate to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the vLLM API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of vLLM usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the generate method to add telemetry.
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'generate' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'generate' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'generate' method.
+            kwargs: Keyword arguments for the 'generate' method.
+
+        Returns:
+            The response from the original 'generate' method.
+        """
+
+        # pylint: disable=line-too-long
+        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            response = wrapped(*args, **kwargs)
+
+            try:
+                model = instance.llm_engine.model_config.model or "facebook/opt-125m"
+                # Set base span attribues
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_VLLM)
+                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                   False)
+                input_tokens = 0
+                output_tokens = 0
+                cost = 0
+
+                if trace_content:
+                    prompt_attributes = {}
+                    completion_attributes = {}
+
+                    for i, output in enumerate(response):
+                        prompt_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
+                        completion_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
+                        input_tokens += general_tokens(output.prompt)
+                        output_tokens += general_tokens(output.outputs[0].text)
+
+                    # Add a single event for all prompts
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes=prompt_attributes,
+                    )
+
+                    # Add a single event for all completions
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes=completion_attributes,
+                    )
+
+                total_tokens = input_tokens + output_tokens
+
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   total_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_VLLM,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(total_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
+
+    return wrapper
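With `LLM.generate` wrapped, telemetry is emitted transparently on each call. A hedged usage sketch — assuming openlit registers `VLLMInstrumentor` through its standard `openlit.init()` entry point and that the model weights are available locally:

```python
import openlit
from vllm import LLM

# Sets up tracing/metrics and auto-instruments supported libraries
openlit.init(application_name="vllm-demo", environment="dev")

# LLM.generate is now wrapped by the generate() decorator above
llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["Hello, my name is"])
for output in outputs:
    print(output.outputs[0].text)
```

As the wrapper shows, token counts are estimated with openlit's `general_tokens` helper rather than read from vLLM itself, and `cost` is recorded as 0 since `pricing_info` is never consulted for this local-inference path.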
openlit/semcov/__init__.py
@@ -70,7 +70,9 @@ class SemanticConvetion:
     GEN_AI_RESPONSE_IMAGE = "gen_ai.response.image" # Not used directly in code yet
 
     # GenAI Content
+    GEN_AI_CONTENT_PROMPT_EVENT = "gen_ai.content.prompt"
     GEN_AI_CONTENT_PROMPT = "gen_ai.prompt"
+    GEN_AI_CONTENT_COMPLETION_EVENT = "gen_ai.content.completion"
     GEN_AI_CONTENT_COMPLETION = "gen_ai.completion"
     GEN_AI_CONTENT_REVISED_PROMPT = "gen_ai.content.revised_prompt"
 
@@ -94,11 +96,12 @@ class SemanticConvetion:
     GEN_AI_SYSTEM_COHERE = "cohere"
     GEN_AI_SYSTEM_MISTRAL = "mistral"
     GEN_AI_SYSTEM_BEDROCK = "bedrock"
-    GEN_AI_SYSTEM_VERTEXAI = "vertexai"
+    GEN_AI_SYSTEM_VERTEXAI = "vertex_ai"
     GEN_AI_SYSTEM_GROQ = "groq"
     GEN_AI_SYSTEM_OLLAMA = "ollama"
     GEN_AI_SYSTEM_GPT4ALL = "gpt4all"
     GEN_AI_SYSTEM_ELEVENLABS = "elevenlabs"
+    GEN_AI_SYSTEM_VLLM = "vLLM"
     GEN_AI_SYSTEM_LANGCHAIN = "langchain"
     GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
     GEN_AI_SYSTEM_HAYSTACK = "haystack"
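Since content now travels as span events, consumers read it back from a span's `events` rather than its attributes. A small self-contained sketch using the OpenTelemetry SDK's in-memory exporter (illustrative only, not part of this package):

```python
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))

tracer = provider.get_tracer("example")
with tracer.start_as_current_span("chat") as span:
    span.add_event("gen_ai.content.prompt", {"gen_ai.prompt": "hi"})

# Finished spans expose events, each with .name and .attributes
for finished in exporter.get_finished_spans():
    for event in finished.events:
        if event.name == "gen_ai.content.prompt":
            print(event.attributes["gen_ai.prompt"])
```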
{openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.16.2
+Version: 1.18.0
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
@@ -68,6 +68,8 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem
 | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
 | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
 | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
+| [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
+
 ## Supported Destinations
 - [✅ OpenTelemetry Collector](https://docs.openlit.io/latest/connections/otelcol)
 - [✅ Prometheus + Tempo](https://docs.openlit.io/latest/connections/prometheus-tempo)