openlit 1.13.0.tar.gz → 1.14.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openlit-1.13.0 → openlit-1.14.0}/PKG-INFO +4 -1
- {openlit-1.13.0 → openlit-1.14.0}/README.md +1 -0
- {openlit-1.13.0 → openlit-1.14.0}/pyproject.toml +3 -1
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/__init__.py +14 -18
- openlit-1.14.0/src/openlit/instrumentation/gpu/__init__.py +132 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/semcov/__init__.py +17 -0
- {openlit-1.13.0 → openlit-1.14.0}/LICENSE +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/__helpers.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/anthropic/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/anthropic/anthropic.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/anthropic/async_anthropic.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/bedrock/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/bedrock/bedrock.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/chroma/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/chroma/chroma.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/cohere/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/cohere/cohere.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/embedchain/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/embedchain/embedchain.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/gpt4all/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/gpt4all/gpt4all.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/groq/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/groq/async_groq.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/groq/groq.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/haystack/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/haystack/haystack.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/langchain/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/langchain/langchain.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/llamaindex/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/llamaindex/llamaindex.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/milvus/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/milvus/milvus.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/mistral/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/mistral/async_mistral.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/mistral/mistral.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/ollama/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/ollama/async_ollama.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/ollama/ollama.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/openai/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/openai/async_azure_openai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/openai/async_openai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/openai/azure_openai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/openai/openai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/pinecone/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/pinecone/pinecone.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/qdrant/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/qdrant/qdrant.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/transformers/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/transformers/transformers.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/vertexai/__init__.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/vertexai/async_vertexai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/instrumentation/vertexai/vertexai.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/otel/metrics.py +0 -0
- {openlit-1.13.0 → openlit-1.14.0}/src/openlit/otel/tracing.py +0 -0
{openlit-1.13.0 → openlit-1.14.0}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.13.0
+Version: 1.14.0
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
@@ -14,11 +14,13 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: boto3 (>=1.34.0,<2.0.0)
 Requires-Dist: botocore (>=1.34.0,<2.0.0)
+Requires-Dist: gpustat (>=1.1.1,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-instrumentation (>=0.45b0,<0.46)
 Requires-Dist: opentelemetry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: requests (>=2.26.0,<3.0.0)
+Requires-Dist: schedule (>=1.2.2,<2.0.0)
 Requires-Dist: tiktoken (>=0.6.0,<0.7.0)
 Project-URL: Repository, https://github.com/openlit/openlit/tree/main/openlit/python
 Description-Content-Type: text/markdown
@@ -185,6 +187,7 @@ Below is a detailed overview of the configuration options available, allowing yo
 | `disabled_instrumentors`| List of instrumentors to disable. | `None` | No |
 | `disable_metrics` | If set, disables the collection of metrics. | `False` | No |
 | `pricing_json` | URL or file path of the pricing JSON file. | `https://github.com/openlit/openlit/blob/main/assets/pricing.json` | No |
+| `collect_gpu_stats` | Flag to enable or disable GPU metrics collection. | `False` | No |
 
 ## 🌱 Contributing
 
```
{openlit-1.13.0 → openlit-1.14.0}/README.md

```diff
@@ -160,6 +160,7 @@ Below is a detailed overview of the configuration options available, allowing yo
 | `disabled_instrumentors`| List of instrumentors to disable. | `None` | No |
 | `disable_metrics` | If set, disables the collection of metrics. | `False` | No |
 | `pricing_json` | URL or file path of the pricing JSON file. | `https://github.com/openlit/openlit/blob/main/assets/pricing.json` | No |
+| `collect_gpu_stats` | Flag to enable or disable GPU metrics collection. | `False` | No |
 
 ## 🌱 Contributing
 
```
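The `collect_gpu_stats` option documented above is exposed through `openlit.init` (see the `src/openlit/__init__.py` hunks below). A minimal usage sketch, assuming a local OTLP collector and an NVIDIA GPU; the application name, environment, and endpoint are illustrative and not part of this diff:

```python
# Hypothetical example of enabling the GPU metrics added in 1.14.0.
import openlit

openlit.init(
    application_name="demo-app",            # illustrative name
    environment="production",               # illustrative environment label
    otlp_endpoint="http://127.0.0.1:4318",  # assumed local OTLP collector
    collect_gpu_stats=True,                 # new flag; defaults to False
)
```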
{openlit-1.13.0 → openlit-1.14.0}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openlit"
-version = "1.13.0"
+version = "1.14.0"
 description = "OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects"
 authors = ["OpenLIT"]
 repository = "https://github.com/openlit/openlit/tree/main/openlit/python"
@@ -11,7 +11,9 @@ keywords = ["OpenTelemetry", "otel", "otlp","llm", "tracing", "openai", "anthrop
 [tool.poetry.dependencies]
 python = "^3.7.1"
 requests = "^2.26.0"
+schedule = "^1.2.2"
 tiktoken = "^0.6.0"
+gpustat = "^1.1.1"
 boto3 = "^1.34.0"
 botocore = "^1.34.0"
 opentelemetry-api = "^1.24.0"
```
{openlit-1.13.0 → openlit-1.14.0}/src/openlit/__init__.py

```diff
@@ -40,6 +40,7 @@ from openlit.instrumentation.pinecone import PineconeInstrumentor
 from openlit.instrumentation.qdrant import QdrantInstrumentor
 from openlit.instrumentation.milvus import MilvusInstrumentor
 from openlit.instrumentation.transformers import TransformersInstrumentor
+from openlit.instrumentation.gpu import NvidiaGPUInstrumentor
 
 # Set up logging for error and information messages.
 logger = logging.getLogger(__name__)
@@ -155,20 +156,9 @@ def instrument_if_available(
     except Exception as e:
         logger.error("Failed to instrument %s: %s", instrumentor_name, e)
 
-
-def init(
-    environment="default",
-    application_name="default",
-    tracer=None,
-    otlp_endpoint=None,
-    otlp_headers=None,
-    disable_batch=False,
-    trace_content=True,
-    disabled_instrumentors=None,
-    meter=None,
-    disable_metrics=False,
-    pricing_json=None,
-):
+def init(environment="default", application_name="default", tracer=None, otlp_endpoint=None,
+         otlp_headers=None, disable_batch=False, trace_content=True, disabled_instrumentors=None,
+         meter=None, disable_metrics=False, pricing_json=None, collect_gpu_stats=False):
     """
     Initializes the openLIT configuration and setups tracing.
 
@@ -185,8 +175,9 @@ def init(
         disable_batch (bool): Flag to disable batch span processing (Optional).
         trace_content (bool): Flag to trace content (Optional).
         disabled_instrumentors (List[str]): Optional. List of instrumentor names to disable.
-        disable_metrics (bool): Flag to disable metrics (Optional)
-        pricing_json(str): File path or url to the pricing json (Optional)
+        disable_metrics (bool): Flag to disable metrics (Optional).
+        pricing_json(str): File path or url to the pricing json (Optional).
+        collect_gpu_stats (bool): Flag to enable or disable GPU metrics collection.
     """
     disabled_instrumentors = disabled_instrumentors if disabled_instrumentors else []
     # Check for invalid instrumentor names
@@ -289,8 +280,13 @@ def init(
 
         # Initialize and instrument only the enabled instrumentors
         for name, instrumentor in instrumentor_instances.items():
-            instrument_if_available(
-                name, instrumentor, config, disabled_instrumentors, module_name_map
+            instrument_if_available(name, instrumentor, config,
+                                    disabled_instrumentors, module_name_map)
+
+        if (disable_metrics is False) and (collect_gpu_stats is True):
+            NvidiaGPUInstrumentor().instrument(
+                environment=config.environment,
+                application_name=config.application_name,
             )
 
     except Exception as e:
```
openlit-1.14.0/src/openlit/instrumentation/gpu/__init__.py (new file)

```diff
@@ -0,0 +1,132 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, duplicate-code, import-outside-toplevel, broad-exception-caught, unused-argument
+"""Initializer of Auto Instrumentation of GPU Metrics"""
+
+from typing import Collection, Iterable
+import logging
+from functools import partial
+
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.metrics import get_meter, CallbackOptions, Observation
+
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+class NvidiaGPUInstrumentor(BaseInstrumentor):
+    """
+    An instrumentor for collecting NVIDIA GPU metrics.
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return []
+
+    def _instrument(self, **kwargs):
+
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
+
+        meter = get_meter(
+            __name__,
+            "0.1.0",
+            schema_url="https://opentelemetry.io/schemas/1.11.0",
+        )
+
+        def check_and_record(value):
+            return value if value is not None else 0
+
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_UTILIZATION,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "utilization")],
+            description="GPU Utilization",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_UTILIZATION_ENC,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "utilization_enc")],
+            description="GPU Encoder Utilization",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_UTILIZATION_DEC,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "utilization_dec")],
+            description="GPU Decoder Utilization",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_TEMPERATURE,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "temperature")],
+            description="GPU Temperature",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_FAN_SPEED,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "fan_speed")],
+            description="GPU Fan Speed",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_MEMORY_AVAILABLE,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "memory_available")],
+            description="GPU Memory Available",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_MEMORY_TOTAL,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "memory_total")],
+            description="GPU Memory Total",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_MEMORY_USED,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "memory_used")],
+            description="GPU Memory Used",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_MEMORY_FREE,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "memory_free")],
+            description="GPU Memory Free",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_POWER_DRAW,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "power_draw")],
+            description="GPU Power Draw",
+        )
+        meter.create_observable_gauge(
+            name=SemanticConvetion.GPU_POWER_LIMIT,
+            callbacks=[partial(self._collect_metric, environment,
+                               application_name, check_and_record, "power_limit")],
+            description="GPU Power Limit",
+        )
+
+    def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
+        pass
+
+    def _collect_metric(self, environment, application_name,
+                        check_and_record, metric_name,
+                        options: CallbackOptions) -> Iterable[Observation]:
+
+        import gpustat
+
+        try:
+            gpu_stats = gpustat.GPUStatCollection.new_query()
+
+            for gpu in gpu_stats.gpus:
+                attributes = {
+                    TELEMETRY_SDK_NAME: "openlit",
+                    SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
+                    SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
+                    SemanticConvetion.GPU_INDEX: gpu.index,
+                    SemanticConvetion.GPU_UUID: gpu.uuid,
+                    SemanticConvetion.GPU_NAME: gpu.name,
+                }
+
+                yield Observation(check_and_record(getattr(gpu, metric_name, 0)), attributes)
+
+        except Exception as e:
+            logger.error("Error in GPU metrics collection: %s", e)
```
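Each gauge above binds `_collect_metric` with `functools.partial`, fixing the environment, application name, helper, and gpustat field name at registration time; the OpenTelemetry SDK then invokes the callback with only `CallbackOptions` on every collection cycle, and the callback yields one `Observation` per detected GPU. A standalone sketch of that pattern, with a hypothetical `read_gpu_metric` helper standing in for the `gpustat` query and simplified attribute keys:

```python
# Sketch of the observable-gauge + partial callback pattern used above.
# read_gpu_metric() is a hypothetical stand-in for gpustat.GPUStatCollection.new_query().
from functools import partial
from typing import Iterable

from opentelemetry.metrics import CallbackOptions, Observation, get_meter


def read_gpu_metric(metric_name):
    # Hypothetical reader: returns (gpu_index, value) pairs for each GPU.
    return [(0, 42.0)]


def collect(environment, metric_name, options: CallbackOptions) -> Iterable[Observation]:
    # Invoked by the SDK at each collection interval; static arguments are pre-bound below.
    for index, value in read_gpu_metric(metric_name):
        yield Observation(value, {"gpu.index": index, "environment": environment})


meter = get_meter(__name__, "0.1.0")
meter.create_observable_gauge(
    name="gpu.utilization",
    callbacks=[partial(collect, "production", "utilization")],
    description="GPU Utilization",
)
```

With no SDK `MeterProvider` configured, `get_meter` returns a no-op meter and the callback never fires; the sketch after the `semcov` hunk below wires a console exporter so the gauges can actually be observed.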
{openlit-1.13.0 → openlit-1.14.0}/src/openlit/semcov/__init__.py

```diff
@@ -145,3 +145,20 @@ class SemanticConvetion:
     DB_SYSTEM_PINECONE = "pinecone"
     DB_SYSTEM_QDRANT = "qdrant"
     DB_SYSTEM_MILVUS = "milvus"
+
+    # GPU
+    GPU_INDEX = "gpu.index"
+    GPU_UUID = "gpu.uuid"
+    GPU_NAME = "gpu.name"
+
+    GPU_UTILIZATION = "gpu.utilization"
+    GPU_UTILIZATION_ENC = "gpu.enc.utilization"
+    GPU_UTILIZATION_DEC = "gpu.dec.utilization"
+    GPU_TEMPERATURE = "gpu.temperature"
+    GPU_FAN_SPEED = "gpu.fan_speed"
+    GPU_MEMORY_AVAILABLE = "gpu.memory.available"
+    GPU_MEMORY_TOTAL = "gpu.memory.total"
+    GPU_MEMORY_USED = "gpu.memory.used"
+    GPU_MEMORY_FREE = "gpu.memory.free"
+    GPU_POWER_DRAW = "gpu.power.draw"
+    GPU_POWER_LIMIT = "gpu.power.limit"
```
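The constants above are the exported metric names (`gpu.*`) and the per-GPU attribute keys attached to every observation. A minimal verification sketch, assuming an NVIDIA GPU with drivers and the newly added `gpustat` dependency available; it configures the standard OTel SDK by hand and runs the instrumentor directly, which is roughly what `openlit.init(collect_gpu_stats=True)` does for you:

```python
# Print the gpu.* gauges locally; assumes an NVIDIA GPU and gpustat are present.
import time

from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import (
    ConsoleMetricExporter,
    PeriodicExportingMetricReader,
)

from openlit.instrumentation.gpu import NvidiaGPUInstrumentor

# Export collected metrics to stdout every 5 seconds.
reader = PeriodicExportingMetricReader(ConsoleMetricExporter(), export_interval_millis=5000)
metrics.set_meter_provider(MeterProvider(metric_readers=[reader]))

# Registers the observable gauges against the global meter provider.
NvidiaGPUInstrumentor().instrument(environment="default", application_name="default")

time.sleep(15)  # allow a few export cycles; gpu.* data points appear on stdout
```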