genai-otel-instrument 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Module for collecting server-side LLM serving metrics.
|
|
2
|
+
|
|
3
|
+
This module provides metrics for LLM serving engines like vLLM, TGI, etc.
|
|
4
|
+
It includes KV cache metrics, request queue metrics, and other server-level observability.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import threading
|
|
9
|
+
from typing import Callable, Dict, Optional
|
|
10
|
+
|
|
11
|
+
from opentelemetry.metrics import Meter, ObservableGauge, Observation
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ServerMetricsCollector:
    """Collects and reports server-side LLM serving metrics.

    This class provides metrics for:
    - KV cache usage (per model)
    - Request queue depth and concurrency
    - Maximum request capacity

    These metrics can be populated by:
    1. Manual instrumentation (user calls set_* methods)
    2. Auto-instrumentation of serving frameworks (vLLM, TGI, etc.)

    All stored values are read and written under a single lock because the
    observable-gauge callbacks run on the metric reader's collection thread
    while the set_*/increment_*/decrement_* methods run on application threads.
    """

    def __init__(self, meter: "Meter"):
        """Initialize the ServerMetricsCollector.

        Args:
            meter: The OpenTelemetry meter to use for recording metrics.
        """
        self.meter = meter
        self._lock = threading.Lock()

        # Storage for metric values (read by the gauge callbacks below).
        self._kv_cache_usage: Dict[str, float] = {}  # model_name -> usage percentage (0-100)
        self._num_requests_running = 0
        self._num_requests_waiting = 0
        self._num_requests_max = 0

        # Observable gauges: the SDK invokes each callback at collection time,
        # so the values reported are always the latest ones stored above.
        self.kv_cache_gauge = self.meter.create_observable_gauge(
            "gen_ai.server.kv_cache.usage",
            callbacks=[self._observe_kv_cache],
            description="GPU KV-cache usage percentage (0-100)",
            unit="%",
        )

        self.requests_running_gauge = self.meter.create_observable_gauge(
            "gen_ai.server.requests.running",
            callbacks=[self._observe_requests_running],
            description="Number of requests currently executing",
        )

        self.requests_waiting_gauge = self.meter.create_observable_gauge(
            "gen_ai.server.requests.waiting",
            callbacks=[self._observe_requests_waiting],
            description="Number of requests waiting in queue",
        )

        self.requests_max_gauge = self.meter.create_observable_gauge(
            "gen_ai.server.requests.max",
            callbacks=[self._observe_requests_max],
            description="Maximum concurrent request capacity",
        )

        logger.info("Server metrics collector initialized")

    def _observe_kv_cache(self, options) -> list:
        """Observable callback for KV cache usage (one observation per model)."""
        with self._lock:
            return [
                Observation(value=usage, attributes={"model": model_name})
                for model_name, usage in self._kv_cache_usage.items()
            ]

    def _observe_requests_running(self, options) -> list:
        """Observable callback for running requests."""
        with self._lock:
            return [Observation(value=self._num_requests_running)]

    def _observe_requests_waiting(self, options) -> list:
        """Observable callback for waiting requests."""
        with self._lock:
            return [Observation(value=self._num_requests_waiting)]

    def _observe_requests_max(self, options) -> list:
        """Observable callback for max requests."""
        with self._lock:
            return [Observation(value=self._num_requests_max)]

    # Public API for manual instrumentation

    def set_kv_cache_usage(self, model_name: str, usage_percent: float):
        """Set KV cache usage for a specific model.

        Args:
            model_name: Name of the model
            usage_percent: Cache usage as a percentage; clamped into [0, 100]
        """
        with self._lock:
            self._kv_cache_usage[model_name] = min(100.0, max(0.0, usage_percent))

    def set_requests_running(self, count: int):
        """Set number of currently running requests (negative values clamp to 0).

        Args:
            count: Number of active requests
        """
        with self._lock:
            self._num_requests_running = max(0, count)

    def set_requests_waiting(self, count: int):
        """Set number of requests waiting in queue (negative values clamp to 0).

        Args:
            count: Number of queued requests
        """
        with self._lock:
            self._num_requests_waiting = max(0, count)

    def set_requests_max(self, count: int):
        """Set maximum concurrent request capacity (negative values clamp to 0).

        Args:
            count: Maximum request capacity
        """
        with self._lock:
            self._num_requests_max = max(0, count)

    def increment_requests_running(self, delta: int = 1):
        """Increment running requests counter; the result is clamped at 0.

        Args:
            delta: Amount to increment by (default: 1); may be negative
        """
        with self._lock:
            self._num_requests_running = max(0, self._num_requests_running + delta)

    def decrement_requests_running(self, delta: int = 1):
        """Decrement running requests counter; the result is clamped at 0.

        Args:
            delta: Amount to decrement by (default: 1)
        """
        # Decrementing by delta is incrementing by -delta; reuse the clamping logic.
        self.increment_requests_running(-delta)

    def increment_requests_waiting(self, delta: int = 1):
        """Increment waiting requests counter; the result is clamped at 0.

        Args:
            delta: Amount to increment by (default: 1); may be negative
        """
        with self._lock:
            self._num_requests_waiting = max(0, self._num_requests_waiting + delta)

    def decrement_requests_waiting(self, delta: int = 1):
        """Decrement waiting requests counter; the result is clamped at 0.

        Args:
            delta: Amount to decrement by (default: 1)
        """
        # Same delegation as decrement_requests_running: one clamping code path.
        self.increment_requests_waiting(-delta)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Process-wide singleton collector: created lazily by initialize_server_metrics()
# and exposed read-only via get_server_metrics(); stays None until initialized.
_global_server_metrics: Optional[ServerMetricsCollector] = None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def get_server_metrics() -> Optional[ServerMetricsCollector]:
    """Get the global server metrics collector instance.

    Returns:
        The ServerMetricsCollector created by ``initialize_server_metrics``,
        or None if that function has not been called yet.
    """
    return _global_server_metrics
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def initialize_server_metrics(meter: Meter) -> ServerMetricsCollector:
    """Initialize the global server metrics collector (idempotent).

    Only the first call creates a collector; later calls return the existing
    instance and ignore the supplied meter. NOTE(review): the check-then-create
    below is not atomic — call this during single-threaded startup to avoid
    racing initializations.

    Args:
        meter: OpenTelemetry meter instance

    Returns:
        The global ServerMetricsCollector instance
    """
    global _global_server_metrics
    if _global_server_metrics is None:
        _global_server_metrics = ServerMetricsCollector(meter)
        logger.info("Global server metrics collector initialized")
    else:
        # Surface repeat calls so a mismatched meter is not silently discarded.
        logger.debug("Server metrics collector already initialized; ignoring new meter")
    return _global_server_metrics
|