genai-otel-instrument 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0

--- /dev/null
+++ genai_otel/instrumentors/ollama_server_metrics_poller.py
@@ -0,0 +1,336 @@
+"""Ollama server metrics poller for automatic KV cache and memory tracking.
+
+This module provides automatic collection of Ollama server metrics by polling
+the /api/ps endpoint to get running models and their VRAM usage.
+
+GPU VRAM is auto-detected using:
+1. pynvml (nvidia-ml-py) - preferred method
+2. nvidia-smi subprocess - fallback
+3. Environment variable - manual override
+"""
+
+import logging
+import subprocess  # nosec B404 - Only used for nvidia-smi with hardcoded args
+import threading
+import time
+from typing import Optional
+
+import requests
+
+from ..server_metrics import get_server_metrics
+
+logger = logging.getLogger(__name__)
+
+# Try to import nvidia-ml-py for GPU VRAM detection
+# Package: nvidia-ml-py, Import: pynvml
+try:
+    import pynvml
+
+    NVIDIA_ML_AVAILABLE = True
+except ImportError:
+    NVIDIA_ML_AVAILABLE = False
+
+
+class OllamaServerMetricsPoller:
+    """Polls Ollama server for metrics and updates ServerMetricsCollector.
+
+    This poller queries the /api/ps endpoint to get:
+    - Running models and their VRAM usage
+    - Total VRAM usage across all models
+    - Number of models currently loaded
+
+    It automatically updates the global ServerMetricsCollector with:
+    - KV cache usage per model (approximated from VRAM usage)
+    - Running models count
+    """
+
+    def __init__(
+        self,
+        base_url: str = "http://localhost:11434",
+        interval: float = 5.0,
+        max_vram_gb: Optional[float] = None,
+    ):
+        """Initialize the Ollama server metrics poller.
+
+        Args:
+            base_url: Base URL of the Ollama server (default: http://localhost:11434)
+            interval: Polling interval in seconds (default: 5.0)
+            max_vram_gb: Maximum VRAM in GB for percentage calculation.
+                If None, will attempt auto-detection via nvidia-ml-py or nvidia-smi.
+        """
+        self.base_url = base_url.rstrip("/")
+        self.interval = interval
+
+        # Auto-detect GPU VRAM if not provided
+        if max_vram_gb is not None:
+            self.max_vram_bytes = max_vram_gb * 1024**3
+            logger.info(f"Using configured GPU VRAM: {max_vram_gb}GB")
+        else:
+            detected_vram_gb = self._detect_gpu_vram()
+            if detected_vram_gb:
+                self.max_vram_bytes = detected_vram_gb * 1024**3
+                logger.info(f"Auto-detected GPU VRAM: {detected_vram_gb}GB")
+            else:
+                self.max_vram_bytes = None
+                logger.info("GPU VRAM not detected, using heuristic-based percentages")
+
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+        self._running = False
+
+    def _detect_gpu_vram(self) -> Optional[float]:
+        """Auto-detect GPU VRAM in GB using nvidia-ml-py or nvidia-smi.
+
+        Returns:
+            GPU VRAM in GB, or None if detection failed
+        """
+        # Try nvidia-ml-py first (preferred method)
+        vram_gb = self._detect_vram_nvidia_ml()
+        if vram_gb:
+            return vram_gb
+
+        # Fallback to nvidia-smi
+        vram_gb = self._detect_vram_nvidia_smi()
+        if vram_gb:
+            return vram_gb
+
+        return None
+
+    def _detect_vram_nvidia_ml(self) -> Optional[float]:
+        """Detect GPU VRAM using nvidia-ml-py library.
+
+        Returns:
+            GPU VRAM in GB for the first GPU, or None if failed
+        """
+        if not NVIDIA_ML_AVAILABLE:
+            logger.debug("nvidia-ml-py not available for VRAM detection")
+            return None
+
+        try:
+            pynvml.nvmlInit()
+            device_count = pynvml.nvmlDeviceGetCount()
+
+            if device_count == 0:
+                logger.debug("No NVIDIA GPUs detected via nvidia-ml-py")
+                pynvml.nvmlShutdown()
+                return None
+
+            # Get VRAM from first GPU (Ollama typically uses GPU 0)
+            handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+            vram_bytes = mem_info.total
+            vram_gb = vram_bytes / (1024**3)
+
+            # Get GPU name for logging
+            gpu_name = pynvml.nvmlDeviceGetName(handle)
+            logger.debug(f"Detected via nvidia-ml-py: {gpu_name} with {vram_gb:.2f}GB VRAM")
+
+            pynvml.nvmlShutdown()
+            return vram_gb
+
+        except Exception as e:
+            logger.debug(f"Failed to detect VRAM via nvidia-ml-py: {e}")
+            try:
+                pynvml.nvmlShutdown()
+            except Exception:
+                pass
+            return None
+
+    def _detect_vram_nvidia_smi(self) -> Optional[float]:
+        """Detect GPU VRAM using nvidia-smi command.
+
+        Returns:
+            GPU VRAM in GB for the first GPU, or None if failed
+        """
+        try:
+            # Query nvidia-smi for total memory of GPU 0
+            result = subprocess.run(  # nosec B603 B607
+                [
+                    "nvidia-smi",
+                    "--query-gpu=memory.total",
+                    "--format=csv,noheader,nounits",
+                    "--id=0",
+                ],
+                capture_output=True,
+                text=True,
+                timeout=5,
+                check=True,
+            )
+
+            # Parse output (in MB)
+            vram_mb = float(result.stdout.strip())
+            vram_gb = vram_mb / 1024
+
+            logger.debug(f"Detected via nvidia-smi: {vram_gb:.2f}GB VRAM")
+            return vram_gb
+
+        except FileNotFoundError:
+            logger.debug("nvidia-smi command not found")
+            return None
+        except subprocess.TimeoutExpired:
+            logger.debug("nvidia-smi command timed out")
+            return None
+        except Exception as e:
+            logger.debug(f"Failed to detect VRAM via nvidia-smi: {e}")
+            return None
+
+    def start(self):
+        """Start the background polling thread."""
+        if self._running:
+            logger.warning("Ollama server metrics poller already running")
+            return
+
+        self._stop_event.clear()
+        self._thread = threading.Thread(target=self._poll_loop, daemon=True)
+        self._thread.start()
+        self._running = True
+        logger.info(
+            f"Ollama server metrics poller started (interval={self.interval}s, url={self.base_url})"
+        )
+
+    def stop(self):
+        """Stop the background polling thread."""
+        if not self._running:
+            return
+
+        self._stop_event.set()
+        if self._thread:
+            self._thread.join(timeout=self.interval + 1.0)
+        self._running = False
+        logger.info("Ollama server metrics poller stopped")
+
+    def _poll_loop(self):
+        """Background polling loop that runs in a separate thread."""
+        while not self._stop_event.is_set():
+            try:
+                self._collect_metrics()
+            except Exception as e:
+                logger.warning(f"Failed to collect Ollama server metrics: {e}")
+
+            # Wait for interval or until stop is requested
+            self._stop_event.wait(self.interval)
+
+    def _collect_metrics(self):
+        """Query Ollama /api/ps and update server metrics."""
+        try:
+            # Query /api/ps endpoint
+            response = requests.get(f"{self.base_url}/api/ps", timeout=2.0)
+            response.raise_for_status()
+            data = response.json()
+
+            # Get server metrics collector
+            server_metrics = get_server_metrics()
+            if not server_metrics:
+                logger.debug("Server metrics collector not initialized, skipping update")
+                return
+
+            # Extract running models
+            models = data.get("models", [])
+            num_models = len(models)
+
+            # Update the "max" capacity to reflect number of model slots
+            # This gives visibility into how many models can be loaded simultaneously
+            # Note: This is approximate - actual capacity depends on VRAM
+            if num_models > 0:
+                server_metrics.set_requests_max(num_models)
+
+            logger.debug(f"Ollama has {num_models} models loaded in memory")
+
+            # Process each model's VRAM usage and details
+            total_vram_bytes = 0
+            total_size_bytes = 0
+
+            for model_info in models:
+                model_name = model_info.get("name", "unknown")
+                size_vram = model_info.get("size_vram", 0)
+                size_total = model_info.get("size", 0)
+                total_vram_bytes += size_vram
+                total_size_bytes += size_total
+
+                # Calculate VRAM usage percentage for this model
+                if self.max_vram_bytes and self.max_vram_bytes > 0:
+                    # If we know max VRAM, calculate actual percentage
+                    vram_usage_pct = (size_vram / self.max_vram_bytes) * 100
+                    vram_usage_pct = min(100.0, vram_usage_pct)  # Cap at 100%
+                else:
+                    # If we don't know max VRAM, use a simple heuristic:
+                    # Models in memory are "using" the cache, so report a non-zero value
+                    # This is an approximation - actual KV cache varies with context
+                    vram_usage_pct = min(100.0, (size_vram / (8 * 1024**3)) * 100)
+
+                # Update KV cache usage for this model
+                server_metrics.set_kv_cache_usage(model_name, vram_usage_pct)
+
+                # Extract model details for logging
+                details = model_info.get("details", {})
+                param_size = details.get("parameter_size", "unknown")
+                quant_level = details.get("quantization_level", "unknown")
+                model_format = details.get("format", "unknown")
+
+                logger.debug(
+                    f"Model {model_name}: "
+                    f"VRAM={size_vram / 1024**3:.2f}GB ({vram_usage_pct:.1f}%), "
+                    f"Size={size_total / 1024**3:.2f}GB, "
+                    f"Params={param_size}, Quant={quant_level}, Format={model_format}"
+                )
+
+            # Log aggregate metrics
+            if total_vram_bytes > 0:
+                logger.debug(
+                    f"Total Ollama usage: VRAM={total_vram_bytes / 1024**3:.2f}GB, "
+                    f"Total={total_size_bytes / 1024**3:.2f}GB, "
+                    f"Models={num_models}"
+                )
+
+            # Note: We don't update requests.running here because that's tracked
+            # automatically by BaseInstrumentor when requests are made
+
+        except requests.exceptions.ConnectionError:
+            logger.debug(f"Cannot connect to Ollama server at {self.base_url}")
+        except requests.exceptions.Timeout:
+            logger.warning("Ollama server request timed out")
+        except requests.exceptions.RequestException as e:
+            logger.warning(f"Error querying Ollama server: {e}")
+        except Exception as e:
+            logger.error(f"Unexpected error collecting Ollama metrics: {e}", exc_info=True)
+
+
+# Global instance
+_global_poller: Optional[OllamaServerMetricsPoller] = None
+
+
+def start_ollama_metrics_poller(
+    base_url: str = "http://localhost:11434",
+    interval: float = 5.0,
+    max_vram_gb: Optional[float] = None,
+) -> OllamaServerMetricsPoller:
+    """Start the global Ollama server metrics poller.
+
+    Args:
+        base_url: Base URL of the Ollama server (default: http://localhost:11434)
+        interval: Polling interval in seconds (default: 5.0)
+        max_vram_gb: Maximum VRAM in GB for percentage calculation (optional)
+
+    Returns:
+        The global poller instance
+    """
+    global _global_poller
+
+    if _global_poller is not None and _global_poller._running:
+        logger.warning("Ollama metrics poller already running")
+        return _global_poller
+
+    _global_poller = OllamaServerMetricsPoller(
+        base_url=base_url, interval=interval, max_vram_gb=max_vram_gb
+    )
+    _global_poller.start()
+    return _global_poller
+
+
+def stop_ollama_metrics_poller():
+    """Stop the global Ollama server metrics poller."""
+    global _global_poller
+
+    if _global_poller is not None:
+        _global_poller.stop()
+        _global_poller = None
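
For orientation, a minimal usage sketch of the module-level helpers defined at the end of this file (start_ollama_metrics_poller / stop_ollama_metrics_poller) follows. The import path is taken from the package layout in the file list above; the interval and VRAM values are illustrative, and the sketch assumes genai-otel-instrument is installed and the server metrics collector has been initialized (the poller silently skips updates otherwise). With max_vram_gb=24, a model holding 5 GB of VRAM would be reported as roughly (5 / 24) * 100 ≈ 20.8% KV-cache usage.

from genai_otel.instrumentors.ollama_server_metrics_poller import (
    start_ollama_metrics_poller,
    stop_ollama_metrics_poller,
)

# Poll /api/ps every 10 seconds; pin VRAM to a 24 GB card instead of auto-detecting.
poller = start_ollama_metrics_poller(interval=10.0, max_vram_gb=24.0)
try:
    ...  # application code; Ollama requests themselves are traced by the Ollama instrumentor
finally:
    stop_ollama_metrics_poller()  # stops the daemon polling thread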
--- /dev/null
+++ genai_otel/instrumentors/openai_agents_instrumentor.py
@@ -0,0 +1,291 @@
+"""OpenTelemetry instrumentor for the OpenAI Agents SDK.
+
+This instrumentor automatically traces agent execution, handoffs, sessions,
+and guardrails using the OpenAI Agents SDK (openai-agents package).
+"""
+
+import json
+import logging
+from typing import Any, Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+logger = logging.getLogger(__name__)
+
+
+class OpenAIAgentsInstrumentor(BaseInstrumentor):
+    """Instrumentor for OpenAI Agents SDK"""
+
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._agents_available = False
+        self._check_availability()
+
+    def _check_availability(self):
+        """Check if OpenAI Agents SDK is available."""
+        try:
+            import agents
+
+            self._agents_available = True
+            logger.debug("OpenAI Agents SDK detected and available for instrumentation")
+        except ImportError:
+            logger.debug("OpenAI Agents SDK not installed, instrumentation will be skipped")
+            self._agents_available = False
+
+    def instrument(self, config: OTelConfig):
+        """Instrument OpenAI Agents SDK if available.
+
+        Args:
+            config (OTelConfig): The OpenTelemetry configuration object.
+        """
+        if not self._agents_available:
+            logger.debug("Skipping OpenAI Agents instrumentation - library not available")
+            return
+
+        self.config = config
+
+        try:
+            import agents
+            import wrapt
+
+            # Instrument Runner.run() (async) and Runner.run_sync() (sync)
+            if hasattr(agents, "Runner"):
+                # Instrument async run method
+                if hasattr(agents.Runner, "run"):
+                    original_run = agents.Runner.run
+                    agents.Runner.run = wrapt.FunctionWrapper(original_run, self._wrap_runner_run)
+
+                # Instrument sync run method
+                if hasattr(agents.Runner, "run_sync"):
+                    original_run_sync = agents.Runner.run_sync
+                    agents.Runner.run_sync = wrapt.FunctionWrapper(
+                        original_run_sync, self._wrap_runner_run_sync
+                    )
+
+            self._instrumented = True
+            logger.info("OpenAI Agents SDK instrumentation enabled")
+
+        except Exception as e:
+            logger.error("Failed to instrument OpenAI Agents SDK: %s", e, exc_info=True)
+            if config.fail_on_error:
+                raise
+
+    def _wrap_runner_run(self, wrapped, instance, args, kwargs):
+        """Wrap Runner.run() async method with span.
+
+        Args:
+            wrapped: The original method.
+            instance: The Runner instance.
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+        """
+        return self.create_span_wrapper(
+            span_name="openai_agents.runner.run",
+            extract_attributes=self._extract_runner_attributes,
+        )(wrapped)(instance, *args, **kwargs)
+
+    def _wrap_runner_run_sync(self, wrapped, instance, args, kwargs):
+        """Wrap Runner.run_sync() sync method with span.
+
+        Args:
+            wrapped: The original method.
+            instance: The Runner instance.
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+        """
+        return self.create_span_wrapper(
+            span_name="openai_agents.runner.run_sync",
+            extract_attributes=self._extract_runner_attributes,
+        )(wrapped)(instance, *args, **kwargs)
+
+    def _extract_runner_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Runner.run() or Runner.run_sync() call.
+
+        Args:
+            instance: The Runner instance.
+            args: Positional arguments (agent, input_data, session, etc.).
+            kwargs: Keyword arguments.
+
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+
+        # Core attributes
+        attrs["gen_ai.system"] = "openai_agents"
+        attrs["gen_ai.operation.name"] = "agent.run"
+
+        # Extract agent from args (first positional argument)
+        agent = None
+        if len(args) > 0:
+            agent = args[0]
+        else:
+            agent = kwargs.get("agent")
+
+        if agent:
+            # Agent attributes
+            if hasattr(agent, "name"):
+                attrs["openai.agent.name"] = agent.name
+                attrs["gen_ai.request.model"] = agent.name  # Use agent name as "model"
+
+            if hasattr(agent, "model"):
+                attrs["openai.agent.model"] = agent.model
+
+            if hasattr(agent, "instructions"):
+                # Truncate instructions to avoid span size issues
+                instructions = str(agent.instructions)[:500]
+                attrs["openai.agent.instructions"] = instructions
+
+            # Extract tools
+            if hasattr(agent, "tools") and agent.tools:
+                try:
+                    tool_names = [getattr(tool, "name", str(tool)[:50]) for tool in agent.tools]
+                    attrs["openai.agent.tools"] = tool_names
+                    attrs["openai.agent.tool_count"] = len(agent.tools)
+                except Exception as e:
+                    logger.debug("Failed to extract agent tools: %s", e)
+
+            # Extract handoffs
+            if hasattr(agent, "handoffs") and agent.handoffs:
+                try:
+                    handoff_names = [getattr(h, "name", str(h)[:50]) for h in agent.handoffs]
+                    attrs["openai.agent.handoffs"] = handoff_names
+                    attrs["openai.agent.handoff_count"] = len(agent.handoffs)
+                except Exception as e:
+                    logger.debug("Failed to extract agent handoffs: %s", e)
+
+            # Extract guardrails
+            if hasattr(agent, "guardrails") and agent.guardrails:
+                try:
+                    attrs["openai.agent.guardrails_enabled"] = True
+                    attrs["openai.agent.guardrail_count"] = len(agent.guardrails)
+                except Exception as e:
+                    logger.debug("Failed to extract agent guardrails: %s", e)
+
+        # Extract input data (second positional argument)
+        input_data = None
+        if len(args) > 1:
+            input_data = args[1]
+        else:
+            input_data = kwargs.get("input_data")
+
+        if input_data:
+            # Truncate input to avoid span size issues
+            input_str = str(input_data)[:500]
+            attrs["openai.agent.input"] = input_str
+            attrs["openai.agent.input_length"] = len(str(input_data))
+
+        # Extract session (third positional argument or kwarg)
+        session = None
+        if len(args) > 2:
+            session = args[2]
+        else:
+            session = kwargs.get("session")
+
+        if session:
+            # Session attributes
+            if hasattr(session, "session_id"):
+                attrs["openai.session.id"] = session.session_id
+                attrs["session.id"] = session.session_id  # Generic session ID
+
+            # Detect session type
+            session_type = type(session).__name__
+            attrs["openai.session.type"] = session_type
+
+        return attrs
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract token usage from agent run result.
+
+        Note: The OpenAI Agents SDK may not directly expose token usage in the result.
+        Token usage is captured by the underlying OpenAI SDK instrumentor.
+
+        Args:
+            result: The agent run result object.
+
+        Returns:
+            Optional[Dict[str, int]]: Dictionary with token counts or None.
+        """
+        # OpenAI Agents SDK doesn't directly expose usage in the result
+        # Token usage is captured by the OpenAI SDK instrumentor for underlying LLM calls
+        # We can try to extract if the result has usage information
+        if hasattr(result, "usage") and result.usage:
+            usage = result.usage
+            return {
+                "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+                "completion_tokens": getattr(usage, "completion_tokens", 0),
+                "total_tokens": getattr(usage, "total_tokens", 0),
+            }
+        return None
+
+    def _extract_response_attributes(self, result) -> Dict[str, Any]:
+        """Extract response attributes from agent run result.
+
+        Args:
+            result: The agent run result object.
+
+        Returns:
+            Dict[str, Any]: Dictionary of response attributes.
+        """
+        attrs = {}
+
+        # Extract final output
+        if hasattr(result, "final_output"):
+            output = str(result.final_output)[:500]  # Truncate to avoid span size issues
+            attrs["openai.agent.output"] = output
+            attrs["openai.agent.output_length"] = len(str(result.final_output))
+
+        # Extract handoff information if the agent handed off to another agent
+        if hasattr(result, "handoff") and result.handoff:
+            attrs["openai.handoff.occurred"] = True
+            if hasattr(result.handoff, "target_agent"):
+                attrs["openai.handoff.to_agent"] = result.handoff.target_agent
+
+        # Extract guardrail validation results
+        if hasattr(result, "guardrail_results"):
+            try:
+                guardrail_results = result.guardrail_results
+                if guardrail_results:
+                    # Count violations
+                    violation_count = sum(
+                        1 for r in guardrail_results if not getattr(r, "passed", True)
+                    )
+                    attrs["openai.guardrail.violations"] = violation_count
+                    attrs["openai.guardrail.validated"] = True
+            except Exception as e:
+                logger.debug("Failed to extract guardrail results: %s", e)
+
+        # Extract metadata if available
+        if hasattr(result, "metadata"):
+            try:
+                metadata = result.metadata
+                if isinstance(metadata, dict):
+                    # Add selected metadata fields
+                    for key in ["run_id", "agent_id", "session_id"]:
+                        if key in metadata:
+                            attrs[f"openai.agent.metadata.{key}"] = metadata[key]
+            except Exception as e:
+                logger.debug("Failed to extract result metadata: %s", e)
+
+        return attrs
+
+    def _extract_finish_reason(self, result) -> Optional[str]:
+        """Extract finish reason from agent run result.
+
+        Args:
+            result: The agent run result object.
+
+        Returns:
+            Optional[str]: The finish reason string or None if not available.
+        """
+        # Try to extract finish reason if available
+        if hasattr(result, "finish_reason"):
+            return result.finish_reason
+
+        # Check if result has a status
+        if hasattr(result, "status"):
+            return result.status
+
+        return None
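
To illustrate what this instrumentor is intended to capture, a hedged sketch of an instrumented agent run follows. The Agent/Runner calls mirror the openai-agents quickstart; the agent name and prompt are made-up example values, and enabling the instrumentation is assumed to happen through this package's auto-instrumentation (auto_instrument.py or the CLI entry point listed above). The span name and attribute keys in the comments come from the wrapper methods in the hunk above; exact runtime behavior depends on BaseInstrumentor.create_span_wrapper, which is not shown here.

from agents import Agent, Runner

# Example agent; any agent definition is handled the same way.
agent = Agent(name="support-bot", instructions="Answer order questions briefly.")

# With OpenAIAgentsInstrumentor applied, Runner.run_sync is wrapped so this call
# is traced as a span named "openai_agents.runner.run_sync" carrying attributes
# such as gen_ai.system="openai_agents", gen_ai.operation.name="agent.run",
# openai.agent.name="support-bot", and openai.agent.input="Where is my order?".
result = Runner.run_sync(agent, "Where is my order?")
print(result.final_output)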