genai-otel-instrument 0.1.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of genai-otel-instrument might be problematic.
- genai_otel/__init__.py +129 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +413 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +187 -0
- genai_otel/cost_calculator.py +276 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +240 -0
- genai_otel/instrumentors/__init__.py +47 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +528 -0
- genai_otel/instrumentors/cohere_instrumentor.py +76 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +87 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +97 -0
- genai_otel/instrumentors/langchain_instrumentor.py +75 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +119 -0
- genai_otel/instrumentors/ollama_instrumentor.py +83 -0
- genai_otel/instrumentors/openai_instrumentor.py +241 -0
- genai_otel/instrumentors/replicate_instrumentor.py +42 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +42 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +42 -0
- genai_otel/llm_pricing.json +589 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/METADATA +463 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/RECORD +44 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/licenses/LICENSE +201 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/top_level.txt +1 -0
genai_otel/instrumentors/base.py
@@ -0,0 +1,528 @@
"""Base classes for OpenTelemetry instrumentors for GenAI libraries and tools.

This module defines the `BaseInstrumentor` abstract base class, which provides
common functionality and a standardized interface for instrumenting various
Generative AI (GenAI) libraries and Model Context Protocol (MCP) tools.
It includes methods for creating OpenTelemetry spans, recording metrics,
and handling configuration and cost calculation.
"""

import logging
import threading
import time
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional

import wrapt
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace import Status, StatusCode

from ..config import OTelConfig
from ..cost_calculator import CostCalculator

# Import semantic conventions
try:
    from openlit.semcov import SemanticConvention as SC
except ImportError:
    # Fallback if openlit not available
    class SC:
        GEN_AI_REQUESTS = "gen_ai.requests"
        GEN_AI_CLIENT_TOKEN_USAGE = "gen_ai.client.token.usage"
        GEN_AI_CLIENT_OPERATION_DURATION = "gen_ai.client.operation.duration"
        GEN_AI_USAGE_COST = "gen_ai.usage.cost"
        GEN_AI_SERVER_TTFT = "gen_ai.server.ttft"
        GEN_AI_SERVER_TBT = "gen_ai.server.tbt"


# Import histogram bucket definitions
try:
    from genai_otel.metrics import _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
except ImportError:
    # Fallback buckets if import fails
    _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
        0.01,
        0.02,
        0.04,
        0.08,
        0.16,
        0.32,
        0.64,
        1.28,
        2.56,
        5.12,
        10.24,
        20.48,
        40.96,
        81.92,
    ]

logger = logging.getLogger(__name__)
# Global flag to track if shared metrics have been created
_SHARED_METRICS_CREATED = False
_SHARED_METRICS_LOCK = threading.Lock()

class BaseInstrumentor(ABC):  # pylint: disable=R0902
    """Abstract base class for all LLM library instrumentors.

    Provides common functionality for setting up OpenTelemetry spans, metrics,
    and handling common instrumentation patterns.
    """

    # Class-level shared metrics (created once, shared by all instances)
    _shared_request_counter = None
    _shared_token_counter = None
    _shared_latency_histogram = None
    _shared_cost_counter = None
    _shared_error_counter = None
    # Streaming metrics (Phase 3.4)
    _shared_ttft_histogram = None
    _shared_tbt_histogram = None

    def __init__(self):
        """Initializes the instrumentor with OpenTelemetry tracers, meters, and common metrics."""
        self.tracer = trace.get_tracer(__name__)
        self.meter = metrics.get_meter(__name__)
        self.config: Optional[OTelConfig] = None
        self.cost_calculator = CostCalculator()
        self._instrumented = False

        # Use shared metrics to avoid duplicate warnings
        self._ensure_shared_metrics_created()

        # Reference the shared metrics
        self.request_counter = self._shared_request_counter
        self.token_counter = self._shared_token_counter
        self.latency_histogram = self._shared_latency_histogram
        self.cost_counter = self._shared_cost_counter
        self.error_counter = self._shared_error_counter
        # Streaming metrics
        self.ttft_histogram = self._shared_ttft_histogram
        self.tbt_histogram = self._shared_tbt_histogram

    @classmethod
    def _ensure_shared_metrics_created(cls):
        """Ensure shared metrics are created only once across all instrumentor instances."""
        global _SHARED_METRICS_CREATED

        with _SHARED_METRICS_LOCK:
            if _SHARED_METRICS_CREATED:
                return

            try:
                meter = metrics.get_meter(__name__)

                # Create shared metrics once using semantic conventions
                cls._shared_request_counter = meter.create_counter(
                    SC.GEN_AI_REQUESTS, description="Number of GenAI requests"
                )
                cls._shared_token_counter = meter.create_counter(
                    SC.GEN_AI_CLIENT_TOKEN_USAGE, description="Token usage for GenAI operations"
                )
                # Note: Histogram buckets should be configured via Views in MeterProvider
                # The advisory parameter is provided as a hint but Views take precedence
                cls._shared_latency_histogram = meter.create_histogram(
                    SC.GEN_AI_CLIENT_OPERATION_DURATION,
                    description="GenAI client operation duration",
                    unit="s",
                )
                cls._shared_cost_counter = meter.create_counter(
                    SC.GEN_AI_USAGE_COST, description="Cost of GenAI operations", unit="USD"
                )
                # Granular cost counters (Phase 3.2)
                cls._shared_prompt_cost_counter = meter.create_counter(
                    "gen_ai.usage.cost.prompt", description="Prompt tokens cost", unit="USD"
                )
                cls._shared_completion_cost_counter = meter.create_counter(
                    "gen_ai.usage.cost.completion", description="Completion tokens cost", unit="USD"
                )
                cls._shared_reasoning_cost_counter = meter.create_counter(
                    "gen_ai.usage.cost.reasoning",
                    description="Reasoning tokens cost (o1 models)",
                    unit="USD",
                )
                cls._shared_cache_read_cost_counter = meter.create_counter(
                    "gen_ai.usage.cost.cache_read",
                    description="Cache read cost (Anthropic)",
                    unit="USD",
                )
                cls._shared_cache_write_cost_counter = meter.create_counter(
                    "gen_ai.usage.cost.cache_write",
                    description="Cache write cost (Anthropic)",
                    unit="USD",
                )
                cls._shared_error_counter = meter.create_counter(
                    "gen_ai.client.errors", description="Number of GenAI client errors"
                )
                # Streaming metrics (Phase 3.4)
                # Note: Buckets should be configured via Views in MeterProvider
                cls._shared_ttft_histogram = meter.create_histogram(
                    SC.GEN_AI_SERVER_TTFT,
                    description="Time to first token in seconds",
                    unit="s",
                )
                cls._shared_tbt_histogram = meter.create_histogram(
                    SC.GEN_AI_SERVER_TBT,
                    description="Time between tokens in seconds",
                    unit="s",
                )

                _SHARED_METRICS_CREATED = True
                logger.debug("Shared metrics created successfully")

            except Exception as e:
                logger.error("Failed to create shared metrics: %s", e, exc_info=True)
                # Create dummy metrics that do nothing to avoid crashes
                cls._shared_request_counter = None
                cls._shared_token_counter = None
                cls._shared_latency_histogram = None
                cls._shared_cost_counter = None
                cls._shared_prompt_cost_counter = None
                cls._shared_completion_cost_counter = None
                cls._shared_reasoning_cost_counter = None
                cls._shared_cache_read_cost_counter = None
                cls._shared_cache_write_cost_counter = None
                cls._shared_error_counter = None
                cls._shared_ttft_histogram = None
                cls._shared_tbt_histogram = None
    @abstractmethod
    def instrument(self, config: OTelConfig):
        """Abstract method to implement library-specific instrumentation.

        Args:
            config (OTelConfig): The OpenTelemetry configuration object.
        """

    def create_span_wrapper(
        self, span_name: str, extract_attributes: Optional[Callable[[Any, Any, Any], Dict]] = None
    ) -> Callable:
        """Create a decorator that instruments a function with an OpenTelemetry span."""

        @wrapt.decorator
        def wrapper(wrapped, instance, args, kwargs):
            # If instrumentation failed during initialization, just call the original function.
            if not self._instrumented:
                logger.debug("Instrumentation not active, calling %s directly", span_name)
                return wrapped(*args, **kwargs)

            try:
                # Start a new span
                initial_attributes = {}
                if extract_attributes:
                    try:
                        extracted_attrs = extract_attributes(instance, args, kwargs)
                        for key, value in extracted_attrs.items():
                            if isinstance(value, (str, int, float, bool)):
                                initial_attributes[key] = value
                            else:
                                initial_attributes[key] = str(value)
                    except Exception as e:
                        logger.warning(
                            "Failed to extract attributes for span '%s': %s", span_name, e
                        )

                # Check if this is a streaming request before creating the span
                is_streaming = kwargs.get("stream", False)

                # Start the span (but don't use context manager for streaming to keep it open)
                span = self.tracer.start_span(span_name, attributes=initial_attributes)
                start_time = time.time()

                try:
                    # Call the original function
                    result = wrapped(*args, **kwargs)

                    if self.request_counter:
                        self.request_counter.add(1, {"operation": span.name})

                    # Handle streaming vs non-streaming responses (Phase 3.4)
                    if is_streaming:
                        # For streaming responses, wrap the iterator to capture TTFT/TBT
                        model = kwargs.get(
                            "model", initial_attributes.get("gen_ai.request.model", "unknown")
                        )
                        logger.debug(f"Detected streaming response for model: {model}")
                        # Wrap the streaming response - span will be finalized when iteration completes
                        return self._wrap_streaming_response(result, span, start_time, model)

                    # Non-streaming: record metrics and close span normally
                    try:
                        self._record_result_metrics(span, result, start_time, kwargs)
                    except Exception as e:
                        logger.warning("Failed to record metrics for span '%s': %s", span_name, e)

                    # Set span status to OK on successful execution
                    span.set_status(Status(StatusCode.OK))
                    span.end()
                    return result

                except Exception as e:
                    # Handle exceptions during the wrapped function execution
                    try:
                        if self.error_counter:
                            self.error_counter.add(
                                1, {"operation": span_name, "error_type": type(e).__name__}
                            )
                    except Exception:
                        pass

                    # Set span status to ERROR and record the exception
                    span.set_status(Status(StatusCode.ERROR, str(e)))
                    span.record_exception(e)
                    span.end()
                    raise

            except Exception as e:
                logger.error("Span creation failed for '%s': %s", span_name, e, exc_info=True)
                return wrapped(*args, **kwargs)

        return wrapper
    def _record_result_metrics(self, span, result, start_time: float, request_kwargs: dict = None):
        """Record metrics derived from the function result and execution time.

        Args:
            span: The OpenTelemetry span to record metrics on.
            result: The result from the wrapped function.
            start_time: The time when the function started executing.
            request_kwargs: The original request kwargs (for content capture).
        """
        # Record latency
        try:
            duration = time.time() - start_time
            if self.latency_histogram:
                self.latency_histogram.record(duration, {"operation": span.name})
        except Exception as e:
            logger.warning("Failed to record latency for span '%s': %s", span.name, e)

        # Extract and set response attributes if available
        try:
            if hasattr(self, "_extract_response_attributes"):
                response_attrs = self._extract_response_attributes(result)
                if response_attrs and isinstance(response_attrs, dict):
                    for key, value in response_attrs.items():
                        if isinstance(value, (str, int, float, bool)):
                            span.set_attribute(key, value)
                        elif isinstance(value, list):
                            # For arrays like finish_reasons
                            span.set_attribute(key, value)
                        else:
                            span.set_attribute(key, str(value))
        except Exception as e:
            logger.warning("Failed to extract response attributes for span '%s': %s", span.name, e)

        # Add content events if content capture is enabled
        try:
            if (
                hasattr(self, "_add_content_events")
                and self.config
                and self.config.enable_content_capture
            ):
                self._add_content_events(span, result, request_kwargs or {})
        except Exception as e:
            logger.warning("Failed to add content events for span '%s': %s", span.name, e)

        # Extract and record token usage and cost
        try:
            usage = self._extract_usage(result)
            if usage and isinstance(usage, dict):
                prompt_tokens = usage.get("prompt_tokens", 0)
                completion_tokens = usage.get("completion_tokens", 0)
                total_tokens = usage.get("total_tokens", 0)

                # Record token counts if available and positive
                # Support dual emission based on OTEL_SEMCONV_STABILITY_OPT_IN
                emit_old_attrs = (
                    self.config
                    and self.config.semconv_stability_opt_in
                    and "dup" in self.config.semconv_stability_opt_in
                )

                if (
                    self.token_counter
                    and isinstance(prompt_tokens, (int, float))
                    and prompt_tokens > 0
                ):
                    self.token_counter.add(
                        prompt_tokens, {"token_type": "prompt", "operation": span.name}
                    )
                    # New semantic convention
                    span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
                    # Old semantic convention (if dual emission enabled)
                    if emit_old_attrs:
                        span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))

                if (
                    self.token_counter
                    and isinstance(completion_tokens, (int, float))
                    and completion_tokens > 0
                ):
                    self.token_counter.add(
                        completion_tokens, {"token_type": "completion", "operation": span.name}
                    )
                    # New semantic convention
                    span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
                    # Old semantic convention (if dual emission enabled)
                    if emit_old_attrs:
                        span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))

                if isinstance(total_tokens, (int, float)) and total_tokens > 0:
                    span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))

                # Calculate and record cost if enabled and applicable
                if self.config and self.config.enable_cost_tracking and self._shared_cost_counter:
                    try:
                        model = span.attributes.get("gen_ai.request.model", "unknown")
                        # Assuming 'chat' as a default call_type for generic base instrumentor tests.
                        # Specific instrumentors will provide the actual call_type.
                        call_type = span.attributes.get("gen_ai.request.type", "chat")

                        # Use granular cost calculation for chat requests
                        if call_type == "chat":
                            costs = self.cost_calculator.calculate_granular_cost(
                                model, usage, call_type
                            )
                            total_cost = costs["total"]

                            # Record total cost
                            if total_cost > 0:
                                self._shared_cost_counter.add(total_cost, {"model": str(model)})
                                # Set span attributes for granular costs
                                span.set_attribute("gen_ai.usage.cost.total", total_cost)

                            # Record and set attributes for granular costs
                            if costs["prompt"] > 0 and self._shared_prompt_cost_counter:
                                self._shared_prompt_cost_counter.add(
                                    costs["prompt"], {"model": str(model)}
                                )
                                span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])

                            if costs["completion"] > 0 and self._shared_completion_cost_counter:
                                self._shared_completion_cost_counter.add(
                                    costs["completion"], {"model": str(model)}
                                )
                                span.set_attribute(
                                    "gen_ai.usage.cost.completion", costs["completion"]
                                )

                            if costs["reasoning"] > 0 and self._shared_reasoning_cost_counter:
                                self._shared_reasoning_cost_counter.add(
                                    costs["reasoning"], {"model": str(model)}
                                )
                                span.set_attribute(
                                    "gen_ai.usage.cost.reasoning", costs["reasoning"]
                                )

                            if costs["cache_read"] > 0 and self._shared_cache_read_cost_counter:
                                self._shared_cache_read_cost_counter.add(
                                    costs["cache_read"], {"model": str(model)}
                                )
                                span.set_attribute(
                                    "gen_ai.usage.cost.cache_read", costs["cache_read"]
                                )

                            if costs["cache_write"] > 0 and self._shared_cache_write_cost_counter:
                                self._shared_cache_write_cost_counter.add(
                                    costs["cache_write"], {"model": str(model)}
                                )
                                span.set_attribute(
                                    "gen_ai.usage.cost.cache_write", costs["cache_write"]
                                )
                        else:
                            # For non-chat requests, use simple cost calculation
                            cost = self.cost_calculator.calculate_cost(model, usage, call_type)
                            if cost and cost > 0:
                                self._shared_cost_counter.add(cost, {"model": str(model)})
                    except Exception as e:
                        logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)

        except Exception as e:
            logger.warning(
                "Failed to extract or record usage metrics for span '%s': %s", span.name, e
            )
    def _wrap_streaming_response(self, stream, span, start_time: float, model: str):
        """Wrap a streaming response to capture TTFT and TBT metrics.

        This generator wrapper yields chunks from the streaming response while
        measuring time to first token (TTFT) and time between tokens (TBT).
        The span is finalized when the stream completes or errors.

        Args:
            stream: The streaming response iterator
            span: The OpenTelemetry span for this request
            start_time: Request start time (for TTFT calculation)
            model: Model name/identifier for metric attributes

        Yields:
            Chunks from the original stream
        """
        from opentelemetry.trace import Status, StatusCode

        first_token = True
        last_token_time = start_time
        token_count = 0

        try:
            for chunk in stream:
                current_time = time.time()
                token_count += 1

                if first_token:
                    # Record Time to First Token
                    ttft = current_time - start_time
                    span.set_attribute("gen_ai.server.ttft", ttft)
                    if self.ttft_histogram:
                        self.ttft_histogram.record(ttft, {"model": model, "operation": span.name})
                    logger.debug(f"TTFT for {model}: {ttft:.3f}s")
                    first_token = False
                else:
                    # Record Time Between Tokens
                    tbt = current_time - last_token_time
                    if self.tbt_histogram:
                        self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})

                last_token_time = current_time
                yield chunk

            # Stream completed successfully
            duration = time.time() - start_time
            if self.latency_histogram:
                self.latency_histogram.record(duration, {"operation": span.name})
            span.set_attribute("gen_ai.streaming.token_count", token_count)
            span.set_status(Status(StatusCode.OK))
            span.end()  # Close the span when streaming completes
            logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")

        except Exception as e:
            # Stream failed
            span.set_status(Status(StatusCode.ERROR, str(e)))
            span.record_exception(e)
            span.end()  # Close the span even on error
            if self.error_counter:
                self.error_counter.add(1, {"operation": span.name, "error_type": type(e).__name__})
            logger.warning(f"Error in streaming wrapper: {e}")
            raise

    @abstractmethod
    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        """Abstract method to extract token usage information from a function result.

        Subclasses must implement this to parse the specific library's response object
        and return a dictionary containing 'prompt_tokens', 'completion_tokens',
        and optionally 'total_tokens'.

        Args:
            result: The return value of the instrumented function.

        Returns:
            Optional[Dict[str, int]]: A dictionary with token counts, or None if usage cannot be extracted.
        """
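For orientation, the following is a minimal sketch (not part of the package) of how a concrete instrumentor builds on BaseInstrumentor: implement instrument() to patch the target SDK with create_span_wrapper(), and implement _extract_usage() so the shared token and cost metrics can be recorded. The acme_llm SDK, its Client.complete method, and its response shape are hypothetical stand-ins; only the genai_otel imports follow the file listing above.

import logging
from typing import Any, Dict, Optional

from genai_otel.config import OTelConfig
from genai_otel.instrumentors.base import BaseInstrumentor

logger = logging.getLogger(__name__)


class AcmeInstrumentor(BaseInstrumentor):
    """Sketch: trace calls to the hypothetical acme_llm.Client.complete."""

    def instrument(self, config: OTelConfig):
        self.config = config
        try:
            import acme_llm  # hypothetical SDK, used only for illustration
        except ImportError:
            logger.debug("acme_llm not installed, skipping instrumentation")
            return

        span_wrapper = self.create_span_wrapper(
            span_name="acme_llm.complete",
            extract_attributes=self._extract_attributes,
        )
        # create_span_wrapper returns a wrapt decorator; apply it to the original method.
        acme_llm.Client.complete = span_wrapper(acme_llm.Client.complete)
        self._instrumented = True

    def _extract_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
        # Called by the base wrapper before the span starts.
        return {
            "gen_ai.system": "acme",
            "gen_ai.request.model": kwargs.get("model", "unknown"),
        }

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        # Assumes the hypothetical response exposes a `usage` object with token counts.
        usage = getattr(result, "usage", None)
        if usage is None:
            return None
        return {
            "prompt_tokens": getattr(usage, "prompt_tokens", 0),
            "completion_tokens": getattr(usage, "completion_tokens", 0),
            "total_tokens": getattr(usage, "total_tokens", 0),
        }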
genai_otel/instrumentors/cohere_instrumentor.py
@@ -0,0 +1,76 @@
"""OpenTelemetry instrumentor for the Cohere SDK.

This instrumentor automatically traces calls to Cohere models, capturing
relevant attributes such as the model name.
"""

import logging
from typing import Dict, Optional

from ..config import OTelConfig
from .base import BaseInstrumentor

logger = logging.getLogger(__name__)


class CohereInstrumentor(BaseInstrumentor):
    """Instrumentor for Cohere"""

    def __init__(self):
        """Initialize the instrumentor."""
        super().__init__()
        self._cohere_available = False
        self._check_availability()

    def _check_availability(self):
        """Check if cohere library is available."""
        try:
            import cohere

            self._cohere_available = True
            logger.debug("cohere library detected and available for instrumentation")
        except ImportError:
            logger.debug("cohere library not installed, instrumentation will be skipped")
            self._cohere_available = False

    def instrument(self, config: OTelConfig):
        """Instrument cohere if available."""
        if not self._cohere_available:
            logger.debug("Skipping instrumentation - library not available")
            return

        self.config = config
        try:
            import cohere

            original_init = cohere.Client.__init__

            def wrapped_init(instance, *args, **kwargs):
                original_init(instance, *args, **kwargs)
                self._instrument_client(instance)

            cohere.Client.__init__ = wrapped_init

        except ImportError:
            pass

    def _instrument_client(self, client):
        original_generate = client.generate

        def wrapped_generate(*args, **kwargs):
            with self.tracer.start_as_current_span("cohere.generate") as span:
                model = kwargs.get("model", "command")

                span.set_attribute("gen_ai.system", "cohere")
                span.set_attribute("gen_ai.request.model", model)

                if self.request_counter:
                    self.request_counter.add(1, {"model": model, "provider": "cohere"})

                result = original_generate(*args, **kwargs)
                return result

        client.generate = wrapped_generate

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        return None
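Note that CohereInstrumentor wraps client.generate with a hand-rolled span rather than the base create_span_wrapper, and its _extract_usage returns None, so no token or cost metrics are recorded for Cohere calls. If it were routed through the base wrapper, usage extraction could look roughly like the sketch below, assuming the response exposes billed units under meta.billed_units as recent Cohere SDK responses do (an assumption worth verifying against the installed SDK version).

from typing import Dict, Optional


def _extract_usage(self, result) -> Optional[Dict[str, int]]:
    # Sketch only: assumes the Cohere response carries meta.billed_units with
    # input_tokens / output_tokens; returns None when that shape is absent.
    billed = getattr(getattr(result, "meta", None), "billed_units", None)
    if billed is None:
        return None
    prompt = int(getattr(billed, "input_tokens", 0) or 0)
    completion = int(getattr(billed, "output_tokens", 0) or 0)
    return {
        "prompt_tokens": prompt,
        "completion_tokens": completion,
        "total_tokens": prompt + completion,
    }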
genai_otel/instrumentors/google_ai_instrumentor.py
@@ -0,0 +1,87 @@
"""OpenTelemetry instrumentor for Google Generative AI (Gemini) SDK.

This instrumentor automatically traces calls to Google Gemini models, capturing
relevant attributes such as the model name and token usage.
"""

import logging
from typing import Any, Dict, Optional

from ..config import OTelConfig
from .base import BaseInstrumentor


class GoogleAIInstrumentor(BaseInstrumentor):
    """Instrumentor for Google Generative AI (Gemini)"""

    def __init__(self):
        """Initialize the instrumentor."""
        super().__init__()
        self._google_available = False
        self._check_availability()

    def _check_availability(self):
        """Check if Google Generative AI library is available."""
        try:
            import google.generativeai as genai

            self._google_available = True
            logging.debug("Google Generative AI library detected and available for instrumentation")
        except ImportError:
            logging.debug(
                "Google Generative AI library not installed, instrumentation will be skipped"
            )
            self._google_available = False

    def instrument(self, config: OTelConfig):
        if not self._google_available:
            logging.debug("Skipping Google Generative AI instrumentation - library not available")
            return

        self.config = config
        try:
            import google.generativeai as genai

            if hasattr(genai, "GenerativeModel") and hasattr(
                genai.GenerativeModel, "generate_content"
            ):
                instrumented_generate_method = self.create_span_wrapper(
                    span_name="google.generativeai.generate_content",
                    extract_attributes=self._extract_google_ai_attributes,
                )
                genai.GenerativeModel.generate_content = instrumented_generate_method
                self._instrumented = True
                logging.info("Google Generative AI instrumentation enabled")

        except Exception as e:
            logging.error("Failed to instrument Google Generative AI: %s", e, exc_info=True)
            if config.fail_on_error:
                raise

    def _extract_google_ai_attributes(
        self, instance: Any, args: Any, kwargs: Any
    ) -> Dict[str, Any]:  # pylint: disable=W0613

        attrs = {}

        model_name = getattr(instance, "model_name", "unknown")

        attrs["gen_ai.system"] = "google"

        attrs["gen_ai.request.model"] = model_name

        return attrs

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:

        if hasattr(result, "usage_metadata") and result.usage_metadata:

            usage = result.usage_metadata

            return {
                "prompt_tokens": getattr(usage, "prompt_token_count", 0),
                "completion_tokens": getattr(usage, "candidates_token_count", 0),
                "total_tokens": getattr(usage, "total_token_count", 0),
            }

        return None
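A hedged usage sketch (not taken from the package's own docs) of wiring this instrumentor by hand and issuing a call that the span wrapper would trace; OTelConfig's exact fields live in genai_otel/config.py and are assumed here to have workable defaults.

import google.generativeai as genai

from genai_otel.config import OTelConfig
from genai_otel.instrumentors.google_ai_instrumentor import GoogleAIInstrumentor

# Standard google-generativeai setup.
genai.configure(api_key="...")

# Patch GenerativeModel.generate_content; assumption: a default OTelConfig() is sufficient.
instrumentor = GoogleAIInstrumentor()
instrumentor.instrument(OTelConfig())

model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Say hello in one sentence.")
# _extract_usage reads response.usage_metadata (prompt_token_count,
# candidates_token_count, total_token_count) to record token metrics.
print(response.text)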