genai_otel_instrument-0.1.24-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
genai_otel/instrumentors/base.py
@@ -0,0 +1,919 @@
+ """Base classes for OpenTelemetry instrumentors for GenAI libraries and tools.
+
+ This module defines the `BaseInstrumentor` abstract base class, which provides
+ common functionality and a standardized interface for instrumenting various
+ Generative AI (GenAI) libraries and Model Context Protocol (MCP) tools.
+ It includes methods for creating OpenTelemetry spans, recording metrics,
+ and handling configuration and cost calculation.
+ """
+
+ import json
+ import logging
+ import threading
+ import time
+ from abc import ABC, abstractmethod
+ from typing import Any, Callable, Dict, List, Optional
+
+ import wrapt
+ from opentelemetry import metrics, trace
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+ from opentelemetry.sdk.resources import Resource
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
+ from opentelemetry.trace import Status, StatusCode
+
+ from ..config import OTelConfig
+ from ..cost_calculator import CostCalculator
+ from ..server_metrics import get_server_metrics
+
+ # Import semantic conventions
+ try:
+     from openlit.semcov import SemanticConvention as SC
+ except ImportError:
+     # Fallback if openlit not available
+     class SC:
+         GEN_AI_REQUESTS = "gen_ai.requests"
+         GEN_AI_CLIENT_TOKEN_USAGE = "gen_ai.client.token.usage"
+         GEN_AI_CLIENT_OPERATION_DURATION = "gen_ai.client.operation.duration"
+         GEN_AI_USAGE_COST = "gen_ai.usage.cost"
+         GEN_AI_SERVER_TTFT = "gen_ai.server.ttft"
+         GEN_AI_SERVER_TBT = "gen_ai.server.tbt"
+
+
+ # Import histogram bucket definitions
+ try:
+     from genai_otel.metrics import _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+ except ImportError:
+     # Fallback buckets if import fails
+     _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
+         0.01,
+         0.02,
+         0.04,
+         0.08,
+         0.16,
+         0.32,
+         0.64,
+         1.28,
+         2.56,
+         5.12,
+         10.24,
+         20.48,
+         40.96,
+         81.92,
+     ]
+
+ logger = logging.getLogger(__name__)
+ # Global flag to track if shared metrics have been created
+ _SHARED_METRICS_CREATED = False
+ _SHARED_METRICS_LOCK = threading.Lock()
+
+
+ class BaseInstrumentor(ABC):  # pylint: disable=R0902
+     """Abstract base class for all LLM library instrumentors.
+
+     Provides common functionality for setting up OpenTelemetry spans, metrics,
+     and handling common instrumentation patterns.
+     """
+
+     # Class-level shared metrics (created once, shared by all instances)
+     _shared_request_counter = None
+     _shared_token_counter = None
+     _shared_latency_histogram = None
+     _shared_cost_counter = None
+     _shared_error_counter = None
+     # Granular cost counters (Phase 3.2)
+     _shared_prompt_cost_counter = None
+     _shared_completion_cost_counter = None
+     _shared_reasoning_cost_counter = None
+     _shared_cache_read_cost_counter = None
+     _shared_cache_write_cost_counter = None
+     # Streaming metrics (Phase 3.4)
+     _shared_ttft_histogram = None
+     _shared_tbt_histogram = None
+     # Token distribution histograms
+     _shared_prompt_tokens_histogram = None
+     _shared_completion_tokens_histogram = None
+     # Finish reason tracking counters
+     _shared_request_finish_counter = None
+     _shared_request_success_counter = None
+     _shared_request_failure_counter = None
+
+     def __init__(self):
+         """Initializes the instrumentor with OpenTelemetry tracers, meters, and common metrics."""
+         self.tracer = trace.get_tracer(__name__)
+         self.meter = metrics.get_meter(__name__)
+         self.config: Optional[OTelConfig] = None
+         self.cost_calculator = CostCalculator()  # Will be updated when instrument() is called
+         self._instrumented = False
+
+         # Use shared metrics to avoid duplicate warnings
+         self._ensure_shared_metrics_created()
+
+         # Reference the shared metrics
+         self.request_counter = self._shared_request_counter
+         self.token_counter = self._shared_token_counter
+         self.latency_histogram = self._shared_latency_histogram
+         self.cost_counter = self._shared_cost_counter
+         self.error_counter = self._shared_error_counter
+         # Granular cost counters (Phase 3.2)
+         self.prompt_cost_counter = self._shared_prompt_cost_counter
+         self.completion_cost_counter = self._shared_completion_cost_counter
+         self.reasoning_cost_counter = self._shared_reasoning_cost_counter
+         self.cache_read_cost_counter = self._shared_cache_read_cost_counter
+         self.cache_write_cost_counter = self._shared_cache_write_cost_counter
+         # Streaming metrics
+         self.ttft_histogram = self._shared_ttft_histogram
+         self.tbt_histogram = self._shared_tbt_histogram
+         # Token distribution histograms
+         self.prompt_tokens_histogram = self._shared_prompt_tokens_histogram
+         self.completion_tokens_histogram = self._shared_completion_tokens_histogram
+         # Finish reason tracking counters
+         self.request_finish_counter = self._shared_request_finish_counter
+         self.request_success_counter = self._shared_request_success_counter
+         self.request_failure_counter = self._shared_request_failure_counter
+
+     @classmethod
+     def _ensure_shared_metrics_created(cls):
+         """Ensure shared metrics are created only once across all instrumentor instances."""
+         global _SHARED_METRICS_CREATED
+
+         with _SHARED_METRICS_LOCK:
+             if _SHARED_METRICS_CREATED:
+                 return
+
+             try:
+                 meter = metrics.get_meter(__name__)
+
+                 # Create shared metrics once using semantic conventions
+                 cls._shared_request_counter = meter.create_counter(
+                     SC.GEN_AI_REQUESTS, description="Number of GenAI requests"
+                 )
+                 cls._shared_token_counter = meter.create_counter(
+                     SC.GEN_AI_CLIENT_TOKEN_USAGE, description="Token usage for GenAI operations"
+                 )
+                 # Note: histogram buckets should be configured via Views on the MeterProvider;
+                 # _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS lists the recommended boundaries.
+                 cls._shared_latency_histogram = meter.create_histogram(
+                     SC.GEN_AI_CLIENT_OPERATION_DURATION,
+                     description="GenAI client operation duration",
+                     unit="s",
+                 )
+                 cls._shared_cost_counter = meter.create_counter(
+                     SC.GEN_AI_USAGE_COST, description="Cost of GenAI operations", unit="USD"
+                 )
+                 # Granular cost counters (Phase 3.2)
+                 cls._shared_prompt_cost_counter = meter.create_counter(
+                     "gen_ai.usage.cost.prompt", description="Prompt tokens cost", unit="USD"
+                 )
+                 cls._shared_completion_cost_counter = meter.create_counter(
+                     "gen_ai.usage.cost.completion", description="Completion tokens cost", unit="USD"
+                 )
+                 cls._shared_reasoning_cost_counter = meter.create_counter(
+                     "gen_ai.usage.cost.reasoning",
+                     description="Reasoning tokens cost (o1 models)",
+                     unit="USD",
+                 )
+                 cls._shared_cache_read_cost_counter = meter.create_counter(
+                     "gen_ai.usage.cost.cache_read",
+                     description="Cache read cost (Anthropic)",
+                     unit="USD",
+                 )
+                 cls._shared_cache_write_cost_counter = meter.create_counter(
+                     "gen_ai.usage.cost.cache_write",
+                     description="Cache write cost (Anthropic)",
+                     unit="USD",
+                 )
+                 cls._shared_error_counter = meter.create_counter(
+                     "gen_ai.client.errors", description="Number of GenAI client errors"
+                 )
+                 # Streaming metrics (Phase 3.4)
+                 # Note: buckets should be configured via Views on the MeterProvider
+                 cls._shared_ttft_histogram = meter.create_histogram(
+                     SC.GEN_AI_SERVER_TTFT,
+                     description="Time to first token in seconds",
+                     unit="s",
+                 )
+                 cls._shared_tbt_histogram = meter.create_histogram(
+                     SC.GEN_AI_SERVER_TBT,
+                     description="Time between tokens in seconds",
+                     unit="s",
+                 )
+                 # Token distribution histograms
+                 cls._shared_prompt_tokens_histogram = meter.create_histogram(
+                     "gen_ai.client.token.usage.prompt",
+                     description="Distribution of prompt tokens per request",
+                     unit="tokens",
+                 )
+                 cls._shared_completion_tokens_histogram = meter.create_histogram(
+                     "gen_ai.client.token.usage.completion",
+                     description="Distribution of completion tokens per request",
+                     unit="tokens",
+                 )
+                 # Finish reason tracking counters
+                 cls._shared_request_finish_counter = meter.create_counter(
+                     "gen_ai.server.request.finish",
+                     description="Number of finished requests by finish reason",
+                 )
+                 cls._shared_request_success_counter = meter.create_counter(
+                     "gen_ai.server.request.success",
+                     description="Number of successfully completed requests",
+                 )
+                 cls._shared_request_failure_counter = meter.create_counter(
+                     "gen_ai.server.request.failure",
+                     description="Number of failed requests",
+                 )
+
+                 _SHARED_METRICS_CREATED = True
+                 logger.debug("Shared metrics created successfully")
+
+             except Exception as e:
+                 logger.error("Failed to create shared metrics: %s", e, exc_info=True)
+                 # Leave the metric handles as None; call sites check them before recording
+                 cls._shared_request_counter = None
+                 cls._shared_token_counter = None
+                 cls._shared_latency_histogram = None
+                 cls._shared_cost_counter = None
+                 cls._shared_prompt_cost_counter = None
+                 cls._shared_completion_cost_counter = None
+                 cls._shared_reasoning_cost_counter = None
+                 cls._shared_cache_read_cost_counter = None
+                 cls._shared_cache_write_cost_counter = None
+                 cls._shared_error_counter = None
+                 cls._shared_ttft_histogram = None
+                 cls._shared_tbt_histogram = None
+                 cls._shared_prompt_tokens_histogram = None
+                 cls._shared_completion_tokens_histogram = None
+                 cls._shared_request_finish_counter = None
+                 cls._shared_request_success_counter = None
+                 cls._shared_request_failure_counter = None
+
+     def _setup_config(self, config: OTelConfig):
+         """Set up configuration and reinitialize cost calculator with custom pricing if provided.
+
+         Args:
+             config (OTelConfig): The OpenTelemetry configuration object.
+         """
+         self.config = config
+         # Reinitialize cost calculator with custom pricing if provided
+         if config.custom_pricing_json:
+             self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
+             logger.info("Cost calculator reinitialized with custom pricing")
+
+     @abstractmethod
+     def instrument(self, config: OTelConfig):
+         """Abstract method to implement library-specific instrumentation.
+
+         Implementers should call self._setup_config(config) at the beginning of this method
+         to ensure custom pricing is loaded.
+
+         Args:
+             config (OTelConfig): The OpenTelemetry configuration object.
+         """
+
+     def create_span_wrapper(
+         self, span_name: str, extract_attributes: Optional[Callable[[Any, Any, Any], Dict]] = None
+     ) -> Callable:
+         """Create a decorator that instruments a function with an OpenTelemetry span."""
+
+         @wrapt.decorator
+         def wrapper(wrapped, instance, args, kwargs):
+             # If instrumentation failed during initialization, just call the original function.
+             if not self._instrumented:
+                 logger.debug("Instrumentation not active, calling %s directly", span_name)
+                 return wrapped(*args, **kwargs)
+
+             try:
+                 # Track the span so setup failures can be told apart from
+                 # failures raised by the wrapped call itself.
+                 span = None
+                 initial_attributes = {}
+                 if extract_attributes:
+                     try:
+                         extracted_attrs = extract_attributes(instance, args, kwargs)
+                         for key, value in extracted_attrs.items():
+                             if isinstance(value, (str, int, float, bool)):
+                                 initial_attributes[key] = value
+                             else:
+                                 initial_attributes[key] = str(value)
+                     except Exception as e:
+                         logger.warning(
+                             "Failed to extract attributes for span '%s': %s", span_name, e
+                         )
+
+                 # Check if this is a streaming request before creating the span
+                 is_streaming = kwargs.get("stream", False)
+
+                 # Start the span (without a context manager, so it can stay open for streaming)
+                 span = self.tracer.start_span(span_name, attributes=initial_attributes)
+                 start_time = time.time()
+
+                 # Increment server metrics: running requests counter
+                 server_metrics = get_server_metrics()
+                 if server_metrics:
+                     server_metrics.increment_requests_running()
+                     logger.debug(f"Incremented running requests for {span_name}")
+
+                 # Extract session and user context (Phase 4.1)
+                 if self.config:
+                     if self.config.session_id_extractor:
+                         try:
+                             session_id = self.config.session_id_extractor(instance, args, kwargs)
+                             if session_id:
+                                 span.set_attribute("session.id", session_id)
+                                 logger.debug("Set session.id: %s", session_id)
+                         except Exception as e:
+                             logger.debug("Failed to extract session ID: %s", e)
+
+                     if self.config.user_id_extractor:
+                         try:
+                             user_id = self.config.user_id_extractor(instance, args, kwargs)
+                             if user_id:
+                                 span.set_attribute("user.id", user_id)
+                                 logger.debug("Set user.id: %s", user_id)
+                         except Exception as e:
+                             logger.debug("Failed to extract user ID: %s", e)
+
+                 try:
+                     # Call the original function
+                     result = wrapped(*args, **kwargs)
+
+                     if self.request_counter:
+                         self.request_counter.add(1, {"operation": span.name})
+
+                     # Handle streaming vs non-streaming responses (Phase 3.4)
+                     if is_streaming:
+                         # For streaming responses, wrap the iterator to capture TTFT/TBT
+                         model = kwargs.get(
+                             "model", initial_attributes.get("gen_ai.request.model", "unknown")
+                         )
+                         logger.debug(f"Detected streaming response for model: {model}")
+                         # Wrap the streaming response - span is finalized when iteration completes
+                         return self._wrap_streaming_response(result, span, start_time, model)
+
+                     # Non-streaming: record metrics and close span normally
+                     try:
+                         self._record_result_metrics(span, result, start_time, kwargs)
+                     except Exception as e:
+                         logger.warning("Failed to record metrics for span '%s': %s", span_name, e)
+
+                     # Set span status to OK on successful execution
+                     span.set_status(Status(StatusCode.OK))
+                     span.end()
+
+                     # Decrement server metrics: running requests counter
+                     server_metrics = get_server_metrics()
+                     if server_metrics:
+                         server_metrics.decrement_requests_running()
+                         logger.debug(f"Decremented running requests for {span_name}")
+
+                     return result
+
+                 except Exception as e:
+                     # Handle exceptions during the wrapped function execution
+                     try:
+                         if self.error_counter:
+                             self.error_counter.add(
+                                 1, {"operation": span_name, "error_type": type(e).__name__}
+                             )
+                     except Exception:
+                         pass
+
+                     # Set span status to ERROR and record the exception
+                     span.set_status(Status(StatusCode.ERROR, str(e)))
+                     span.record_exception(e)
+                     span.end()
+
+                     # Decrement server metrics: running requests counter (error path)
+                     server_metrics = get_server_metrics()
+                     if server_metrics:
+                         server_metrics.decrement_requests_running()
+                         logger.debug(f"Decremented running requests for {span_name} (error)")
+
+                     raise
+
+             except Exception as e:
+                 if span is not None:
+                     # The wrapped call itself failed; the handler above already
+                     # recorded the error and ended the span, so just propagate.
+                     raise
+                 logger.error("Span creation failed for '%s': %s", span_name, e, exc_info=True)
+                 return wrapped(*args, **kwargs)
+
+         return wrapper
+
+     def _record_result_metrics(self, span, result, start_time: float, request_kwargs: Optional[dict] = None):
+         """Record metrics derived from the function result and execution time.
+
+         Args:
+             span: The OpenTelemetry span to record metrics on.
+             result: The result from the wrapped function.
+             start_time: The time when the function started executing.
+             request_kwargs: The original request kwargs (for content capture).
+         """
+         # Record latency
+         try:
+             duration = time.time() - start_time
+             if self.latency_histogram:
+                 self.latency_histogram.record(duration, {"operation": span.name})
+         except Exception as e:
+             logger.warning("Failed to record latency for span '%s': %s", span.name, e)
+
+         # Extract and set response attributes if available
+         try:
+             if hasattr(self, "_extract_response_attributes"):
+                 response_attrs = self._extract_response_attributes(result)
+                 if response_attrs and isinstance(response_attrs, dict):
+                     for key, value in response_attrs.items():
+                         if isinstance(value, (str, int, float, bool)):
+                             span.set_attribute(key, value)
+                         elif isinstance(value, list):
+                             # For arrays like finish_reasons
+                             span.set_attribute(key, value)
+                         else:
+                             span.set_attribute(key, str(value))
+         except Exception as e:
+             logger.warning("Failed to extract response attributes for span '%s': %s", span.name, e)
+
+         # Add content events if content capture is enabled
+         try:
+             if (
+                 hasattr(self, "_add_content_events")
+                 and self.config
+                 and self.config.enable_content_capture
+             ):
+                 self._add_content_events(span, result, request_kwargs or {})
+         except Exception as e:
+             logger.warning("Failed to add content events for span '%s': %s", span.name, e)
+
+         # Extract and record token usage and cost
+         try:
+             usage = self._extract_usage(result)
+             if usage and isinstance(usage, dict):
+                 prompt_tokens = usage.get("prompt_tokens", 0)
+                 completion_tokens = usage.get("completion_tokens", 0)
+                 total_tokens = usage.get("total_tokens", 0)
+
+                 # Record token counts if available and positive
+                 # Support dual emission based on OTEL_SEMCONV_STABILITY_OPT_IN
+                 emit_old_attrs = (
+                     self.config
+                     and self.config.semconv_stability_opt_in
+                     and "dup" in self.config.semconv_stability_opt_in
+                 )
+
+                 # Record prompt tokens
+                 if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                     # Record counter metric if available
+                     if self.token_counter:
+                         self.token_counter.add(
+                             prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                         )
+                     # Record histogram for distribution analysis
+                     if self.prompt_tokens_histogram:
+                         model = span.attributes.get("gen_ai.request.model", "unknown")
+                         self.prompt_tokens_histogram.record(
+                             int(prompt_tokens), {"model": str(model), "operation": span.name}
+                         )
+                     # Always set span attributes (needed for cost calculation)
+                     span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
+                     # Old semantic convention (if dual emission enabled)
+                     if emit_old_attrs:
+                         span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))
+
+                 # Record completion tokens
+                 if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                     # Record counter metric if available
+                     if self.token_counter:
+                         self.token_counter.add(
+                             completion_tokens, {"token_type": "completion", "operation": span.name}
+                         )
+                     # Record histogram for distribution analysis
+                     if self.completion_tokens_histogram:
+                         model = span.attributes.get("gen_ai.request.model", "unknown")
+                         self.completion_tokens_histogram.record(
+                             int(completion_tokens), {"model": str(model), "operation": span.name}
+                         )
+                     # Always set span attributes (needed for cost calculation)
+                     span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
+                     # Old semantic convention (if dual emission enabled)
+                     if emit_old_attrs:
+                         span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))
+
+                 # Record total tokens
+                 if isinstance(total_tokens, (int, float)) and total_tokens > 0:
+                     span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
+
+                 # Calculate and record cost if enabled and applicable
+                 logger.debug(
+                     f"Cost tracking check: config={self.config is not None}, "
+                     f"enable_cost_tracking={self.config.enable_cost_tracking if self.config else 'N/A'}"
+                 )
+                 if self.config and self.config.enable_cost_tracking:
+                     try:
+                         model = span.attributes.get("gen_ai.request.model", "unknown")
+                         # Assuming 'chat' as a default call_type for the generic base instrumentor;
+                         # specific instrumentors will provide the actual call_type.
+                         call_type = span.attributes.get("gen_ai.request.type", "chat")
+
+                         logger.debug(
+                             f"Calculating cost for model={model}, call_type={call_type}, "
+                             f"prompt_tokens={usage.get('prompt_tokens')}, "
+                             f"completion_tokens={usage.get('completion_tokens')}"
+                         )
+
+                         # Use granular cost calculation for chat requests
+                         if call_type == "chat":
+                             costs = self.cost_calculator.calculate_granular_cost(
+                                 model, usage, call_type
+                             )
+                             total_cost = costs["total"]
+
+                             # Record total cost
+                             if total_cost > 0:
+                                 if self.cost_counter:
+                                     self.cost_counter.add(total_cost, {"model": str(model)})
+                                 # Always set span attributes (needed for cost tracking)
+                                 span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                 logger.debug(
+                                     f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
+                                 )
+                             else:
+                                 logger.debug(
+                                     f"Cost is zero, not setting attributes. Costs: {costs}"
+                                 )
+
+                             # Record and set attributes for granular costs
+                             # Note: metrics recording is optional; span attributes are always set
+                             if costs["prompt"] > 0:
+                                 if self.prompt_cost_counter:
+                                     self.prompt_cost_counter.add(
+                                         costs["prompt"], {"model": str(model)}
+                                     )
+                                 span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+
+                             if costs["completion"] > 0:
+                                 if self.completion_cost_counter:
+                                     self.completion_cost_counter.add(
+                                         costs["completion"], {"model": str(model)}
+                                     )
+                                 span.set_attribute(
+                                     "gen_ai.usage.cost.completion", costs["completion"]
+                                 )
+
+                             if costs["reasoning"] > 0:
+                                 if self.reasoning_cost_counter:
+                                     self.reasoning_cost_counter.add(
+                                         costs["reasoning"], {"model": str(model)}
+                                     )
+                                 span.set_attribute(
+                                     "gen_ai.usage.cost.reasoning", costs["reasoning"]
+                                 )
+
+                             if costs["cache_read"] > 0:
+                                 if self.cache_read_cost_counter:
+                                     self.cache_read_cost_counter.add(
+                                         costs["cache_read"], {"model": str(model)}
+                                     )
+                                 span.set_attribute(
+                                     "gen_ai.usage.cost.cache_read", costs["cache_read"]
+                                 )
+
+                             if costs["cache_write"] > 0:
+                                 if self.cache_write_cost_counter:
+                                     self.cache_write_cost_counter.add(
+                                         costs["cache_write"], {"model": str(model)}
+                                     )
+                                 span.set_attribute(
+                                     "gen_ai.usage.cost.cache_write", costs["cache_write"]
+                                 )
+                         else:
+                             # For non-chat requests, use simple cost calculation
+                             cost = self.cost_calculator.calculate_cost(model, usage, call_type)
+                             if cost and cost > 0:
+                                 if self.cost_counter:
+                                     self.cost_counter.add(cost, {"model": str(model)})
+                     except Exception as e:
+                         logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)
+
+         except Exception as e:
+             logger.warning(
+                 "Failed to extract or record usage metrics for span '%s': %s", span.name, e
+             )
+
+         # Extract and record finish reason if available (for request outcome tracking)
+         try:
+             if hasattr(self, "_extract_finish_reason"):
+                 finish_reason = self._extract_finish_reason(result)
+                 if finish_reason:
+                     model = span.attributes.get("gen_ai.request.model", "unknown")
+
+                     # Record finish reason counter
+                     if self.request_finish_counter:
+                         self.request_finish_counter.add(
+                             1, {"finish_reason": finish_reason, "model": str(model)}
+                         )
+
+                     # Set span attribute
+                     span.set_attribute("gen_ai.response.finish_reason", finish_reason)
+
+                     # Track success vs failure based on finish reason
+                     # Success: stop, length, end_turn, etc.
+                     # Failure: error, content_filter, timeout, etc.
+                     success_reasons = {"stop", "length", "end_turn", "max_tokens"}
+                     failure_reasons = {"error", "content_filter", "timeout", "rate_limit"}
+
+                     if finish_reason in success_reasons:
+                         if self.request_success_counter:
+                             self.request_success_counter.add(1, {"model": str(model)})
+                     elif finish_reason in failure_reasons:
+                         if self.request_failure_counter:
+                             self.request_failure_counter.add(
+                                 1, {"finish_reason": finish_reason, "model": str(model)}
+                             )
+         except Exception as e:
+             logger.debug(
+                 "Failed to extract or record finish reason for span '%s': %s", span.name, e
+             )
+
+     def _wrap_streaming_response(self, stream, span, start_time: float, model: str):
+         """Wrap a streaming response to capture TTFT and TBT metrics.
+
+         This generator wrapper yields chunks from the streaming response while
+         measuring time to first token (TTFT) and time between tokens (TBT).
+         The span is finalized when the stream completes or errors.
+
+         Args:
+             stream: The streaming response iterator
+             span: The OpenTelemetry span for this request
+             start_time: Request start time (for TTFT calculation)
+             model: Model name/identifier for metric attributes
+
+         Yields:
+             Chunks from the original stream
+         """
+         from opentelemetry.trace import Status, StatusCode
+
+         first_token = True
+         last_token_time = start_time
+         token_count = 0
+         last_chunk = None  # Store last chunk to extract usage
+
+         try:
+             for chunk in stream:
+                 current_time = time.time()
+                 token_count += 1
+
+                 if first_token:
+                     # Record Time to First Token
+                     ttft = current_time - start_time
+                     span.set_attribute("gen_ai.server.ttft", ttft)
+                     if self.ttft_histogram:
+                         self.ttft_histogram.record(ttft, {"model": model, "operation": span.name})
+                     logger.debug(f"TTFT for {model}: {ttft:.3f}s")
+                     first_token = False
+                 else:
+                     # Record Time Between Tokens
+                     tbt = current_time - last_token_time
+                     if self.tbt_histogram:
+                         self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
+
+                 last_token_time = current_time
+                 last_chunk = chunk  # Keep track of last chunk for usage extraction
+                 yield chunk
+
+             # Stream completed successfully
+             duration = time.time() - start_time
+             if self.latency_histogram:
+                 self.latency_histogram.record(duration, {"operation": span.name})
+             span.set_attribute("gen_ai.streaming.token_count", token_count)
+
+             # Extract usage from last chunk and calculate cost
+             # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
+             try:
+                 if last_chunk is not None:
+                     usage = self._extract_usage(last_chunk)
+                     if usage and isinstance(usage, dict):
+                         # Record token usage metrics and calculate cost
+                         # This will set span attributes and record cost metrics
+                         prompt_tokens = usage.get("prompt_tokens", 0)
+                         completion_tokens = usage.get("completion_tokens", 0)
+                         total_tokens = usage.get("total_tokens", 0)
+
+                         # Record token counts
+                         if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                             if self.token_counter:
+                                 self.token_counter.add(
+                                     prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                                 )
+                             # Record histogram for distribution analysis
+                             if self.prompt_tokens_histogram:
+                                 self.prompt_tokens_histogram.record(
+                                     int(prompt_tokens), {"model": model, "operation": span.name}
+                                 )
+                             span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
+
+                         if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                             if self.token_counter:
+                                 self.token_counter.add(
+                                     completion_tokens,
+                                     {"token_type": "completion", "operation": span.name},
+                                 )
+                             # Record histogram for distribution analysis
+                             if self.completion_tokens_histogram:
+                                 self.completion_tokens_histogram.record(
+                                     int(completion_tokens), {"model": model, "operation": span.name}
+                                 )
+                             span.set_attribute(
+                                 "gen_ai.usage.completion_tokens", int(completion_tokens)
+                             )
+
+                         if isinstance(total_tokens, (int, float)) and total_tokens > 0:
+                             span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
+
+                         # Calculate and record cost if enabled
+                         if self.config and self.config.enable_cost_tracking:
+                             try:
+                                 # Get call_type from span attributes or default to "chat"
+                                 call_type = span.attributes.get("gen_ai.request.type", "chat")
+
+                                 # Use granular cost calculation for chat requests
+                                 if call_type == "chat":
+                                     costs = self.cost_calculator.calculate_granular_cost(
+                                         model, usage, call_type
+                                     )
+                                     total_cost = costs["total"]
+
+                                     # Record total cost
+                                     if total_cost > 0:
+                                         if self.cost_counter:
+                                             self.cost_counter.add(total_cost, {"model": str(model)})
+                                         span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                         logger.debug(f"Streaming cost: {total_cost} USD")
+
+                                     # Record granular costs
+                                     if costs["prompt"] > 0:
+                                         if self.prompt_cost_counter:
+                                             self.prompt_cost_counter.add(
+                                                 costs["prompt"], {"model": str(model)}
+                                             )
+                                         span.set_attribute(
+                                             "gen_ai.usage.cost.prompt", costs["prompt"]
+                                         )
+
+                                     if costs["completion"] > 0:
+                                         if self.completion_cost_counter:
+                                             self.completion_cost_counter.add(
+                                                 costs["completion"], {"model": str(model)}
+                                             )
+                                         span.set_attribute(
+                                             "gen_ai.usage.cost.completion", costs["completion"]
+                                         )
+
+                                     if costs["reasoning"] > 0:
+                                         if self.reasoning_cost_counter:
+                                             self.reasoning_cost_counter.add(
+                                                 costs["reasoning"], {"model": str(model)}
+                                             )
+                                         span.set_attribute(
+                                             "gen_ai.usage.cost.reasoning", costs["reasoning"]
+                                         )
+
+                                     if costs["cache_read"] > 0:
+                                         if self.cache_read_cost_counter:
+                                             self.cache_read_cost_counter.add(
+                                                 costs["cache_read"], {"model": str(model)}
+                                             )
+                                         span.set_attribute(
+                                             "gen_ai.usage.cost.cache_read", costs["cache_read"]
+                                         )
+
+                                     if costs["cache_write"] > 0:
+                                         if self.cache_write_cost_counter:
+                                             self.cache_write_cost_counter.add(
+                                                 costs["cache_write"], {"model": str(model)}
+                                             )
+                                         span.set_attribute(
+                                             "gen_ai.usage.cost.cache_write", costs["cache_write"]
+                                         )
+                                 else:
+                                     # For non-chat requests, use simple cost calculation
+                                     cost = self.cost_calculator.calculate_cost(
+                                         model, usage, call_type
+                                     )
+                                     if cost and cost > 0:
+                                         if self.cost_counter:
+                                             self.cost_counter.add(cost, {"model": str(model)})
+                                         span.set_attribute("gen_ai.usage.cost.total", cost)
+                             except Exception as e:
+                                 logger.warning(
+                                     "Failed to calculate cost for streaming response: %s", e
+                                 )
+                     else:
+                         logger.debug("No usage information found in streaming response")
+             except Exception as e:
+                 logger.warning("Failed to extract usage from streaming response: %s", e)
+
+             span.set_status(Status(StatusCode.OK))
+             span.end()  # Close the span when streaming completes
+
+             # Decrement server metrics: running requests counter (streaming success)
+             server_metrics = get_server_metrics()
+             if server_metrics:
+                 server_metrics.decrement_requests_running()
+                 logger.debug("Decremented running requests (streaming success)")
+
+             logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
+
+         except Exception as e:
+             # Stream failed
+             span.set_status(Status(StatusCode.ERROR, str(e)))
+             span.record_exception(e)
+             span.end()  # Close the span even on error
+
+             # Decrement server metrics: running requests counter (streaming error)
+             server_metrics = get_server_metrics()
+             if server_metrics:
+                 server_metrics.decrement_requests_running()
+                 logger.debug("Decremented running requests (streaming error)")
+
+             if self.error_counter:
+                 self.error_counter.add(1, {"operation": span.name, "error_type": type(e).__name__})
+             logger.warning(f"Error in streaming wrapper: {e}")
+             raise
+
+     # Phase 4.2: RAG/Embedding Helper Methods
+     def add_embedding_attributes(
+         self, span, model: str, input_text: str, vector: Optional[List[float]] = None
+     ):
+         """Add embedding-specific attributes to a span.
+
+         Args:
+             span: The OpenTelemetry span
+             model: The embedding model name
+             input_text: The text being embedded (will be truncated to 500 chars)
+             vector: Optional embedding vector (use with caution - can be large!)
+         """
+         span.set_attribute("embedding.model_name", model)
+         span.set_attribute("embedding.text", input_text[:500])  # Truncate to avoid large spans
+
+         if vector and self.config and getattr(self.config, "capture_embedding_vectors", False):
+             # Only capture vectors if explicitly enabled (they can be very large)
+             span.set_attribute("embedding.vector", json.dumps(vector))
+             span.set_attribute("embedding.vector.dimension", len(vector))
+
+     def add_retrieval_attributes(
+         self,
+         span,
+         documents: List[Dict[str, Any]],
+         query: Optional[str] = None,
+         max_docs: int = 5,
+     ):
+         """Add retrieval/RAG-specific attributes to a span.
+
+         Args:
+             span: The OpenTelemetry span
+             documents: List of retrieved documents. Each dict should have:
+                 - id: Document identifier
+                 - score: Relevance score
+                 - content: Document content
+                 - metadata: Optional metadata dict
+             query: Optional query string
+             max_docs: Maximum number of documents to include in attributes (default: 5)
+         """
+         if query:
+             span.set_attribute("retrieval.query", query[:500])  # Truncate
+
+         # Limit to first N documents to avoid attribute explosion
+         for i, doc in enumerate(documents[:max_docs]):
+             prefix = f"retrieval.documents.{i}.document"
+
+             if "id" in doc:
+                 span.set_attribute(f"{prefix}.id", str(doc["id"]))
+             if "score" in doc:
+                 span.set_attribute(f"{prefix}.score", float(doc["score"]))
+             if "content" in doc:
+                 # Truncate content to avoid large attributes
+                 content = str(doc["content"])[:500]
+                 span.set_attribute(f"{prefix}.content", content)
+
+             # Add metadata if present
+             if "metadata" in doc and isinstance(doc["metadata"], dict):
+                 for key, value in doc["metadata"].items():
+                     # Flatten metadata; limit key names to avoid explosion
+                     safe_key = str(key)[:50]  # Limit key length
+                     safe_value = str(value)[:200]  # Limit value length
+                     span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
+
+         span.set_attribute("retrieval.document_count", len(documents))
+
+     @abstractmethod
+     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+         """Abstract method to extract token usage information from a function result.
+
+         Subclasses must implement this to parse the specific library's response object
+         and return a dictionary containing 'prompt_tokens', 'completion_tokens',
+         and optionally 'total_tokens'.
+
+         Args:
+             result: The return value of the instrumented function.
+
+         Returns:
+             Optional[Dict[str, int]]: A dictionary with token counts, or None if usage cannot be extracted.
+         """
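
To make the contract above concrete, here is a minimal sketch of a subclass in the shape the instrumentors in this package follow. The acme SDK and its Client.complete method are hypothetical placeholders; OTelConfig, _setup_config, create_span_wrapper, _instrumented, and _extract_usage are the names defined in base.py above.

import logging
from typing import Any, Dict, Optional

from genai_otel.config import OTelConfig
from genai_otel.instrumentors.base import BaseInstrumentor

logger = logging.getLogger(__name__)


class AcmeInstrumentor(BaseInstrumentor):
    """Sketch: instruments the hypothetical `acme` client."""

    def instrument(self, config: OTelConfig):
        # Load the config first so custom pricing reaches the cost calculator,
        # as the instrument() docstring requires.
        self._setup_config(config)
        try:
            import acme  # hypothetical provider SDK

            def extract_attributes(instance, args, kwargs) -> Dict[str, Any]:
                # These attributes feed cost calculation in _record_result_metrics.
                return {
                    "gen_ai.request.model": kwargs.get("model", "unknown"),
                    "gen_ai.request.type": "chat",
                }

            wrap = self.create_span_wrapper("acme.complete", extract_attributes)
            acme.Client.complete = wrap(acme.Client.complete)
            self._instrumented = True  # the wrapper no-ops until this is set
        except ImportError:
            logger.debug("acme not installed; skipping instrumentation")

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        # Assumes an OpenAI-style `usage` object on the response.
        usage = getattr(result, "usage", None)
        if usage is None:
            return None
        return {
            "prompt_tokens": getattr(usage, "prompt_tokens", 0),
            "completion_tokens": getattr(usage, "completion_tokens", 0),
            "total_tokens": getattr(usage, "total_tokens", 0),
        }

Because create_span_wrapper returns a wrapt decorator, assigning the wrapped function back onto the class preserves method binding, and the base class then handles spans, token and cost metrics, and streaming TTFT/TBT without further code in the subclass.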
+ """