genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of genai-otel-instrument might be problematic. Click here for more details.

@@ -1,177 +1,174 @@
1
- """Custom SpanProcessor to enrich OpenInference spans with cost tracking.
2
-
3
- This processor adds cost attributes to spans created by OpenInference instrumentors
4
- (smolagents, litellm, mcp) by extracting token usage and model information from
5
- existing span attributes and calculating costs using our CostCalculator.
6
-
7
- Supports both OpenTelemetry GenAI and OpenInference semantic conventions:
8
- - GenAI: gen_ai.request.model, gen_ai.usage.{prompt_tokens,completion_tokens}
9
- - OpenInference: llm.model_name, llm.token_count.{prompt,completion}
10
- """
11
-
12
- import logging
13
- from typing import Optional
14
-
15
- from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
16
- from opentelemetry.trace import SpanContext
17
-
18
- from .cost_calculator import CostCalculator
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
class CostEnrichmentSpanProcessor(SpanProcessor):
    """Enrich finished LLM spans with cost attributes.

    For every span that ends, the processor:
    1. Looks up the model name and token usage in the span attributes,
       accepting both OpenTelemetry GenAI and OpenInference attribute names.
    2. Asks the configured ``CostCalculator`` for a granular cost breakdown.
    3. Writes ``gen_ai.usage.cost.total`` (and the prompt/completion parts when
       they are positive) back onto the span if the span is mutable.
    """

    # Attribute names probed in priority order; the first truthy value wins.
    _MODEL_KEYS = (
        "gen_ai.request.model",
        "llm.model_name",
        "embedding.model_name",
    )
    _PROMPT_TOKEN_KEYS = (
        "gen_ai.usage.prompt_tokens",
        "gen_ai.usage.input_tokens",
        "llm.token_count.prompt",  # OpenInference
    )
    _COMPLETION_TOKEN_KEYS = (
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.output_tokens",
        "llm.token_count.completion",  # OpenInference
    )

    # Maps operation names / span kinds (lower-cased) to the call types passed
    # to the cost calculator. Built once instead of on every span end; unknown
    # names fall back to "chat" at lookup time.
    _CALL_TYPE_MAPPING = {
        # GenAI conventions (gen_ai.operation.name)
        "chat": "chat",
        "completion": "chat",
        "embedding": "embedding",
        "embeddings": "embedding",
        "text_generation": "chat",
        "image_generation": "image",
        "audio": "audio",
        # OpenInference conventions (openinference.span.kind values)
        "llm": "chat",
        "chain": "chat",
        "retriever": "embedding",
        "reranker": "embedding",
        "tool": "chat",
        "agent": "chat",
    }

    def __init__(self, cost_calculator: Optional[CostCalculator] = None):
        """Initialize the cost enrichment processor.

        Args:
            cost_calculator: CostCalculator instance to use for cost calculations.
                If None (or otherwise falsy), a new instance is created.
        """
        self.cost_calculator = cost_calculator or CostCalculator()
        logger.info("CostEnrichmentSpanProcessor initialized")

    def on_start(self, span: Span, parent_context: Optional[SpanContext] = None) -> None:
        """Called when a span starts. No action needed."""

    @staticmethod
    def _first_truthy(attributes, keys, default=None):
        """Return the first truthy attribute value among ``keys``, else ``default``."""
        for key in keys:
            value = attributes.get(key)
            if value:
                return value
        return default

    def on_end(self, span: ReadableSpan) -> None:
        """Called when a span ends. Enriches with cost attributes if applicable.

        Any exception raised while enriching is logged and swallowed so that
        cost tracking can never break span processing.

        Args:
            span: The span that just ended.
        """
        try:
            attributes = span.attributes
            # Only process spans that carry attributes at all.
            if not attributes:
                return

            model = self._first_truthy(attributes, self._MODEL_KEYS)
            if not model:
                return

            # Skip if cost attributes are already present (added by instrumentor).
            if "gen_ai.usage.cost.total" in attributes:
                logger.debug(
                    "Span '%s' already has cost attributes, skipping enrichment", span.name
                )
                return

            prompt_tokens = self._first_truthy(attributes, self._PROMPT_TOKEN_KEYS, 0)
            completion_tokens = self._first_truthy(attributes, self._COMPLETION_TOKEN_KEYS, 0)

            # Skip if no tokens recorded.
            if prompt_tokens == 0 and completion_tokens == 0:
                return

            # Prefer the GenAI operation name, then the OpenInference span kind.
            # str() guards against non-string attribute values, which previously
            # raised on .upper() and silently disabled enrichment for the span.
            call_type = (
                attributes.get("gen_ai.operation.name")
                or attributes.get("openinference.span.kind")
                or "chat"
            )
            normalized_call_type = self._CALL_TYPE_MAPPING.get(str(call_type).lower(), "chat")

            prompt_count = int(prompt_tokens)
            completion_count = int(completion_tokens)
            usage = {
                "prompt_tokens": prompt_count,
                "completion_tokens": completion_count,
                "total_tokens": prompt_count + completion_count,
            }

            # Use calculate_granular_cost to get detailed breakdown.
            cost_info = self.cost_calculator.calculate_granular_cost(
                model=str(model),
                usage=usage,
                call_type=normalized_call_type,
            )
            if not cost_info or not cost_info.get("total", 0.0) > 0:
                return

            # ReadableSpan attributes cannot be modified; only a Span instance
            # exposes set_attribute.
            # NOTE(review): the span has already ended when on_end runs, and the
            # SDK may ignore set_attribute on ended spans - confirm that these
            # attributes actually reach the exporter.
            if not isinstance(span, Span):
                logger.warning(
                    "Span '%s' is not mutable (type: %s), cannot add cost attributes",
                    span.name,
                    type(span).__name__,
                )
                return

            span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
            if cost_info.get("prompt", 0.0) > 0:
                span.set_attribute("gen_ai.usage.cost.prompt", cost_info["prompt"])
            if cost_info.get("completion", 0.0) > 0:
                span.set_attribute("gen_ai.usage.cost.completion", cost_info["completion"])

            logger.info(
                "Enriched span '%s' with cost: %.6f USD for model %s (%s tokens)",
                span.name,
                cost_info["total"],
                model,
                usage["total_tokens"],
            )

        except Exception as e:
            # Don't fail span processing due to cost enrichment errors.
            logger.warning(
                "Failed to enrich span '%s' with cost: %s",
                getattr(span, "name", "unknown"),
                e,
                exc_info=True,
            )

    def shutdown(self) -> None:
        """Called when the processor is shutdown."""
        logger.info("CostEnrichmentSpanProcessor shutdown")

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush any pending spans.

        This processor buffers nothing, so there is never anything to flush.

        Args:
            timeout_millis: Timeout in milliseconds (unused).

        Returns:
            Always True.
        """
        return True
1
+ """Custom SpanProcessor to enrich OpenInference spans with cost tracking.
2
+
3
+ This processor adds cost attributes to spans created by OpenInference instrumentors
4
+ (smolagents, litellm, mcp) by extracting token usage and model information from
5
+ existing span attributes and calculating costs using our CostCalculator.
6
+
7
+ Supports both OpenTelemetry GenAI and OpenInference semantic conventions:
8
+ - GenAI: gen_ai.request.model, gen_ai.usage.{prompt_tokens,completion_tokens}
9
+ - OpenInference: llm.model_name, llm.token_count.{prompt,completion}
10
+ """
11
+
12
+ import logging
13
+ from typing import Optional
14
+
15
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
16
+ from opentelemetry.trace import SpanContext
17
+
18
+ from .cost_calculator import CostCalculator
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class CostEnrichmentSpanProcessor(SpanProcessor):
    """Enrich finished LLM spans with cost attributes.

    For every span that ends, the processor:
    1. Looks up the model name and token usage in the span attributes,
       accepting both OpenTelemetry GenAI and OpenInference attribute names.
    2. Asks the configured ``CostCalculator`` for a granular cost breakdown.
    3. Writes ``gen_ai.usage.cost.total`` (and the prompt/completion parts when
       they are positive) back onto the span if it exposes ``set_attribute``.
    """

    # Attribute names probed in priority order; the first truthy value wins.
    _MODEL_KEYS = (
        "gen_ai.request.model",
        "llm.model_name",
        "embedding.model_name",
    )
    _PROMPT_TOKEN_KEYS = (
        "gen_ai.usage.prompt_tokens",
        "gen_ai.usage.input_tokens",
        "llm.token_count.prompt",  # OpenInference
    )
    _COMPLETION_TOKEN_KEYS = (
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.output_tokens",
        "llm.token_count.completion",  # OpenInference
    )

    # Maps operation names / span kinds (lower-cased) to the call types passed
    # to the cost calculator. Built once instead of on every span end; unknown
    # names fall back to "chat" at lookup time.
    _CALL_TYPE_MAPPING = {
        # GenAI conventions (gen_ai.operation.name)
        "chat": "chat",
        "completion": "chat",
        "embedding": "embedding",
        "embeddings": "embedding",
        "text_generation": "chat",
        "image_generation": "image",
        "audio": "audio",
        # OpenInference conventions (openinference.span.kind values)
        "llm": "chat",
        "chain": "chat",
        "retriever": "embedding",
        "reranker": "embedding",
        "tool": "chat",
        "agent": "chat",
    }

    def __init__(self, cost_calculator: Optional[CostCalculator] = None):
        """Initialize the cost enrichment processor.

        Args:
            cost_calculator: CostCalculator instance to use for cost calculations.
                If None (or otherwise falsy), a new instance is created.
        """
        self.cost_calculator = cost_calculator or CostCalculator()
        logger.info("CostEnrichmentSpanProcessor initialized")

    def on_start(self, span: Span, parent_context: Optional[SpanContext] = None) -> None:
        """Called when a span starts. No action needed."""

    @staticmethod
    def _first_truthy(attributes, keys, default=None):
        """Return the first truthy attribute value among ``keys``, else ``default``."""
        for key in keys:
            value = attributes.get(key)
            if value:
                return value
        return default

    def on_end(self, span: ReadableSpan) -> None:
        """Called when a span ends. Enriches with cost attributes if applicable.

        Any exception raised while enriching is logged and swallowed so that
        cost tracking can never break span processing.

        Args:
            span: The span that just ended.
        """
        try:
            attributes = span.attributes
            # Only process spans that carry attributes at all.
            if not attributes:
                return

            model = self._first_truthy(attributes, self._MODEL_KEYS)
            if not model:
                return

            # Skip if cost attributes are already present (added by instrumentor).
            if "gen_ai.usage.cost.total" in attributes:
                logger.debug(
                    "Span '%s' already has cost attributes, skipping enrichment", span.name
                )
                return

            prompt_tokens = self._first_truthy(attributes, self._PROMPT_TOKEN_KEYS, 0)
            completion_tokens = self._first_truthy(attributes, self._COMPLETION_TOKEN_KEYS, 0)

            # Skip if no tokens recorded.
            if prompt_tokens == 0 and completion_tokens == 0:
                return

            # Prefer the GenAI operation name, then the OpenInference span kind.
            # str() guards against non-string attribute values, which previously
            # raised on .upper() and silently disabled enrichment for the span.
            call_type = (
                attributes.get("gen_ai.operation.name")
                or attributes.get("openinference.span.kind")
                or "chat"
            )
            normalized_call_type = self._CALL_TYPE_MAPPING.get(str(call_type).lower(), "chat")

            prompt_count = int(prompt_tokens)
            completion_count = int(completion_tokens)
            usage = {
                "prompt_tokens": prompt_count,
                "completion_tokens": completion_count,
                "total_tokens": prompt_count + completion_count,
            }

            # Use calculate_granular_cost to get detailed breakdown.
            cost_info = self.cost_calculator.calculate_granular_cost(
                model=str(model),
                usage=usage,
                call_type=normalized_call_type,
            )
            if not cost_info or not cost_info.get("total", 0.0) > 0:
                return

            # Duck typing: any span object exposing a callable set_attribute is
            # treated as mutable.
            # NOTE(review): the span has already ended when on_end runs, and the
            # SDK may ignore set_attribute on ended spans - confirm that these
            # attributes actually reach the exporter.
            set_attribute = getattr(span, "set_attribute", None)
            if not callable(set_attribute):
                logger.warning(
                    "Span '%s' is not mutable (type: %s), cannot add cost attributes",
                    span.name,
                    type(span).__name__,
                )
                return

            set_attribute("gen_ai.usage.cost.total", cost_info["total"])
            if cost_info.get("prompt", 0.0) > 0:
                set_attribute("gen_ai.usage.cost.prompt", cost_info["prompt"])
            if cost_info.get("completion", 0.0) > 0:
                set_attribute("gen_ai.usage.cost.completion", cost_info["completion"])

            logger.info(
                "Enriched span '%s' with cost: %.6f USD for model %s (%s tokens)",
                span.name,
                cost_info["total"],
                model,
                usage["total_tokens"],
            )

        except Exception as e:
            # Don't fail span processing due to cost enrichment errors.
            logger.warning(
                "Failed to enrich span '%s' with cost: %s",
                getattr(span, "name", "unknown"),
                e,
                exc_info=True,
            )

    def shutdown(self) -> None:
        """Called when the processor is shutdown."""
        logger.info("CostEnrichmentSpanProcessor shutdown")

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush any pending spans.

        This processor buffers nothing, so there is never anything to flush.

        Args:
            timeout_millis: Timeout in milliseconds (unused).

        Returns:
            Always True.
        """
        return True
genai_otel/gpu_metrics.py CHANGED
@@ -42,6 +42,7 @@ class GPUMetricsCollector:
42
42
  self._stop_event = threading.Event()
43
43
  self.gpu_utilization_counter: Optional[ObservableCounter] = None
44
44
  self.gpu_memory_used_gauge: Optional[ObservableGauge] = None
45
+ self.gpu_memory_total_gauge: Optional[ObservableGauge] = None
45
46
  self.gpu_temperature_gauge: Optional[ObservableGauge] = None
46
47
  self.gpu_power_gauge: Optional[ObservableGauge] = None
47
48
  self.config = config
@@ -67,6 +68,11 @@ class GPUMetricsCollector:
67
68
  description="Cumulative CO2 equivalent emissions in grams",
68
69
  unit="gCO2e",
69
70
  )
71
+ self.power_cost_counter = meter.create_counter(
72
+ "gen_ai.power.cost", # New metric name
73
+ description="Cumulative electricity cost in USD based on GPU power consumption",
74
+ unit="USD",
75
+ )
70
76
  if not NVML_AVAILABLE:
71
77
  logger.warning(
72
78
  "GPU metrics collection not available - nvidia-ml-py not installed. "
@@ -88,6 +94,12 @@ class GPUMetricsCollector:
88
94
  description="GPU memory used in MiB",
89
95
  unit="MiB",
90
96
  )
97
+ self.gpu_memory_total_gauge = self.meter.create_observable_gauge(
98
+ "gen_ai.gpu.memory.total", # Fixed metric name
99
+ callbacks=[self._observe_gpu_memory_total],
100
+ description="Total GPU memory capacity in MiB",
101
+ unit="MiB",
102
+ )
91
103
  self.gpu_temperature_gauge = self.meter.create_observable_gauge(
92
104
  "gen_ai.gpu.temperature", # Fixed metric name
93
105
  callbacks=[self._observe_gpu_temperature],
@@ -167,6 +179,33 @@ class GPUMetricsCollector:
167
179
  except Exception as e:
168
180
  logger.error("Error observing GPU memory: %s", e)
169
181
 
182
def _observe_gpu_memory_total(self, options):
    """Observable callback for total GPU memory capacity.

    Does nothing when NVML is unavailable or no GPU was detected. A failure
    on one GPU is logged at debug level and the remaining GPUs are still
    reported; any other failure is logged at error level.

    Args:
        options: Callback options passed in by the metrics SDK (unused).

    Yields:
        Observation: One per GPU, carrying the total memory in MiB and the
            ``gpu_id`` / ``gpu_name`` attributes.
    """
    if not NVML_AVAILABLE or not self.gpu_available:
        return

    try:
        pynvml.nvmlInit()
        try:
            for i in range(pynvml.nvmlDeviceGetCount()):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                device_name = self._get_device_name(handle, i)

                try:
                    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                    gpu_memory_total = memory_info.total / (1024**2)  # Convert to MiB
                    yield Observation(
                        value=gpu_memory_total,
                        attributes={"gpu_id": str(i), "gpu_name": device_name},
                    )
                except Exception as e:
                    logger.debug("Failed to get total GPU memory for GPU %d: %s", i, e)
        finally:
            # Always pair the successful nvmlInit() above with a shutdown, even
            # when a device query raises or the consumer closes the generator
            # early; previously those paths skipped nvmlShutdown().
            pynvml.nvmlShutdown()
    except Exception as e:
        logger.error("Error observing total GPU memory: %s", e)
208
+
170
209
  def _observe_gpu_temperature(self, options):
171
210
  """Observable callback for GPU temperature."""
172
211
  if not NVML_AVAILABLE or not self.gpu_available:
@@ -249,11 +288,22 @@ class GPUMetricsCollector:
249
288
  delta_time_hours * 3600.0
250
289
  ) # Wh (power in kW * hours = kWh, but track in Wh for precision)
251
290
  self.cumulative_energy_wh[i] += delta_energy_wh
291
+
292
+ # Calculate and record CO2 emissions if enabled
252
293
  if self.config.enable_co2_tracking:
253
294
  delta_co2_g = (
254
295
  delta_energy_wh / 1000.0
255
296
  ) * self.config.carbon_intensity # gCO2e
256
297
  self.co2_counter.add(delta_co2_g, {"gpu_id": str(i)})
298
+
299
+ # Calculate and record power cost
300
+ # delta_energy_wh is in Wh, convert to kWh and multiply by cost per kWh
301
+ delta_cost_usd = (delta_energy_wh / 1000.0) * self.config.power_cost_per_kwh
302
+ device_name = self._get_device_name(handle, i)
303
+ self.power_cost_counter.add(
304
+ delta_cost_usd, {"gpu_id": str(i), "gpu_name": device_name}
305
+ )
306
+
257
307
  self.last_timestamp[i] = current_time
258
308
  except Exception as e:
259
309
  logger.error(f"Error collecting GPU {i} metrics: {e}")