genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of genai-otel-instrument might be problematic.

Files changed (24)
  1. genai_otel/__version__.py +2 -2
  2. genai_otel/auto_instrument.py +18 -1
  3. genai_otel/config.py +22 -1
  4. genai_otel/cost_calculator.py +204 -13
  5. genai_otel/cost_enrichment_processor.py +175 -0
  6. genai_otel/gpu_metrics.py +50 -0
  7. genai_otel/instrumentors/base.py +300 -44
  8. genai_otel/instrumentors/cohere_instrumentor.py +140 -76
  9. genai_otel/instrumentors/huggingface_instrumentor.py +142 -13
  10. genai_otel/instrumentors/langchain_instrumentor.py +75 -75
  11. genai_otel/instrumentors/mistralai_instrumentor.py +234 -38
  12. genai_otel/instrumentors/ollama_instrumentor.py +104 -35
  13. genai_otel/instrumentors/replicate_instrumentor.py +59 -14
  14. genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
  15. genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
  16. genai_otel/llm_pricing.json +869 -589
  17. genai_otel/logging_config.py +45 -45
  18. genai_otel/py.typed +2 -2
  19. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +294 -33
  20. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +24 -23
  21. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
  22. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
  23. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
  24. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0
genai_otel/__version__.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID
 
- __version__ = version = '0.1.2.dev0'
- __version_tuple__ = version_tuple = (0, 1, 2, 'dev0')
+ __version__ = version = '0.1.7.dev0'
+ __version_tuple__ = version_tuple = (0, 1, 7, 'dev0')
 
  __commit_id__ = commit_id = None
genai_otel/auto_instrument.py CHANGED
@@ -17,6 +17,8 @@ from opentelemetry.sdk.trace import TracerProvider
  from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
 
  from .config import OTelConfig
+ from .cost_calculator import CostCalculator
+ from .cost_enrichment_processor import CostEnrichmentSpanProcessor
  from .gpu_metrics import GPUMetricsCollector
  from .mcp_instrumentors import MCPInstrumentorManager
  from .metrics import (
@@ -117,12 +119,16 @@ INSTRUMENTORS = {
  }
 
  # Add OpenInference instrumentors if available (requires Python >= 3.10)
+ # IMPORTANT: Order matters! Load in this specific sequence:
+ # 1. smolagents - instruments the agent framework
+ # 2. litellm - instruments LLM calls made by agents
+ # 3. mcp - instruments Model Context Protocol tools
  if OPENINFERENCE_AVAILABLE:
      INSTRUMENTORS.update(
          {
              "smolagents": SmolagentsInstrumentor,
-             "mcp": MCPInstrumentor,
              "litellm": LiteLLMInstrumentor,
+             "mcp": MCPInstrumentor,
          }
      )
 
@@ -163,6 +169,17 @@ def setup_auto_instrumentation(config: OTelConfig):
 
      set_global_textmap(TraceContextTextMapPropagator())
 
+     # Add cost enrichment processor for OpenInference instrumentors
+     # This enriches spans from smolagents, litellm, mcp with cost attributes
+     if config.enable_cost_tracking:
+         try:
+             cost_calculator = CostCalculator()
+             cost_processor = CostEnrichmentSpanProcessor(cost_calculator)
+             tracer_provider.add_span_processor(cost_processor)
+             logger.info("Cost enrichment processor added for OpenInference instrumentors")
+         except Exception as e:
+             logger.warning(f"Failed to add cost enrichment processor: {e}", exc_info=True)
+
      logger.debug(f"OTelConfig endpoint: {config.endpoint}")
      if config.endpoint:
          # Convert timeout to float safely
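
For orientation, a minimal usage sketch of the new cost-enrichment path. It assumes enable_cost_tracking is an ordinary OTelConfig dataclass field (the diff only shows it being read); nothing below appears verbatim in the package:

# Hypothetical usage sketch; passing enable_cost_tracking as a keyword is an assumption.
from genai_otel.auto_instrument import setup_auto_instrumentation
from genai_otel.config import OTelConfig

config = OTelConfig(enable_cost_tracking=True)
setup_auto_instrumentation(config)
# From here on, spans emitted by smolagents/litellm/mcp pass through
# CostEnrichmentSpanProcessor before export and gain gen_ai.usage.cost.* attributes.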
genai_otel/config.py CHANGED
@@ -11,7 +11,7 @@ import logging
  import os
  import sys
  from dataclasses import dataclass, field
- from typing import Dict, List, Optional
+ from typing import Any, Callable, Dict, List, Optional, Tuple
 
  logger = logging.getLogger(__name__)
 
@@ -44,6 +44,9 @@ DEFAULT_INSTRUMENTORS = [
  ]
 
  # Add OpenInference instrumentors only for Python >= 3.10
+ # IMPORTANT: Order matters! Load in this specific sequence:
+ # 1. smolagents - instruments the agent framework
+ # 2. litellm - instruments the LLM calls made by agents
  if sys.version_info >= (3, 10):
      DEFAULT_INSTRUMENTORS.extend(["smolagents", "litellm"])
 
@@ -101,6 +104,10 @@ class OTelConfig:
          default_factory=lambda: float(os.getenv("GENAI_CARBON_INTENSITY", "475.0"))
      )  # gCO2e/kWh
 
+     power_cost_per_kwh: float = field(
+         default_factory=lambda: float(os.getenv("GENAI_POWER_COST_PER_KWH", "0.12"))
+     )  # USD per kWh - electricity cost for power consumption tracking
+
      gpu_collection_interval: int = field(
          default_factory=lambda: int(os.getenv("GENAI_GPU_COLLECTION_INTERVAL", "5"))
      )  # seconds - how often to collect GPU metrics and CO2 emissions
@@ -117,6 +124,20 @@
          default_factory=lambda: os.getenv("GENAI_ENABLE_CONTENT_CAPTURE", "false").lower() == "true"
      )
 
+     # Custom pricing configuration for models not in llm_pricing.json
+     # Format: JSON string with same structure as llm_pricing.json
+     # Example: {"chat": {"custom-model": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+     custom_pricing_json: Optional[str] = field(
+         default_factory=lambda: os.getenv("GENAI_CUSTOM_PRICING_JSON")
+     )
+
+     # Session and user tracking (Phase 4.1)
+     # Optional callable functions to extract session_id and user_id from requests
+     # Signature: (instance, args, kwargs) -> Optional[str]
+     # Example: lambda instance, args, kwargs: kwargs.get("metadata", {}).get("session_id")
+     session_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
+     user_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
+
 
  import os
 
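A small configuration sketch for the new fields (the model name and the metadata shape handed to the extractors are illustrative, not an API guarantee):

import json
import os

# OTelConfig reads these via default_factory at construction time,
# so the environment must be set before the config object is created.
os.environ["GENAI_CUSTOM_PRICING_JSON"] = json.dumps(
    {"chat": {"my-finetune-7b": {"promptPrice": 0.001, "completionPrice": 0.002}}}
)
os.environ["GENAI_POWER_COST_PER_KWH"] = "0.30"  # local electricity rate, USD/kWh

from genai_otel.config import OTelConfig

config = OTelConfig(
    # Documented signature: (instance, args, kwargs) -> Optional[str]
    session_id_extractor=lambda instance, args, kwargs: kwargs.get("metadata", {}).get("session_id"),
    user_id_extractor=lambda instance, args, kwargs: kwargs.get("metadata", {}).get("user_id"),
)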
genai_otel/cost_calculator.py CHANGED
@@ -2,6 +2,7 @@
 
  import json
  import logging
+ import re
  from typing import Any, Dict, Optional
 
  logger = logging.getLogger(__name__)
@@ -12,10 +13,18 @@ class CostCalculator:
 
      DEFAULT_PRICING_FILE = "llm_pricing.json"
 
-     def __init__(self):
-         """Initializes the CostCalculator by loading pricing data from a JSON file."""
+     def __init__(self, custom_pricing_json: Optional[str] = None):
+         """Initializes the CostCalculator by loading pricing data from a JSON file.
+
+         Args:
+             custom_pricing_json: Optional JSON string with custom model pricing.
+                 Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+                 Custom prices will be merged with default pricing, with custom taking precedence.
+         """
          self.pricing_data: Dict[str, Any] = {}
          self._load_pricing()
+         if custom_pricing_json:
+             self._merge_custom_pricing(custom_pricing_json)
 
      def _load_pricing(self):
          """Load pricing data from the JSON configuration file."""
@@ -59,6 +68,64 @@ class CostCalculator:
          except Exception as e:
              logger.error("An unexpected error occurred while loading pricing: %s", e, exc_info=True)
 
+     def _merge_custom_pricing(self, custom_pricing_json: str):
+         """Merge custom pricing from JSON string into existing pricing data.
+
+         Args:
+             custom_pricing_json: JSON string with custom model pricing.
+                 Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+         """
+         try:
+             custom_pricing = json.loads(custom_pricing_json)
+
+             if not isinstance(custom_pricing, dict):
+                 logger.error(
+                     "Custom pricing must be a JSON object/dict. Got: %s",
+                     type(custom_pricing).__name__,
+                 )
+                 return
+
+             # Merge custom pricing into each category (chat, embeddings, images, audio)
+             for category, models in custom_pricing.items():
+                 if category not in ["chat", "embeddings", "images", "audio"]:
+                     logger.warning(
+                         "Unknown pricing category '%s' in custom pricing. Valid categories: "
+                         "chat, embeddings, images, audio",
+                         category,
+                     )
+                     continue
+
+                 if not isinstance(models, dict):
+                     logger.error(
+                         "Custom pricing for category '%s' must be a dict. Got: %s",
+                         category,
+                         type(models).__name__,
+                     )
+                     continue
+
+                 # Initialize category if it doesn't exist
+                 if category not in self.pricing_data:
+                     self.pricing_data[category] = {}
+
+                 # Merge models into the category
+                 for model_name, pricing in models.items():
+                     self.pricing_data[category][model_name] = pricing
+                     logger.info(
+                         "Added custom pricing for %s model '%s': %s",
+                         category,
+                         model_name,
+                         pricing,
+                     )
+
+         except json.JSONDecodeError as e:
+             logger.error(
+                 "Failed to decode custom pricing JSON: %s. Custom pricing will be ignored.", e
+             )
+         except Exception as e:
+             logger.error(
+                 "An unexpected error occurred while merging custom pricing: %s", e, exc_info=True
+             )
+
      def calculate_cost(
          self,
          model: str,
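
To make the merge semantics concrete, a short sketch (the model name is invented; per-1k-token pricing is an assumption based on the fallback log message elsewhere in this diff):

import json

from genai_otel.cost_calculator import CostCalculator

custom = json.dumps({"chat": {"my-model": {"promptPrice": 0.5, "completionPrice": 1.0}}})
calc = CostCalculator(custom_pricing_json=custom)

# "my-model" now resolves like any bundled chat model; custom entries
# take precedence over llm_pricing.json on key collisions.
usage = {"prompt_tokens": 1000, "completion_tokens": 500}
cost = calc.calculate_granular_cost(model="my-model", usage=usage, call_type="chat")
# Assuming per-1k-token prices: 1000/1000 * 0.5 + 500/1000 * 1.0 = 1.0 USD total.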
@@ -137,18 +204,32 @@
          Dict with keys: total, prompt, completion, reasoning, cache_read, cache_write
          """
          model_key = self._normalize_model_name(model, "chat")
-         if not model_key:
-             logger.debug("Pricing not found for chat model: %s", model)
-             return {
-                 "total": 0.0,
-                 "prompt": 0.0,
-                 "completion": 0.0,
-                 "reasoning": 0.0,
-                 "cache_read": 0.0,
-                 "cache_write": 0.0,
-             }
 
-         pricing = self.pricing_data["chat"][model_key]
+         # Fallback for unknown local models (Ollama, HuggingFace): estimate pricing based on parameter count
+         if not model_key:
+             param_count = self._extract_param_count_from_model_name(model)
+             if param_count is not None:
+                 pricing = self._get_local_model_price_tier(param_count)
+                 logger.info(
+                     "Using fallback pricing for unknown local model '%s' with %.2fB parameters: "
+                     "$%.4f prompt / $%.4f completion per 1k tokens",
+                     model,
+                     param_count,
+                     pricing["promptPrice"],
+                     pricing["completionPrice"],
+                 )
+             else:
+                 logger.debug("Pricing not found for chat model: %s", model)
+                 return {
+                     "total": 0.0,
+                     "prompt": 0.0,
+                     "completion": 0.0,
+                     "reasoning": 0.0,
+                     "cache_read": 0.0,
+                     "cache_write": 0.0,
+                 }
+         else:
+             pricing = self.pricing_data["chat"][model_key]
 
          # Standard prompt and completion tokens
          prompt_tokens = usage.get("prompt_tokens", 0)
@@ -274,3 +355,113 @@
              if key.lower() in normalized_model:
                  return key
          return None
+
+     def _extract_param_count_from_model_name(self, model: str) -> Optional[float]:
+         """Extract parameter count from Ollama or HuggingFace model name.
+
+         Supports both explicit size indicators and common model size names.
+
+         Examples:
+             Ollama models:
+                 "smollm2:360m" -> 0.36
+                 "llama3:7b" -> 7.0
+                 "llama3.1:70b" -> 70.0
+                 "deepseek-r1:32b" -> 32.0
+
+             HuggingFace models:
+                 "gpt2" -> 0.124 (base)
+                 "gpt2-xl" -> 1.5
+                 "bert-base-uncased" -> 0.11
+                 "bert-large-uncased" -> 0.34
+                 "t5-small" -> 0.06
+                 "t5-xxl" -> 11.0
+                 "llama-2-7b" -> 7.0
+                 "mistral-7b-v0.1" -> 7.0
+
+         Returns:
+             Parameter count in billions, or None if not parseable.
+         """
+         model_lower = model.lower()
+
+         # First try explicit parameter count patterns (e.g., 135m, 7b, 70b)
+         # Matches: digits followed by optional decimal, then 'm' or 'b'
+         pattern = r"(\d+(?:\.\d+)?)(m|b)(?:\s|:|$|-)"
+         match = re.search(pattern, model_lower)
+         if match:
+             value = float(match.group(1))
+             unit = match.group(2)
+             if unit == "m":
+                 return value / 1000  # Convert millions to billions
+             elif unit == "b":
+                 return value
+
+         # Fallback to common model size indicators for HuggingFace models
+         # These are approximate values based on typical model sizes
+         size_map = {
+             # T5 family
+             "t5-small": 0.06,
+             "t5-base": 0.22,
+             "t5-large": 0.77,
+             "t5-xl": 3.0,
+             "t5-xxl": 11.0,
+             # GPT-2 family
+             "gpt2-small": 0.124,
+             "gpt2-medium": 0.355,
+             "gpt2-large": 0.774,
+             "gpt2-xl": 1.5,
+             "gpt2": 0.124,  # default GPT-2 is small
+             # BERT family
+             "bert-tiny": 0.004,
+             "bert-mini": 0.011,
+             "bert-small": 0.029,
+             "bert-medium": 0.041,
+             "bert-base": 0.11,
+             "bert-large": 0.34,
+             # Generic size indicators (fallback)
+             "tiny": 0.01,
+             "mini": 0.02,
+             "small": 0.06,
+             "base": 0.11,
+             "medium": 0.35,
+             "large": 0.77,
+             "xl": 1.5,
+             "xxl": 11.0,
+         }
+
+         # Check for size indicators in the model name
+         for size_key, param_count in size_map.items():
+             if size_key in model_lower:
+                 return param_count
+
+         return None
+
+     def _get_local_model_price_tier(self, param_count_billions: float) -> Dict[str, float]:
+         """Get pricing tier based on parameter count for local models (Ollama, HuggingFace).
+
+         Local models (Ollama, HuggingFace Transformers) are free but consume GPU power
+         and electricity. We estimate costs based on parameter count and comparable
+         cloud API pricing.
+
+         Price Tiers (based on parameter count):
+         - Tiny (< 1B params): $0.0001 / $0.0002 (prompt/completion)
+         - Small (1-10B): $0.0003 / $0.0006
+         - Medium (10-20B): $0.0005 / $0.001
+         - Large (20-80B): $0.0008 / $0.0008
+         - XLarge (80B+): $0.0012 / $0.0012
+
+         Args:
+             param_count_billions: Model parameter count in billions
+
+         Returns:
+             Dict with promptPrice and completionPrice
+         """
+         if param_count_billions < 1.0:
+             return {"promptPrice": 0.0001, "completionPrice": 0.0002}
+         elif param_count_billions < 10.0:
+             return {"promptPrice": 0.0003, "completionPrice": 0.0006}
+         elif param_count_billions < 20.0:
+             return {"promptPrice": 0.0005, "completionPrice": 0.001}
+         elif param_count_billions < 80.0:
+             return {"promptPrice": 0.0008, "completionPrice": 0.0008}
+         else:
+             return {"promptPrice": 0.0012, "completionPrice": 0.0012}
genai_otel/cost_enrichment_processor.py ADDED
@@ -0,0 +1,175 @@
+ """Custom SpanProcessor to enrich OpenInference spans with cost tracking.
+
+ This processor adds cost attributes to spans created by OpenInference instrumentors
+ (smolagents, litellm, mcp) by extracting token usage and model information from
+ existing span attributes and calculating costs using our CostCalculator.
+
+ Supports both OpenTelemetry GenAI and OpenInference semantic conventions:
+ - GenAI: gen_ai.request.model, gen_ai.usage.{prompt_tokens,completion_tokens}
+ - OpenInference: llm.model_name, llm.token_count.{prompt,completion}
+ """
+
+ import logging
+ from typing import Optional
+
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+ from opentelemetry.trace import SpanContext
+
+ from .cost_calculator import CostCalculator
+
+ logger = logging.getLogger(__name__)
+
+
+ class CostEnrichmentSpanProcessor(SpanProcessor):
+     """Enriches spans with cost tracking attributes.
+
+     This processor:
+     1. Identifies spans from OpenInference instrumentors (smolagents, litellm, mcp)
+     2. Extracts model name and token usage from span attributes
+     3. Calculates cost using CostCalculator
+     4. Adds cost attributes (gen_ai.usage.cost.total, etc.) to the span
+     """
+
+     def __init__(self, cost_calculator: Optional[CostCalculator] = None):
+         """Initialize the cost enrichment processor.
+
+         Args:
+             cost_calculator: CostCalculator instance to use for cost calculations.
+                 If None, creates a new instance.
+         """
+         self.cost_calculator = cost_calculator or CostCalculator()
+         logger.info("CostEnrichmentSpanProcessor initialized")
+
+     def on_start(self, span: Span, parent_context: Optional[SpanContext] = None) -> None:
+         """Called when a span starts. No action needed."""
+         pass
+
+     def on_end(self, span: ReadableSpan) -> None:
+         """Called when a span ends. Enriches with cost attributes if applicable.
+
+         Args:
+             span: The span that just ended.
+         """
+         try:
+             # Only process spans that have LLM-related attributes
+             if not span.attributes:
+                 return
+
+             attributes = span.attributes
+
+             # Check for model name - support both GenAI and OpenInference conventions
+             model = (
+                 attributes.get("gen_ai.request.model")
+                 or attributes.get("llm.model_name")
+                 or attributes.get("embedding.model_name")
+             )
+             if not model:
+                 return
+
+             # Skip if cost attributes are already present (added by instrumentor)
+             if "gen_ai.usage.cost.total" in attributes:
+                 logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
+                 return
+
+             # Extract token usage - support GenAI, OpenInference, and legacy conventions
+             prompt_tokens = (
+                 attributes.get("gen_ai.usage.prompt_tokens")
+                 or attributes.get("gen_ai.usage.input_tokens")
+                 or attributes.get("llm.token_count.prompt")  # OpenInference
+                 or 0
+             )
+             completion_tokens = (
+                 attributes.get("gen_ai.usage.completion_tokens")
+                 or attributes.get("gen_ai.usage.output_tokens")
+                 or attributes.get("llm.token_count.completion")  # OpenInference
+                 or 0
+             )
+
+             # Skip if no tokens recorded
+             if prompt_tokens == 0 and completion_tokens == 0:
+                 return
+
+             # Get call type - support both GenAI and OpenInference conventions
+             # OpenInference uses openinference.span.kind (values: LLM, EMBEDDING, etc.)
+             span_kind = attributes.get("openinference.span.kind", "").upper()
+             call_type = attributes.get("gen_ai.operation.name") or span_kind.lower() or "chat"
+
+             # Map operation names to call types for cost calculator
+             # Supports both GenAI and OpenInference conventions
+             call_type_mapping = {
+                 # GenAI conventions
+                 "chat": "chat",
+                 "completion": "chat",
+                 "embedding": "embedding",
+                 "embeddings": "embedding",
+                 "text_generation": "chat",
+                 "image_generation": "image",
+                 "audio": "audio",
+                 # OpenInference conventions (span.kind values)
+                 "llm": "chat",
+                 "embedding": "embedding",
+                 "chain": "chat",
+                 "retriever": "embedding",
+                 "reranker": "embedding",
+                 "tool": "chat",
+                 "agent": "chat",
+             }
+             normalized_call_type = call_type_mapping.get(str(call_type).lower(), "chat")
+
+             # Calculate cost
+             usage = {
+                 "prompt_tokens": int(prompt_tokens),
+                 "completion_tokens": int(completion_tokens),
+                 "total_tokens": int(prompt_tokens) + int(completion_tokens),
+             }
+
+             # Use calculate_granular_cost to get detailed breakdown
+             cost_info = self.cost_calculator.calculate_granular_cost(
+                 model=str(model),
+                 usage=usage,
+                 call_type=normalized_call_type,
+             )
+
+             if cost_info and cost_info.get("total", 0.0) > 0:
+                 # Add cost attributes to the span
+                 # Note: We can't modify ReadableSpan attributes directly,
+                 # but we can if span is still a Span instance
+                 if isinstance(span, Span):
+                     span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
+
+                     if cost_info.get("prompt", 0.0) > 0:
+                         span.set_attribute("gen_ai.usage.cost.prompt", cost_info["prompt"])
+                     if cost_info.get("completion", 0.0) > 0:
+                         span.set_attribute("gen_ai.usage.cost.completion", cost_info["completion"])
+
+                     logger.info(
+                         f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
+                         f"for model {model} ({usage['total_tokens']} tokens)"
+                     )
+                 else:
+                     logger.warning(
+                         f"Span '{span.name}' is not mutable (type: {type(span).__name__}), "
+                         "cannot add cost attributes"
+                     )
+
+         except Exception as e:
+             # Don't fail span processing due to cost enrichment errors
+             logger.warning(
+                 f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
+                 exc_info=True,
+             )
+
+     def shutdown(self) -> None:
+         """Called when the processor is shutdown."""
+         logger.info("CostEnrichmentSpanProcessor shutdown")
+
+     def force_flush(self, timeout_millis: int = 30000) -> bool:
+         """Force flush any pending spans.
+
+         Args:
+             timeout_millis: Timeout in milliseconds.
+
+         Returns:
+             True if flush succeeded.
+         """
+         return True
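
Outside of setup_auto_instrumentation, the processor attaches like any other SpanProcessor; a minimal sketch (the exporter choice is illustrative):

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

from genai_otel.cost_enrichment_processor import CostEnrichmentSpanProcessor

provider = TracerProvider()
# Processors run in registration order, so enrichment is registered first
# and the cost attributes are in place before the exporter sees the span.
provider.add_span_processor(CostEnrichmentSpanProcessor())
provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)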
genai_otel/gpu_metrics.py CHANGED
@@ -42,6 +42,7 @@ class GPUMetricsCollector:
          self._stop_event = threading.Event()
          self.gpu_utilization_counter: Optional[ObservableCounter] = None
          self.gpu_memory_used_gauge: Optional[ObservableGauge] = None
+         self.gpu_memory_total_gauge: Optional[ObservableGauge] = None
          self.gpu_temperature_gauge: Optional[ObservableGauge] = None
          self.gpu_power_gauge: Optional[ObservableGauge] = None
          self.config = config
@@ -67,6 +68,11 @@
              description="Cumulative CO2 equivalent emissions in grams",
              unit="gCO2e",
          )
+         self.power_cost_counter = meter.create_counter(
+             "gen_ai.power.cost",  # New metric name
+             description="Cumulative electricity cost in USD based on GPU power consumption",
+             unit="USD",
+         )
          if not NVML_AVAILABLE:
              logger.warning(
                  "GPU metrics collection not available - nvidia-ml-py not installed. "
@@ -88,6 +94,12 @@
              description="GPU memory used in MiB",
              unit="MiB",
          )
+         self.gpu_memory_total_gauge = self.meter.create_observable_gauge(
+             "gen_ai.gpu.memory.total",  # Fixed metric name
+             callbacks=[self._observe_gpu_memory_total],
+             description="Total GPU memory capacity in MiB",
+             unit="MiB",
+         )
          self.gpu_temperature_gauge = self.meter.create_observable_gauge(
              "gen_ai.gpu.temperature",  # Fixed metric name
              callbacks=[self._observe_gpu_temperature],
@@ -167,6 +179,33 @@
          except Exception as e:
              logger.error("Error observing GPU memory: %s", e)
 
+     def _observe_gpu_memory_total(self, options):
+         """Observable callback for total GPU memory capacity."""
+         if not NVML_AVAILABLE or not self.gpu_available:
+             return
+
+         try:
+             pynvml.nvmlInit()
+             device_count = pynvml.nvmlDeviceGetCount()
+
+             for i in range(device_count):
+                 handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                 device_name = self._get_device_name(handle, i)
+
+                 try:
+                     memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+                     gpu_memory_total = memory_info.total / (1024**2)  # Convert to MiB
+                     yield Observation(
+                         value=gpu_memory_total,
+                         attributes={"gpu_id": str(i), "gpu_name": device_name},
+                     )
+                 except Exception as e:
+                     logger.debug("Failed to get total GPU memory for GPU %d: %s", i, e)
+
+             pynvml.nvmlShutdown()
+         except Exception as e:
+             logger.error("Error observing total GPU memory: %s", e)
+
      def _observe_gpu_temperature(self, options):
          """Observable callback for GPU temperature."""
          if not NVML_AVAILABLE or not self.gpu_available:
@@ -249,11 +288,22 @@
                          delta_time_hours * 3600.0
                      )  # Wh (power in kW * hours = kWh, but track in Wh for precision)
                      self.cumulative_energy_wh[i] += delta_energy_wh
+
+                     # Calculate and record CO2 emissions if enabled
                      if self.config.enable_co2_tracking:
                          delta_co2_g = (
                              delta_energy_wh / 1000.0
                          ) * self.config.carbon_intensity  # gCO2e
                          self.co2_counter.add(delta_co2_g, {"gpu_id": str(i)})
+
+                     # Calculate and record power cost
+                     # delta_energy_wh is in Wh, convert to kWh and multiply by cost per kWh
+                     delta_cost_usd = (delta_energy_wh / 1000.0) * self.config.power_cost_per_kwh
+                     device_name = self._get_device_name(handle, i)
+                     self.power_cost_counter.add(
+                         delta_cost_usd, {"gpu_id": str(i), "gpu_name": device_name}
+                     )
+
                  self.last_timestamp[i] = current_time
              except Exception as e:
                  logger.error(f"Error collecting GPU {i} metrics: {e}")