genai-otel-instrument 0.1.1.dev0__py3-none-any.whl → 0.1.4.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of genai-otel-instrument might be problematic; see the registry listing for details.

genai_otel/__version__.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.1.1.dev0'
-__version_tuple__ = version_tuple = (0, 1, 1, 'dev0')
+__version__ = version = '0.1.4.dev0'
+__version_tuple__ = version_tuple = (0, 1, 4, 'dev0')

 __commit_id__ = commit_id = None
@@ -17,6 +17,8 @@ from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

 from .config import OTelConfig
+from .cost_calculator import CostCalculator
+from .cost_enrichment_processor import CostEnrichmentSpanProcessor
 from .gpu_metrics import GPUMetricsCollector
 from .mcp_instrumentors import MCPInstrumentorManager
 from .metrics import (
@@ -117,12 +119,16 @@ INSTRUMENTORS = {
 }

 # Add OpenInference instrumentors if available (requires Python >= 3.10)
+# IMPORTANT: Order matters! Load in this specific sequence:
+# 1. smolagents - instruments the agent framework
+# 2. litellm - instruments LLM calls made by agents
+# 3. mcp - instruments Model Context Protocol tools
 if OPENINFERENCE_AVAILABLE:
     INSTRUMENTORS.update(
         {
             "smolagents": SmolagentsInstrumentor,
-            "mcp": MCPInstrumentor,
             "litellm": LiteLLMInstrumentor,
+            "mcp": MCPInstrumentor,
         }
     )

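Because Python dicts preserve insertion order, this reordering changes the sequence in which the instrumentors are applied. A minimal sketch of the effect, assuming a hypothetical apply loop (the loop that consumes INSTRUMENTORS is not shown in this diff, and the instrument() interface is assumed):

    # Hypothetical sketch: instrumentors are applied in dict insertion order.
    INSTRUMENTORS = {
        "smolagents": SmolagentsInstrumentor,  # 1. agent framework first
        "litellm": LiteLLMInstrumentor,        # 2. then the LLM client it drives
        "mcp": MCPInstrumentor,                # 3. finally MCP tools
    }
    for name, instrumentor_cls in INSTRUMENTORS.items():
        instrumentor_cls().instrument()  # assumed interface, for illustration only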
@@ -163,6 +169,17 @@ def setup_auto_instrumentation(config: OTelConfig):

    set_global_textmap(TraceContextTextMapPropagator())

+    # Add cost enrichment processor for OpenInference instrumentors
+    # This enriches spans from smolagents, litellm, mcp with cost attributes
+    if config.enable_cost_tracking:
+        try:
+            cost_calculator = CostCalculator()
+            cost_processor = CostEnrichmentSpanProcessor(cost_calculator)
+            tracer_provider.add_span_processor(cost_processor)
+            logger.info("Cost enrichment processor added for OpenInference instrumentors")
+        except Exception as e:
+            logger.warning(f"Failed to add cost enrichment processor: {e}", exc_info=True)
+
    logger.debug(f"OTelConfig endpoint: {config.endpoint}")
    if config.endpoint:
        # Convert timeout to float safely
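For reference, the enrichment path registered above can also be wired by hand; a minimal sketch using the names introduced in this diff (the surrounding TracerProvider setup is an assumption, mirroring what setup_auto_instrumentation does when config.enable_cost_tracking is set):

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    from genai_otel.cost_calculator import CostCalculator
    from genai_otel.cost_enrichment_processor import CostEnrichmentSpanProcessor

    provider = TracerProvider()
    # Same registration performed by setup_auto_instrumentation above
    provider.add_span_processor(CostEnrichmentSpanProcessor(CostCalculator()))
    trace.set_tracer_provider(provider)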
genai_otel/config.py CHANGED
@@ -44,6 +44,9 @@ DEFAULT_INSTRUMENTORS = [
 ]

 # Add OpenInference instrumentors only for Python >= 3.10
+# IMPORTANT: Order matters! Load in this specific sequence:
+# 1. smolagents - instruments the agent framework
+# 2. litellm - instruments the LLM calls made by agents
 if sys.version_info >= (3, 10):
     DEFAULT_INSTRUMENTORS.extend(["smolagents", "litellm"])

@@ -2,6 +2,7 @@

 import json
 import logging
+import re
 from typing import Any, Dict, Optional

 logger = logging.getLogger(__name__)
@@ -137,18 +138,32 @@ class CostCalculator:
             Dict with keys: total, prompt, completion, reasoning, cache_read, cache_write
         """
         model_key = self._normalize_model_name(model, "chat")
-        if not model_key:
-            logger.debug("Pricing not found for chat model: %s", model)
-            return {
-                "total": 0.0,
-                "prompt": 0.0,
-                "completion": 0.0,
-                "reasoning": 0.0,
-                "cache_read": 0.0,
-                "cache_write": 0.0,
-            }

-        pricing = self.pricing_data["chat"][model_key]
+        # Fallback for unknown local models (Ollama, HuggingFace): estimate pricing based on parameter count
+        if not model_key:
+            param_count = self._extract_param_count_from_model_name(model)
+            if param_count is not None:
+                pricing = self._get_local_model_price_tier(param_count)
+                logger.info(
+                    "Using fallback pricing for unknown local model '%s' with %.2fB parameters: "
+                    "$%.4f prompt / $%.4f completion per 1k tokens",
+                    model,
+                    param_count,
+                    pricing["promptPrice"],
+                    pricing["completionPrice"]
+                )
+            else:
+                logger.debug("Pricing not found for chat model: %s", model)
+                return {
+                    "total": 0.0,
+                    "prompt": 0.0,
+                    "completion": 0.0,
+                    "reasoning": 0.0,
+                    "cache_read": 0.0,
+                    "cache_write": 0.0,
+                }
+        else:
+            pricing = self.pricing_data["chat"][model_key]

         # Standard prompt and completion tokens
         prompt_tokens = usage.get("prompt_tokens", 0)
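A worked example of the fallback arithmetic, assuming prices apply per 1,000 tokens as the log message above states:

    # "llama3:7b" -> 7.0B params -> Small tier ($0.0003 prompt / $0.0006 completion per 1k tokens)
    prompt_cost = 0.0003 * (1000 / 1000)        # 1,000 prompt tokens   -> $0.0003
    completion_cost = 0.0006 * (500 / 1000)     # 500 completion tokens -> $0.0003
    total_cost = prompt_cost + completion_cost  # $0.0006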
@@ -274,3 +289,113 @@ class CostCalculator:
             if key.lower() in normalized_model:
                 return key
         return None
+
+    def _extract_param_count_from_model_name(self, model: str) -> Optional[float]:
+        """Extract parameter count from Ollama or HuggingFace model name.
+
+        Supports both explicit size indicators and common model size names.
+
+        Examples:
+            Ollama models:
+                "smollm2:360m" -> 0.36
+                "llama3:7b" -> 7.0
+                "llama3.1:70b" -> 70.0
+                "deepseek-r1:32b" -> 32.0
+
+            HuggingFace models:
+                "gpt2" -> 0.124 (base)
+                "gpt2-xl" -> 1.5
+                "bert-base-uncased" -> 0.11
+                "bert-large-uncased" -> 0.34
+                "t5-small" -> 0.06
+                "t5-xxl" -> 11.0
+                "llama-2-7b" -> 7.0
+                "mistral-7b-v0.1" -> 7.0
+
+        Returns:
+            Parameter count in billions, or None if not parseable.
+        """
+        model_lower = model.lower()
+
+        # First try explicit parameter count patterns (e.g., 135m, 7b, 70b)
+        # Matches: digits followed by optional decimal, then 'm' or 'b'
+        pattern = r'(\d+(?:\.\d+)?)(m|b)(?:\s|:|$|-)'
+        match = re.search(pattern, model_lower)
+        if match:
+            value = float(match.group(1))
+            unit = match.group(2)
+            if unit == 'm':
+                return value / 1000  # Convert millions to billions
+            elif unit == 'b':
+                return value
+
+        # Fallback to common model size indicators for HuggingFace models
+        # These are approximate values based on typical model sizes
+        size_map = {
+            # T5 family
+            "t5-small": 0.06,
+            "t5-base": 0.22,
+            "t5-large": 0.77,
+            "t5-xl": 3.0,
+            "t5-xxl": 11.0,
+            # GPT-2 family
+            "gpt2-small": 0.124,
+            "gpt2-medium": 0.355,
+            "gpt2-large": 0.774,
+            "gpt2-xl": 1.5,
+            "gpt2": 0.124,  # default GPT-2 is small
+            # BERT family
+            "bert-tiny": 0.004,
+            "bert-mini": 0.011,
+            "bert-small": 0.029,
+            "bert-medium": 0.041,
+            "bert-base": 0.11,
+            "bert-large": 0.34,
+            # Generic size indicators (fallback)
+            "tiny": 0.01,
+            "mini": 0.02,
+            "small": 0.06,
+            "base": 0.11,
+            "medium": 0.35,
+            "large": 0.77,
+            "xl": 1.5,
+            "xxl": 11.0,
+        }
+
+        # Check for size indicators in the model name
+        for size_key, param_count in size_map.items():
+            if size_key in model_lower:
+                return param_count
+
+        return None
+
+    def _get_local_model_price_tier(self, param_count_billions: float) -> Dict[str, float]:
+        """Get pricing tier based on parameter count for local models (Ollama, HuggingFace).
+
+        Local models (Ollama, HuggingFace Transformers) are free but consume GPU power
+        and electricity. We estimate costs based on parameter count and comparable
+        cloud API pricing.
+
+        Price Tiers (based on parameter count):
+            - Tiny (< 1B params): $0.0001 / $0.0002 (prompt/completion)
+            - Small (1-10B): $0.0003 / $0.0006
+            - Medium (10-20B): $0.0005 / $0.001
+            - Large (20-80B): $0.0008 / $0.0008
+            - XLarge (80B+): $0.0012 / $0.0012
+
+        Args:
+            param_count_billions: Model parameter count in billions
+
+        Returns:
+            Dict with promptPrice and completionPrice
+        """
+        if param_count_billions < 1.0:
+            return {"promptPrice": 0.0001, "completionPrice": 0.0002}
+        elif param_count_billions < 10.0:
+            return {"promptPrice": 0.0003, "completionPrice": 0.0006}
+        elif param_count_billions < 20.0:
+            return {"promptPrice": 0.0005, "completionPrice": 0.001}
+        elif param_count_billions < 80.0:
+            return {"promptPrice": 0.0008, "completionPrice": 0.0008}
+        else:
+            return {"promptPrice": 0.0012, "completionPrice": 0.0012}
@@ -0,0 +1,177 @@
+"""Custom SpanProcessor to enrich OpenInference spans with cost tracking.
+
+This processor adds cost attributes to spans created by OpenInference instrumentors
+(smolagents, litellm, mcp) by extracting token usage and model information from
+existing span attributes and calculating costs using our CostCalculator.
+
+Supports both OpenTelemetry GenAI and OpenInference semantic conventions:
+- GenAI: gen_ai.request.model, gen_ai.usage.{prompt_tokens,completion_tokens}
+- OpenInference: llm.model_name, llm.token_count.{prompt,completion}
+"""
+
+import logging
+from typing import Optional
+
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+from opentelemetry.trace import SpanContext
+
+from .cost_calculator import CostCalculator
+
+logger = logging.getLogger(__name__)
+
+
+class CostEnrichmentSpanProcessor(SpanProcessor):
+    """Enriches spans with cost tracking attributes.
+
+    This processor:
+    1. Identifies spans from OpenInference instrumentors (smolagents, litellm, mcp)
+    2. Extracts model name and token usage from span attributes
+    3. Calculates cost using CostCalculator
+    4. Adds cost attributes (gen_ai.usage.cost.total, etc.) to the span
+    """
+
+    def __init__(self, cost_calculator: Optional[CostCalculator] = None):
+        """Initialize the cost enrichment processor.
+
+        Args:
+            cost_calculator: CostCalculator instance to use for cost calculations.
+                If None, creates a new instance.
+        """
+        self.cost_calculator = cost_calculator or CostCalculator()
+        logger.info("CostEnrichmentSpanProcessor initialized")
+
+    def on_start(self, span: Span, parent_context: Optional[SpanContext] = None) -> None:
+        """Called when a span starts. No action needed."""
+        pass
+
+    def on_end(self, span: ReadableSpan) -> None:
+        """Called when a span ends. Enriches with cost attributes if applicable.
+
+        Args:
+            span: The span that just ended.
+        """
+        try:
+            # Only process spans that have LLM-related attributes
+            if not span.attributes:
+                return
+
+            attributes = span.attributes
+
+            # Check for model name - support both GenAI and OpenInference conventions
+            model = (
+                attributes.get("gen_ai.request.model")
+                or attributes.get("llm.model_name")
+                or attributes.get("embedding.model_name")
+            )
+            if not model:
+                return
+
+            # Skip if cost attributes are already present (added by instrumentor)
+            if "gen_ai.usage.cost.total" in attributes:
+                logger.debug(
+                    f"Span '{span.name}' already has cost attributes, skipping enrichment"
+                )
+                return
+
+            # Extract token usage - support GenAI, OpenInference, and legacy conventions
+            prompt_tokens = (
+                attributes.get("gen_ai.usage.prompt_tokens")
+                or attributes.get("gen_ai.usage.input_tokens")
+                or attributes.get("llm.token_count.prompt")  # OpenInference
+                or 0
+            )
+            completion_tokens = (
+                attributes.get("gen_ai.usage.completion_tokens")
+                or attributes.get("gen_ai.usage.output_tokens")
+                or attributes.get("llm.token_count.completion")  # OpenInference
+                or 0
+            )
+
+            # Skip if no tokens recorded
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return
+
+            # Get call type - support both GenAI and OpenInference conventions
+            # OpenInference uses openinference.span.kind (values: LLM, EMBEDDING, etc.)
+            span_kind = attributes.get("openinference.span.kind", "").upper()
+            call_type = attributes.get("gen_ai.operation.name") or span_kind.lower() or "chat"
+
+            # Map operation names to call types for cost calculator
+            # Supports both GenAI and OpenInference conventions
+            call_type_mapping = {
+                # GenAI conventions
+                "chat": "chat",
+                "completion": "chat",
+                "embedding": "embedding",
+                "embeddings": "embedding",
+                "text_generation": "chat",
+                "image_generation": "image",
+                "audio": "audio",
+                # OpenInference conventions (span.kind values)
+                "llm": "chat",
+                "embedding": "embedding",
+                "chain": "chat",
+                "retriever": "embedding",
+                "reranker": "embedding",
+                "tool": "chat",
+                "agent": "chat",
+            }
+            normalized_call_type = call_type_mapping.get(str(call_type).lower(), "chat")
+
+            # Calculate cost
+            usage = {
+                "prompt_tokens": int(prompt_tokens),
+                "completion_tokens": int(completion_tokens),
+                "total_tokens": int(prompt_tokens) + int(completion_tokens),
+            }
+
+            # Use calculate_granular_cost to get detailed breakdown
+            cost_info = self.cost_calculator.calculate_granular_cost(
+                model=str(model),
+                usage=usage,
+                call_type=normalized_call_type,
+            )
+
+            if cost_info and cost_info.get("total", 0.0) > 0:
+                # Add cost attributes to the span
+                # Note: We can't modify ReadableSpan attributes directly,
+                # but we can if span is still a Span instance
+                if isinstance(span, Span):
+                    span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
+
+                    if cost_info.get("prompt", 0.0) > 0:
+                        span.set_attribute("gen_ai.usage.cost.prompt", cost_info["prompt"])
+                    if cost_info.get("completion", 0.0) > 0:
+                        span.set_attribute("gen_ai.usage.cost.completion", cost_info["completion"])
+
+                    logger.info(
+                        f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
+                        f"for model {model} ({usage['total_tokens']} tokens)"
+                    )
+                else:
+                    logger.warning(
+                        f"Span '{span.name}' is not mutable (type: {type(span).__name__}), "
+                        "cannot add cost attributes"
+                    )
+
+        except Exception as e:
+            # Don't fail span processing due to cost enrichment errors
+            logger.warning(
+                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
+                exc_info=True,
+            )
+
+    def shutdown(self) -> None:
+        """Called when the processor is shutdown."""
+        logger.info("CostEnrichmentSpanProcessor shutdown")
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush any pending spans.
+
+        Args:
+            timeout_millis: Timeout in milliseconds.
+
+        Returns:
+            True if flush succeeded.
+        """
+        return True
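A minimal end-to-end sketch of the new processor (test-style; the wiring is an assumption built from standard OpenTelemetry SDK pieces, not code from this package):

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    from genai_otel.cost_enrichment_processor import CostEnrichmentSpanProcessor

    provider = TracerProvider()
    exporter = InMemorySpanExporter()
    # Register enrichment before the exporting processor so its on_end runs first.
    provider.add_span_processor(CostEnrichmentSpanProcessor())
    provider.add_span_processor(SimpleSpanProcessor(exporter))

    tracer = provider.get_tracer("demo")
    with tracer.start_as_current_span("llm-call") as span:
        # OpenInference-style attributes, as read by on_end above
        span.set_attribute("llm.model_name", "gpt-4o")
        span.set_attribute("llm.token_count.prompt", 1000)
        span.set_attribute("llm.token_count.completion", 200)

    # Whether gen_ai.usage.cost.* appears depends on the isinstance(span, Span)
    # check in on_end: SDK versions that hand processors an immutable
    # ReadableSpan will hit the "not mutable" warning branch instead.
    for finished in exporter.get_finished_spans():
        print(finished.name, dict(finished.attributes))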
genai_otel/gpu_metrics.py CHANGED
@@ -43,6 +43,7 @@ class GPUMetricsCollector:
         self.gpu_utilization_counter: Optional[ObservableCounter] = None
         self.gpu_memory_used_gauge: Optional[ObservableGauge] = None
         self.gpu_temperature_gauge: Optional[ObservableGauge] = None
+        self.gpu_power_gauge: Optional[ObservableGauge] = None
         self.config = config
         self.interval = interval  # seconds
         self.gpu_available = False
@@ -93,6 +94,12 @@ class GPUMetricsCollector:
                 description="GPU temperature in Celsius",
                 unit="Cel",
             )
+            self.gpu_power_gauge = self.meter.create_observable_gauge(
+                "gen_ai.gpu.power",  # Fixed metric name
+                callbacks=[self._observe_gpu_power],
+                description="GPU power consumption in Watts",
+                unit="W",
+            )
         except Exception as e:
             logger.error("Failed to create GPU metrics instruments: %s", e, exc_info=True)

@@ -185,6 +192,33 @@ class GPUMetricsCollector:
         except Exception as e:
             logger.error("Error observing GPU temperature: %s", e)

+    def _observe_gpu_power(self, options):
+        """Observable callback for GPU power consumption."""
+        if not NVML_AVAILABLE or not self.gpu_available:
+            return
+
+        try:
+            pynvml.nvmlInit()
+            device_count = pynvml.nvmlDeviceGetCount()
+
+            for i in range(device_count):
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                device_name = self._get_device_name(handle, i)
+
+                try:
+                    # Power usage is returned in milliwatts, convert to watts
+                    power_mw = pynvml.nvmlDeviceGetPowerUsage(handle)
+                    power_w = power_mw / 1000.0
+                    yield Observation(
+                        value=power_w, attributes={"gpu_id": str(i), "gpu_name": device_name}
+                    )
+                except Exception as e:
+                    logger.debug("Failed to get GPU power for GPU %d: %s", i, e)
+
+            pynvml.nvmlShutdown()
+        except Exception as e:
+            logger.error("Error observing GPU power: %s", e)
+
     def start(self):
         """Starts the GPU metrics collection.

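The new callback converts NVML's milliwatt reading to watts. A standalone sketch of the same conversion, assuming an NVIDIA GPU with the nvidia-ml-py bindings installed (the pynvml call names match those used in the diff):

    import pynvml

    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        power_mw = pynvml.nvmlDeviceGetPowerUsage(handle)  # reported in milliwatts
        print(f"GPU 0 power draw: {power_mw / 1000.0:.1f} W")
    finally:
        pynvml.nvmlShutdown()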
@@ -82,6 +82,12 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
     _shared_latency_histogram = None
     _shared_cost_counter = None
     _shared_error_counter = None
+    # Granular cost counters (Phase 3.2)
+    _shared_prompt_cost_counter = None
+    _shared_completion_cost_counter = None
+    _shared_reasoning_cost_counter = None
+    _shared_cache_read_cost_counter = None
+    _shared_cache_write_cost_counter = None
     # Streaming metrics (Phase 3.4)
     _shared_ttft_histogram = None
     _shared_tbt_histogram = None
@@ -103,6 +109,12 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
         self.latency_histogram = self._shared_latency_histogram
         self.cost_counter = self._shared_cost_counter
         self.error_counter = self._shared_error_counter
+        # Granular cost counters (Phase 3.2)
+        self.prompt_cost_counter = self._shared_prompt_cost_counter
+        self.completion_cost_counter = self._shared_completion_cost_counter
+        self.reasoning_cost_counter = self._shared_reasoning_cost_counter
+        self.cache_read_cost_counter = self._shared_cache_read_cost_counter
+        self.cache_write_cost_counter = self._shared_cache_write_cost_counter
        # Streaming metrics
        self.ttft_histogram = self._shared_ttft_histogram
        self.tbt_histogram = self._shared_tbt_histogram
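The creation site for these shared counters is not part of this diff. A hypothetical sketch of how one such counter might be created with the OpenTelemetry metrics API (the metric name here is an assumption, not confirmed by this diff):

    from opentelemetry import metrics

    meter = metrics.get_meter("genai_otel")
    prompt_cost_counter = meter.create_counter(
        "gen_ai.client.cost.prompt",  # assumed name, for illustration only
        unit="USD",
        description="Cumulative prompt-token cost",
    )
    prompt_cost_counter.add(0.0003, {"model": "gpt-4o"})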
@@ -346,45 +358,54 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
             and "dup" in self.config.semconv_stability_opt_in
         )

-        if (
-            self.token_counter
-            and isinstance(prompt_tokens, (int, float))
-            and prompt_tokens > 0
-        ):
-            self.token_counter.add(
-                prompt_tokens, {"token_type": "prompt", "operation": span.name}
-            )
-            # New semantic convention
+        # Record prompt tokens
+        if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+            # Record metric if available
+            if self.token_counter:
+                self.token_counter.add(
+                    prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                )
+            # Always set span attributes (needed for cost calculation)
             span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
             # Old semantic convention (if dual emission enabled)
             if emit_old_attrs:
                 span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))

-        if (
-            self.token_counter
-            and isinstance(completion_tokens, (int, float))
-            and completion_tokens > 0
-        ):
-            self.token_counter.add(
-                completion_tokens, {"token_type": "completion", "operation": span.name}
-            )
-            # New semantic convention
+        # Record completion tokens
+        if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+            # Record metric if available
+            if self.token_counter:
+                self.token_counter.add(
+                    completion_tokens, {"token_type": "completion", "operation": span.name}
+                )
+            # Always set span attributes (needed for cost calculation)
             span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
             # Old semantic convention (if dual emission enabled)
             if emit_old_attrs:
                 span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))

+        # Record total tokens
         if isinstance(total_tokens, (int, float)) and total_tokens > 0:
             span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))

         # Calculate and record cost if enabled and applicable
-        if self.config and self.config.enable_cost_tracking and self._shared_cost_counter:
+        logger.debug(
+            f"Cost tracking check: config={self.config is not None}, "
+            f"enable_cost_tracking={self.config.enable_cost_tracking if self.config else 'N/A'}"
+        )
+        if self.config and self.config.enable_cost_tracking:
             try:
                 model = span.attributes.get("gen_ai.request.model", "unknown")
                 # Assuming 'chat' as a default call_type for generic base instrumentor tests.
                 # Specific instrumentors will provide the actual call_type.
                 call_type = span.attributes.get("gen_ai.request.type", "chat")

+                logger.debug(
+                    f"Calculating cost for model={model}, call_type={call_type}, "
+                    f"prompt_tokens={usage.get('prompt_tokens')}, "
+                    f"completion_tokens={usage.get('completion_tokens')}"
+                )
+
                 # Use granular cost calculation for chat requests
                 if call_type == "chat":
                     costs = self.cost_calculator.calculate_granular_cost(
@@ -394,45 +415,55 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902

                     # Record total cost
                     if total_cost > 0:
-                        self._shared_cost_counter.add(total_cost, {"model": str(model)})
-                        # Set span attributes for granular costs
+                        if self.cost_counter:
+                            self.cost_counter.add(total_cost, {"model": str(model)})
+                        # Always set span attributes (needed for cost tracking)
                         span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                        logger.debug(f"Set cost attribute: gen_ai.usage.cost.total={total_cost}")
+                    else:
+                        logger.debug(f"Cost is zero, not setting attributes. Costs: {costs}")

                     # Record and set attributes for granular costs
-                    if costs["prompt"] > 0 and self._shared_prompt_cost_counter:
-                        self._shared_prompt_cost_counter.add(
-                            costs["prompt"], {"model": str(model)}
-                        )
+                    # Note: Metrics recording is optional, span attributes are always set
+                    if costs["prompt"] > 0:
+                        if self.prompt_cost_counter:
+                            self.prompt_cost_counter.add(
+                                costs["prompt"], {"model": str(model)}
+                            )
                         span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])

-                    if costs["completion"] > 0 and self._shared_completion_cost_counter:
-                        self._shared_completion_cost_counter.add(
-                            costs["completion"], {"model": str(model)}
-                        )
+                    if costs["completion"] > 0:
+                        if self.completion_cost_counter:
+                            self.completion_cost_counter.add(
+                                costs["completion"], {"model": str(model)}
+                            )
                         span.set_attribute(
                             "gen_ai.usage.cost.completion", costs["completion"]
                         )

-                    if costs["reasoning"] > 0 and self._shared_reasoning_cost_counter:
-                        self._shared_reasoning_cost_counter.add(
-                            costs["reasoning"], {"model": str(model)}
-                        )
+                    if costs["reasoning"] > 0:
+                        if self.reasoning_cost_counter:
+                            self.reasoning_cost_counter.add(
+                                costs["reasoning"], {"model": str(model)}
+                            )
                         span.set_attribute(
                             "gen_ai.usage.cost.reasoning", costs["reasoning"]
                         )

-                    if costs["cache_read"] > 0 and self._shared_cache_read_cost_counter:
-                        self._shared_cache_read_cost_counter.add(
-                            costs["cache_read"], {"model": str(model)}
-                        )
+                    if costs["cache_read"] > 0:
+                        if self.cache_read_cost_counter:
+                            self.cache_read_cost_counter.add(
+                                costs["cache_read"], {"model": str(model)}
+                            )
                         span.set_attribute(
                             "gen_ai.usage.cost.cache_read", costs["cache_read"]
                         )

-                    if costs["cache_write"] > 0 and self._shared_cache_write_cost_counter:
-                        self._shared_cache_write_cost_counter.add(
-                            costs["cache_write"], {"model": str(model)}
-                        )
+                    if costs["cache_write"] > 0:
+                        if self.cache_write_cost_counter:
+                            self.cache_write_cost_counter.add(
+                                costs["cache_write"], {"model": str(model)}
+                            )
                         span.set_attribute(
                             "gen_ai.usage.cost.cache_write", costs["cache_write"]
                         )
@@ -440,7 +471,8 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
                     # For non-chat requests, use simple cost calculation
                     cost = self.cost_calculator.calculate_cost(model, usage, call_type)
                     if cost and cost > 0:
-                        self._shared_cost_counter.add(cost, {"model": str(model)})
+                        if self.cost_counter:
+                            self.cost_counter.add(cost, {"model": str(model)})
            except Exception as e:
                logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)
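Net effect of the base-instrumentor changes: metric recording is now optional, while span attributes are always set when cost tracking is enabled. An illustrative shape of an enriched chat span's attributes (the attribute names come from this diff; the values are invented for illustration):

    enriched_attributes = {
        "gen_ai.request.model": "gpt-4o",
        "gen_ai.usage.prompt_tokens": 1000,
        "gen_ai.usage.completion_tokens": 200,
        "gen_ai.usage.total_tokens": 1200,
        "gen_ai.usage.cost.total": 0.0045,       # always set when > 0
        "gen_ai.usage.cost.prompt": 0.0025,      # set when non-zero
        "gen_ai.usage.cost.completion": 0.0020,  # set when non-zero
    }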