genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__version__.py +2 -2
- genai_otel/config.py +19 -1
- genai_otel/cost_calculator.py +72 -6
- genai_otel/cost_enrichment_processor.py +175 -177
- genai_otel/gpu_metrics.py +50 -0
- genai_otel/instrumentors/base.py +228 -4
- genai_otel/instrumentors/cohere_instrumentor.py +140 -140
- genai_otel/instrumentors/huggingface_instrumentor.py +6 -2
- genai_otel/instrumentors/langchain_instrumentor.py +75 -75
- genai_otel/instrumentors/mistralai_instrumentor.py +17 -33
- genai_otel/llm_pricing.json +869 -869
- genai_otel/logging_config.py +45 -45
- genai_otel/py.typed +2 -2
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +250 -26
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +19 -19
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0
genai_otel/__version__.py
CHANGED

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.1.4.dev0'
-__version_tuple__ = version_tuple = (0, 1, 4, 'dev0')
+__version__ = version = '0.1.7.dev0'
+__version_tuple__ = version_tuple = (0, 1, 7, 'dev0')

 __commit_id__ = commit_id = None
genai_otel/config.py
CHANGED

@@ -11,7 +11,7 @@ import logging
 import os
 import sys
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Tuple

 logger = logging.getLogger(__name__)

@@ -104,6 +104,10 @@ class OTelConfig:
         default_factory=lambda: float(os.getenv("GENAI_CARBON_INTENSITY", "475.0"))
     )  # gCO2e/kWh

+    power_cost_per_kwh: float = field(
+        default_factory=lambda: float(os.getenv("GENAI_POWER_COST_PER_KWH", "0.12"))
+    )  # USD per kWh - electricity cost for power consumption tracking
+
     gpu_collection_interval: int = field(
         default_factory=lambda: int(os.getenv("GENAI_GPU_COLLECTION_INTERVAL", "5"))
     )  # seconds - how often to collect GPU metrics and CO2 emissions

@@ -120,6 +124,20 @@ class OTelConfig:
         default_factory=lambda: os.getenv("GENAI_ENABLE_CONTENT_CAPTURE", "false").lower() == "true"
     )

+    # Custom pricing configuration for models not in llm_pricing.json
+    # Format: JSON string with same structure as llm_pricing.json
+    # Example: {"chat": {"custom-model": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+    custom_pricing_json: Optional[str] = field(
+        default_factory=lambda: os.getenv("GENAI_CUSTOM_PRICING_JSON")
+    )
+
+    # Session and user tracking (Phase 4.1)
+    # Optional callable functions to extract session_id and user_id from requests
+    # Signature: (instance, args, kwargs) -> Optional[str]
+    # Example: lambda instance, args, kwargs: kwargs.get("metadata", {}).get("session_id")
+    session_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
+    user_id_extractor: Optional[Callable[[Any, Tuple, Dict], Optional[str]]] = None
+

 import os
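The two new pricing-related fields are driven by environment variables (read by the dataclass default factories, so they must be set before the config is constructed), while the extractors are plain callables supplied in code. A minimal sketch of wiring both up — the "metadata" kwarg read by the lambdas mirrors the example in the field comments and is an illustrative assumption about the instrumented call:

import os

from genai_otel.config import OTelConfig

# Environment variables must be set before OTelConfig is instantiated,
# because the default factories call os.getenv at construction time.
os.environ["GENAI_POWER_COST_PER_KWH"] = "0.15"  # USD/kWh; default is 0.12
os.environ["GENAI_CUSTOM_PRICING_JSON"] = (
    '{"chat": {"custom-model": {"promptPrice": 0.001, "completionPrice": 0.002}}}'
)

config = OTelConfig(
    # Extractors receive (instance, args, kwargs) and return Optional[str];
    # the "metadata" kwarg used here is a hypothetical request shape.
    session_id_extractor=lambda instance, args, kwargs: kwargs.get("metadata", {}).get("session_id"),
    user_id_extractor=lambda instance, args, kwargs: kwargs.get("metadata", {}).get("user_id"),
)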
genai_otel/cost_calculator.py
CHANGED

@@ -13,10 +13,18 @@ class CostCalculator:

     DEFAULT_PRICING_FILE = "llm_pricing.json"

-    def __init__(self):
-        """Initializes the CostCalculator by loading pricing data from a JSON file."""
+    def __init__(self, custom_pricing_json: Optional[str] = None):
+        """Initializes the CostCalculator by loading pricing data from a JSON file.
+
+        Args:
+            custom_pricing_json: Optional JSON string with custom model pricing.
+                Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+                Custom prices will be merged with default pricing, with custom taking precedence.
+        """
         self.pricing_data: Dict[str, Any] = {}
         self._load_pricing()
+        if custom_pricing_json:
+            self._merge_custom_pricing(custom_pricing_json)

     def _load_pricing(self):
         """Load pricing data from the JSON configuration file."""

@@ -60,6 +68,64 @@ class CostCalculator:
         except Exception as e:
             logger.error("An unexpected error occurred while loading pricing: %s", e, exc_info=True)

+    def _merge_custom_pricing(self, custom_pricing_json: str):
+        """Merge custom pricing from JSON string into existing pricing data.
+
+        Args:
+            custom_pricing_json: JSON string with custom model pricing.
+                Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
+        """
+        try:
+            custom_pricing = json.loads(custom_pricing_json)
+
+            if not isinstance(custom_pricing, dict):
+                logger.error(
+                    "Custom pricing must be a JSON object/dict. Got: %s",
+                    type(custom_pricing).__name__,
+                )
+                return
+
+            # Merge custom pricing into each category (chat, embeddings, images, audio)
+            for category, models in custom_pricing.items():
+                if category not in ["chat", "embeddings", "images", "audio"]:
+                    logger.warning(
+                        "Unknown pricing category '%s' in custom pricing. Valid categories: "
+                        "chat, embeddings, images, audio",
+                        category,
+                    )
+                    continue
+
+                if not isinstance(models, dict):
+                    logger.error(
+                        "Custom pricing for category '%s' must be a dict. Got: %s",
+                        category,
+                        type(models).__name__,
+                    )
+                    continue
+
+                # Initialize category if it doesn't exist
+                if category not in self.pricing_data:
+                    self.pricing_data[category] = {}
+
+                # Merge models into the category
+                for model_name, pricing in models.items():
+                    self.pricing_data[category][model_name] = pricing
+                    logger.info(
+                        "Added custom pricing for %s model '%s': %s",
+                        category,
+                        model_name,
+                        pricing,
+                    )
+
+        except json.JSONDecodeError as e:
+            logger.error(
+                "Failed to decode custom pricing JSON: %s. Custom pricing will be ignored.", e
+            )
+        except Exception as e:
+            logger.error(
+                "An unexpected error occurred while merging custom pricing: %s", e, exc_info=True
+            )
+
     def calculate_cost(
         self,
         model: str,

@@ -150,7 +216,7 @@ class CostCalculator:
                     model,
                     param_count,
                     pricing["promptPrice"],
-                    pricing["completionPrice"]
+                    pricing["completionPrice"],
                 )
             else:
                 logger.debug("Pricing not found for chat model: %s", model)

@@ -319,14 +385,14 @@ class CostCalculator:

         # First try explicit parameter count patterns (e.g., 135m, 7b, 70b)
         # Matches: digits followed by optional decimal, then 'm' or 'b'
-        pattern = r'(\d+(?:\.\d+)?)(m|b)(?:\s|:|$|-)'
+        pattern = r"(\d+(?:\.\d+)?)(m|b)(?:\s|:|$|-)"
         match = re.search(pattern, model_lower)
         if match:
             value = float(match.group(1))
             unit = match.group(2)
-            if unit == 'm':
+            if unit == "m":
                 return value / 1000  # Convert millions to billions
-            elif unit == 'b':
+            elif unit == "b":
                 return value

         # Fallback to common model size indicators for HuggingFace models
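A short sketch of the new constructor path, using the format documented in the docstring (the model name and prices are illustrative; the import path is assumed from the module layout above):

from genai_otel.cost_calculator import CostCalculator

custom = '{"chat": {"my-model": {"promptPrice": 0.001, "completionPrice": 0.002}}}'
calc = CostCalculator(custom_pricing_json=custom)

# The custom entry is merged into the bundled llm_pricing.json data,
# overriding any existing "my-model" entry in the "chat" category.
print(calc.pricing_data["chat"]["my-model"])  # {'promptPrice': 0.001, 'completionPrice': 0.002}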
genai_otel/cost_enrichment_processor.py
CHANGED

@@ -1,177 +1,175 @@
-(previous 177-line version of the file, shown by the diff viewer as removed in full and rewritten below)
+"""Custom SpanProcessor to enrich OpenInference spans with cost tracking.
+
+This processor adds cost attributes to spans created by OpenInference instrumentors
+(smolagents, litellm, mcp) by extracting token usage and model information from
+existing span attributes and calculating costs using our CostCalculator.
+
+Supports both OpenTelemetry GenAI and OpenInference semantic conventions:
+- GenAI: gen_ai.request.model, gen_ai.usage.{prompt_tokens,completion_tokens}
+- OpenInference: llm.model_name, llm.token_count.{prompt,completion}
+"""
+
+import logging
+from typing import Optional
+
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+from opentelemetry.trace import SpanContext
+
+from .cost_calculator import CostCalculator
+
+logger = logging.getLogger(__name__)
+
+
+class CostEnrichmentSpanProcessor(SpanProcessor):
+    """Enriches spans with cost tracking attributes.
+
+    This processor:
+    1. Identifies spans from OpenInference instrumentors (smolagents, litellm, mcp)
+    2. Extracts model name and token usage from span attributes
+    3. Calculates cost using CostCalculator
+    4. Adds cost attributes (gen_ai.usage.cost.total, etc.) to the span
+    """
+
+    def __init__(self, cost_calculator: Optional[CostCalculator] = None):
+        """Initialize the cost enrichment processor.
+
+        Args:
+            cost_calculator: CostCalculator instance to use for cost calculations.
+                If None, creates a new instance.
+        """
+        self.cost_calculator = cost_calculator or CostCalculator()
+        logger.info("CostEnrichmentSpanProcessor initialized")
+
+    def on_start(self, span: Span, parent_context: Optional[SpanContext] = None) -> None:
+        """Called when a span starts. No action needed."""
+        pass
+
+    def on_end(self, span: ReadableSpan) -> None:
+        """Called when a span ends. Enriches with cost attributes if applicable.
+
+        Args:
+            span: The span that just ended.
+        """
+        try:
+            # Only process spans that have LLM-related attributes
+            if not span.attributes:
+                return
+
+            attributes = span.attributes
+
+            # Check for model name - support both GenAI and OpenInference conventions
+            model = (
+                attributes.get("gen_ai.request.model")
+                or attributes.get("llm.model_name")
+                or attributes.get("embedding.model_name")
+            )
+            if not model:
+                return
+
+            # Skip if cost attributes are already present (added by instrumentor)
+            if "gen_ai.usage.cost.total" in attributes:
+                logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
+                return
+
+            # Extract token usage - support GenAI, OpenInference, and legacy conventions
+            prompt_tokens = (
+                attributes.get("gen_ai.usage.prompt_tokens")
+                or attributes.get("gen_ai.usage.input_tokens")
+                or attributes.get("llm.token_count.prompt")  # OpenInference
+                or 0
+            )
+            completion_tokens = (
+                attributes.get("gen_ai.usage.completion_tokens")
+                or attributes.get("gen_ai.usage.output_tokens")
+                or attributes.get("llm.token_count.completion")  # OpenInference
+                or 0
+            )
+
+            # Skip if no tokens recorded
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return
+
+            # Get call type - support both GenAI and OpenInference conventions
+            # OpenInference uses openinference.span.kind (values: LLM, EMBEDDING, etc.)
+            span_kind = attributes.get("openinference.span.kind", "").upper()
+            call_type = attributes.get("gen_ai.operation.name") or span_kind.lower() or "chat"
+
+            # Map operation names to call types for cost calculator
+            # Supports both GenAI and OpenInference conventions
+            call_type_mapping = {
+                # GenAI conventions
+                "chat": "chat",
+                "completion": "chat",
+                "embedding": "embedding",
+                "embeddings": "embedding",
+                "text_generation": "chat",
+                "image_generation": "image",
+                "audio": "audio",
+                # OpenInference conventions (span.kind values)
+                "llm": "chat",
+                "embedding": "embedding",
+                "chain": "chat",
+                "retriever": "embedding",
+                "reranker": "embedding",
+                "tool": "chat",
+                "agent": "chat",
+            }
+            normalized_call_type = call_type_mapping.get(str(call_type).lower(), "chat")
+
+            # Calculate cost
+            usage = {
+                "prompt_tokens": int(prompt_tokens),
+                "completion_tokens": int(completion_tokens),
+                "total_tokens": int(prompt_tokens) + int(completion_tokens),
+            }
+
+            # Use calculate_granular_cost to get detailed breakdown
+            cost_info = self.cost_calculator.calculate_granular_cost(
+                model=str(model),
+                usage=usage,
+                call_type=normalized_call_type,
+            )
+
+            if cost_info and cost_info.get("total", 0.0) > 0:
+                # Add cost attributes to the span
+                # Note: We can't modify ReadableSpan attributes directly,
+                # but we can if span is still a Span instance
+                if isinstance(span, Span):
+                    span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
+
+                    if cost_info.get("prompt", 0.0) > 0:
+                        span.set_attribute("gen_ai.usage.cost.prompt", cost_info["prompt"])
+                    if cost_info.get("completion", 0.0) > 0:
+                        span.set_attribute("gen_ai.usage.cost.completion", cost_info["completion"])
+
+                    logger.info(
+                        f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
+                        f"for model {model} ({usage['total_tokens']} tokens)"
+                    )
+                else:
+                    logger.warning(
+                        f"Span '{span.name}' is not mutable (type: {type(span).__name__}), "
+                        "cannot add cost attributes"
+                    )
+
+        except Exception as e:
+            # Don't fail span processing due to cost enrichment errors
+            logger.warning(
+                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
+                exc_info=True,
+            )
+
+    def shutdown(self) -> None:
+        """Called when the processor is shutdown."""
+        logger.info("CostEnrichmentSpanProcessor shutdown")
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush any pending spans.
+
+        Args:
+            timeout_millis: Timeout in milliseconds.
+
+        Returns:
+            True if flush succeeded.
+        """
+        return True
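Because enrichment happens in on_end, the processor has to run before any exporting processor so the exporter sees the added gen_ai.usage.cost.* attributes; the OpenTelemetry SDK invokes processors in registration order. A minimal sketch under that assumption (the console exporter is an illustrative choice):

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

from genai_otel.cost_enrichment_processor import CostEnrichmentSpanProcessor

provider = TracerProvider()
provider.add_span_processor(CostEnrichmentSpanProcessor())  # enrich first
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))  # then export
trace.set_tracer_provider(provider)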
genai_otel/gpu_metrics.py
CHANGED

@@ -42,6 +42,7 @@ class GPUMetricsCollector:
         self._stop_event = threading.Event()
         self.gpu_utilization_counter: Optional[ObservableCounter] = None
         self.gpu_memory_used_gauge: Optional[ObservableGauge] = None
+        self.gpu_memory_total_gauge: Optional[ObservableGauge] = None
         self.gpu_temperature_gauge: Optional[ObservableGauge] = None
         self.gpu_power_gauge: Optional[ObservableGauge] = None
         self.config = config

@@ -67,6 +68,11 @@ class GPUMetricsCollector:
             description="Cumulative CO2 equivalent emissions in grams",
             unit="gCO2e",
         )
+        self.power_cost_counter = meter.create_counter(
+            "gen_ai.power.cost",  # New metric name
+            description="Cumulative electricity cost in USD based on GPU power consumption",
+            unit="USD",
+        )
         if not NVML_AVAILABLE:
             logger.warning(
                 "GPU metrics collection not available - nvidia-ml-py not installed. "

@@ -88,6 +94,12 @@ class GPUMetricsCollector:
             description="GPU memory used in MiB",
             unit="MiB",
         )
+        self.gpu_memory_total_gauge = self.meter.create_observable_gauge(
+            "gen_ai.gpu.memory.total",  # Fixed metric name
+            callbacks=[self._observe_gpu_memory_total],
+            description="Total GPU memory capacity in MiB",
+            unit="MiB",
+        )
         self.gpu_temperature_gauge = self.meter.create_observable_gauge(
             "gen_ai.gpu.temperature",  # Fixed metric name
             callbacks=[self._observe_gpu_temperature],

@@ -167,6 +179,33 @@ class GPUMetricsCollector:
         except Exception as e:
             logger.error("Error observing GPU memory: %s", e)

+    def _observe_gpu_memory_total(self, options):
+        """Observable callback for total GPU memory capacity."""
+        if not NVML_AVAILABLE or not self.gpu_available:
+            return
+
+        try:
+            pynvml.nvmlInit()
+            device_count = pynvml.nvmlDeviceGetCount()
+
+            for i in range(device_count):
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                device_name = self._get_device_name(handle, i)
+
+                try:
+                    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+                    gpu_memory_total = memory_info.total / (1024**2)  # Convert to MiB
+                    yield Observation(
+                        value=gpu_memory_total,
+                        attributes={"gpu_id": str(i), "gpu_name": device_name},
+                    )
+                except Exception as e:
+                    logger.debug("Failed to get total GPU memory for GPU %d: %s", i, e)
+
+            pynvml.nvmlShutdown()
+        except Exception as e:
+            logger.error("Error observing total GPU memory: %s", e)
+
     def _observe_gpu_temperature(self, options):
         """Observable callback for GPU temperature."""
         if not NVML_AVAILABLE or not self.gpu_available:

@@ -249,11 +288,22 @@ class GPUMetricsCollector:
                         delta_time_hours * 3600.0
                     )  # Wh (power in kW * hours = kWh, but track in Wh for precision)
                     self.cumulative_energy_wh[i] += delta_energy_wh
+
+                    # Calculate and record CO2 emissions if enabled
                     if self.config.enable_co2_tracking:
                         delta_co2_g = (
                             delta_energy_wh / 1000.0
                         ) * self.config.carbon_intensity  # gCO2e
                         self.co2_counter.add(delta_co2_g, {"gpu_id": str(i)})
+
+                    # Calculate and record power cost
+                    # delta_energy_wh is in Wh, convert to kWh and multiply by cost per kWh
+                    delta_cost_usd = (delta_energy_wh / 1000.0) * self.config.power_cost_per_kwh
+                    device_name = self._get_device_name(handle, i)
+                    self.power_cost_counter.add(
+                        delta_cost_usd, {"gpu_id": str(i), "gpu_name": device_name}
+                    )
+
                     self.last_timestamp[i] = current_time
             except Exception as e:
                 logger.error(f"Error collecting GPU {i} metrics: {e}")
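The cost math mirrors the CO2 path: per-interval energy in Wh is divided by 1000 to get kWh and multiplied by the configured rate. A worked example using the defaults (the 300 W draw is illustrative, not taken from the package):

# One GPU drawing 300 W over the default 5-second collection interval
power_w = 300.0
interval_s = 5.0                                     # GENAI_GPU_COLLECTION_INTERVAL default
delta_energy_wh = power_w * interval_s / 3600.0      # ~0.4167 Wh for this interval
delta_cost_usd = (delta_energy_wh / 1000.0) * 0.12   # GENAI_POWER_COST_PER_KWH default
print(round(delta_cost_usd, 8))                      # ~5e-05 USD added to gen_ai.power.cost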