genai-otel-instrument 0.1.24 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
genai_otel/cost_calculator.py
@@ -0,0 +1,467 @@
"""Module for calculating estimated costs of LLM API calls."""

import json
import logging
import re
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


class CostCalculator:
    """Calculate estimated costs for LLM API calls based on loaded pricing data."""

    DEFAULT_PRICING_FILE = "llm_pricing.json"

    def __init__(self, custom_pricing_json: Optional[str] = None):
        """Initializes the CostCalculator by loading pricing data from a JSON file.

        Args:
            custom_pricing_json: Optional JSON string with custom model pricing.
                Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
                Custom prices will be merged with default pricing, with custom taking precedence.
        """
        self.pricing_data: Dict[str, Any] = {}
        self._load_pricing()
        if custom_pricing_json:
            self._merge_custom_pricing(custom_pricing_json)

    def _load_pricing(self):
        """Load pricing data from the JSON configuration file."""
        try:
            try:
                from importlib.resources import files

                pricing_file = files("genai_otel").joinpath(self.DEFAULT_PRICING_FILE)
                data = json.loads(pricing_file.read_text(encoding="utf-8"))
            except (ImportError, AttributeError):
                try:
                    import importlib_resources

                    pricing_file = importlib_resources.files("genai_otel").joinpath(
                        self.DEFAULT_PRICING_FILE
                    )
                    data = json.loads(pricing_file.read_text(encoding="utf-8"))
                except ImportError:
                    import pkg_resources

                    pricing_file_path = pkg_resources.resource_filename(
                        "genai_otel", self.DEFAULT_PRICING_FILE
                    )
                    with open(pricing_file_path, "r", encoding="utf-8") as f:
                        data = json.load(f)

            if isinstance(data, dict):
                self.pricing_data = data
                logger.info("Successfully loaded pricing data.")
            else:
                logger.error("Invalid format in pricing file. Root element is not a dictionary.")
        except FileNotFoundError:
            logger.warning(
                "Pricing file '%s' not found. Cost tracking will be disabled.",
                self.DEFAULT_PRICING_FILE,
            )
        except json.JSONDecodeError as e:
            logger.error(
                "Failed to decode JSON from pricing file: %s. Cost tracking will be disabled.", e
            )
        except Exception as e:
            logger.error("An unexpected error occurred while loading pricing: %s", e, exc_info=True)

    def _merge_custom_pricing(self, custom_pricing_json: str):
        """Merge custom pricing from JSON string into existing pricing data.

        Args:
            custom_pricing_json: JSON string with custom model pricing.
                Format: {"chat": {"model-name": {"promptPrice": 0.001, "completionPrice": 0.002}}}
        """
        try:
            custom_pricing = json.loads(custom_pricing_json)

            if not isinstance(custom_pricing, dict):
                logger.error(
                    "Custom pricing must be a JSON object/dict. Got: %s",
                    type(custom_pricing).__name__,
                )
                return

            # Merge custom pricing into each category (chat, embeddings, images, audio)
            for category, models in custom_pricing.items():
                if category not in ["chat", "embeddings", "images", "audio"]:
                    logger.warning(
                        "Unknown pricing category '%s' in custom pricing. Valid categories: "
                        "chat, embeddings, images, audio",
                        category,
                    )
                    continue

                if not isinstance(models, dict):
                    logger.error(
                        "Custom pricing for category '%s' must be a dict. Got: %s",
                        category,
                        type(models).__name__,
                    )
                    continue

                # Initialize category if it doesn't exist
                if category not in self.pricing_data:
                    self.pricing_data[category] = {}

                # Merge models into the category
                for model_name, pricing in models.items():
                    self.pricing_data[category][model_name] = pricing
                    logger.info(
                        "Added custom pricing for %s model '%s': %s",
                        category,
                        model_name,
                        pricing,
                    )

        except json.JSONDecodeError as e:
            logger.error(
                "Failed to decode custom pricing JSON: %s. Custom pricing will be ignored.", e
            )
        except Exception as e:
            logger.error(
                "An unexpected error occurred while merging custom pricing: %s", e, exc_info=True
            )

    def calculate_cost(
        self,
        model: str,
        usage: Dict[str, Any],
        call_type: str,
    ) -> float:
        """Calculate cost in USD for a request based on model, usage, and call type.

        Note: For chat requests, use calculate_granular_cost() to get the
        prompt/completion/reasoning/cache breakdown. This method returns the
        total cost for backwards compatibility.
        """
        if not self.pricing_data:
            return 0.0

        if call_type == "chat":
            return self._calculate_chat_cost(model, usage)
        if call_type == "embedding":
            return self._calculate_embedding_cost(model, usage)
        if call_type == "image":
            return self._calculate_image_cost(model, usage)
        if call_type == "audio":
            return self._calculate_audio_cost(model, usage)

        logger.warning("Unknown call type '%s' for cost calculation.", call_type)
        return 0.0

    def calculate_granular_cost(
        self,
        model: str,
        usage: Dict[str, Any],
        call_type: str,
    ) -> Dict[str, float]:
        """Calculate granular cost breakdown for a request.

        Returns a dictionary with:
        - total: Total cost
        - prompt: Prompt tokens cost
        - completion: Completion tokens cost
        - reasoning: Reasoning tokens cost (OpenAI o1 models)
        - cache_read: Cache read cost (Anthropic)
        - cache_write: Cache write cost (Anthropic)
        """
        if not self.pricing_data:
            return {
                "total": 0.0,
                "prompt": 0.0,
                "completion": 0.0,
                "reasoning": 0.0,
                "cache_read": 0.0,
                "cache_write": 0.0,
            }

        if call_type == "chat":
            return self._calculate_chat_cost_granular(model, usage)

        # For non-chat requests, only return total cost
        total_cost = self.calculate_cost(model, usage, call_type)
        return {
            "total": total_cost,
            "prompt": 0.0,
            "completion": 0.0,
            "reasoning": 0.0,
            "cache_read": 0.0,
            "cache_write": 0.0,
        }

    def _calculate_chat_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Calculate cost for chat models."""
        granular = self._calculate_chat_cost_granular(model, usage)
        return granular["total"]

    def _calculate_chat_cost_granular(self, model: str, usage: Dict[str, int]) -> Dict[str, float]:
        """Calculate granular cost breakdown for chat models.

        Returns:
            Dict with keys: total, prompt, completion, reasoning, cache_read, cache_write
        """
        model_key = self._normalize_model_name(model, "chat")

        # Fallback for unknown local models (Ollama, HuggingFace): estimate pricing based on parameter count
        if not model_key:
            param_count = self._extract_param_count_from_model_name(model)
            if param_count is not None:
                pricing = self._get_local_model_price_tier(param_count)
                logger.info(
                    "Using fallback pricing for unknown local model '%s' with %.2fB parameters: "
                    "$%.4f prompt / $%.4f completion per 1k tokens",
                    model,
                    param_count,
                    pricing["promptPrice"],
                    pricing["completionPrice"],
                )
            else:
                logger.debug("Pricing not found for chat model: %s", model)
                return {
                    "total": 0.0,
                    "prompt": 0.0,
                    "completion": 0.0,
                    "reasoning": 0.0,
                    "cache_read": 0.0,
                    "cache_write": 0.0,
                }
        else:
            pricing = self.pricing_data["chat"][model_key]

        # Standard prompt and completion tokens
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)

        prompt_cost = (prompt_tokens / 1000) * pricing.get("promptPrice", 0.0)
        completion_cost = (completion_tokens / 1000) * pricing.get("completionPrice", 0.0)

        # Reasoning tokens (OpenAI o1 models)
        reasoning_tokens = usage.get("completion_tokens_details", {}).get("reasoning_tokens", 0)
        reasoning_cost = 0.0
        if reasoning_tokens > 0 and "reasoningPrice" in pricing:
            reasoning_cost = (reasoning_tokens / 1000) * pricing.get("reasoningPrice", 0.0)

        # Cache costs (Anthropic models)
        cache_read_tokens = usage.get("cache_read_input_tokens", 0)
        cache_write_tokens = usage.get("cache_creation_input_tokens", 0)
        cache_read_cost = 0.0
        cache_write_cost = 0.0

        if cache_read_tokens > 0 and "cacheReadPrice" in pricing:
            cache_read_cost = (cache_read_tokens / 1000) * pricing.get("cacheReadPrice", 0.0)
        if cache_write_tokens > 0 and "cacheWritePrice" in pricing:
            cache_write_cost = (cache_write_tokens / 1000) * pricing.get("cacheWritePrice", 0.0)

        total_cost = (
            prompt_cost + completion_cost + reasoning_cost + cache_read_cost + cache_write_cost
        )

        return {
            "total": total_cost,
            "prompt": prompt_cost,
            "completion": completion_cost,
            "reasoning": reasoning_cost,
            "cache_read": cache_read_cost,
            "cache_write": cache_write_cost,
        }

    def _calculate_embedding_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Calculate cost for embedding models."""
        model_key = self._normalize_model_name(model, "embeddings")
        if not model_key:
            logger.debug("Pricing not found for embedding model: %s", model)
            return 0.0

        price_per_1k_tokens = self.pricing_data["embeddings"][model_key]
        total_tokens = usage.get("prompt_tokens", 0) or usage.get("total_tokens", 0)
        return (total_tokens / 1000) * price_per_1k_tokens

    def _calculate_image_cost(self, model: str, usage: Dict[str, Any]) -> float:
        """Calculate cost for image generation models."""
        model_key = self._normalize_model_name(model, "images")
        if not model_key:
            logger.debug("Pricing not found for image model: %s", model)
            return 0.0

        pricing_info = self.pricing_data["images"][model_key]
        quality = usage.get("quality", "standard")
        size = usage.get("size")
        n = usage.get("n", 1)

        if quality not in pricing_info:
            logger.warning("Quality '%s' not found for image model %s", quality, model_key)
            return 0.0

        # Handle pricing per million pixels
        if "1000000" in pricing_info[quality]:
            price_per_million_pixels = pricing_info[quality]["1000000"]
            height = usage.get("height", 0)
            width = usage.get("width", 0)
            return (height * width / 1_000_000) * price_per_million_pixels * n

        if not size:
            logger.warning("Image size not provided for model %s", model_key)
            return 0.0

        if size not in pricing_info[quality]:
            logger.warning(
                "Size '%s' not found for image model %s with quality '%s'", size, model_key, quality
            )
            return 0.0

        price_per_image = pricing_info[quality][size]
        return price_per_image * n

    def _calculate_audio_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Calculate cost for audio models."""
        model_key = self._normalize_model_name(model, "audio")
        if not model_key:
            logger.debug("Pricing not found for audio model: %s", model)
            return 0.0

        pricing = self.pricing_data["audio"][model_key]

        if "characters" in usage:
            # Price is per 1000 characters
            return (usage["characters"] / 1000) * pricing
        if "seconds" in usage:
            # Price is per second
            return usage["seconds"] * pricing

        logger.warning(
            "Could not determine usage unit for audio model %s. Expected 'characters' or 'seconds'.",
            model_key,
        )
        return 0.0

    def _normalize_model_name(self, model: str, category: str) -> Optional[str]:
        """Normalize model name to match pricing keys for a specific category."""
        if category not in self.pricing_data:
            return None

        normalized_model = model.lower()

        # Exact match (case-insensitive)
        for key in self.pricing_data[category]:
            if normalized_model == key.lower():
                return key

        # Substring match (case-insensitive)
        sorted_keys = sorted(self.pricing_data[category].keys(), key=len, reverse=True)
        for key in sorted_keys:
            if key.lower() in normalized_model:
                return key
        return None

    def _extract_param_count_from_model_name(self, model: str) -> Optional[float]:
        """Extract parameter count from Ollama or HuggingFace model name.

        Supports both explicit size indicators and common model size names.

        Examples:
            Ollama models:
                "smollm2:360m" -> 0.36
                "llama3:7b" -> 7.0
                "llama3.1:70b" -> 70.0
                "deepseek-r1:32b" -> 32.0

            HuggingFace models:
                "gpt2" -> 0.124 (base)
                "gpt2-xl" -> 1.5
                "bert-base-uncased" -> 0.11
                "bert-large-uncased" -> 0.34
                "t5-small" -> 0.06
                "t5-xxl" -> 11.0
                "llama-2-7b" -> 7.0
                "mistral-7b-v0.1" -> 7.0

        Returns:
            Parameter count in billions, or None if not parseable.
        """
        model_lower = model.lower()

        # First try explicit parameter count patterns (e.g., 135m, 7b, 70b)
        # Matches: digits followed by optional decimal, then 'm' or 'b'
        pattern = r"(\d+(?:\.\d+)?)(m|b)(?:\s|:|$|-)"
        match = re.search(pattern, model_lower)
        if match:
            value = float(match.group(1))
            unit = match.group(2)
            if unit == "m":
                return value / 1000  # Convert millions to billions
            elif unit == "b":
                return value

        # Fallback to common model size indicators for HuggingFace models
        # These are approximate values based on typical model sizes
        size_map = {
            # T5 family
            "t5-small": 0.06,
            "t5-base": 0.22,
            "t5-large": 0.77,
            "t5-xl": 3.0,
            "t5-xxl": 11.0,
            # GPT-2 family
            "gpt2-small": 0.124,
            "gpt2-medium": 0.355,
            "gpt2-large": 0.774,
            "gpt2-xl": 1.5,
            "gpt2": 0.124,  # default GPT-2 is small
            # BERT family
            "bert-tiny": 0.004,
            "bert-mini": 0.011,
            "bert-small": 0.029,
            "bert-medium": 0.041,
            "bert-base": 0.11,
            "bert-large": 0.34,
            # Generic size indicators (fallback)
            "tiny": 0.01,
            "mini": 0.02,
            "small": 0.06,
            "base": 0.11,
            "medium": 0.35,
            "large": 0.77,
            "xl": 1.5,
            "xxl": 11.0,
        }

        # Check for size indicators in the model name
        for size_key, param_count in size_map.items():
            if size_key in model_lower:
                return param_count

        return None

    def _get_local_model_price_tier(self, param_count_billions: float) -> Dict[str, float]:
        """Get pricing tier based on parameter count for local models (Ollama, HuggingFace).

        Local models (Ollama, HuggingFace Transformers) are free but consume GPU power
        and electricity. We estimate costs based on parameter count and comparable
        cloud API pricing.

        Price Tiers (based on parameter count):
        - Tiny (< 1B params): $0.0001 / $0.0002 (prompt/completion)
        - Small (1-10B): $0.0003 / $0.0006
        - Medium (10-20B): $0.0005 / $0.001
        - Large (20-80B): $0.0008 / $0.0008
        - XLarge (80B+): $0.0012 / $0.0012

        Args:
            param_count_billions: Model parameter count in billions

        Returns:
            Dict with promptPrice and completionPrice
        """
        if param_count_billions < 1.0:
            return {"promptPrice": 0.0001, "completionPrice": 0.0002}
        elif param_count_billions < 10.0:
            return {"promptPrice": 0.0003, "completionPrice": 0.0006}
        elif param_count_billions < 20.0:
            return {"promptPrice": 0.0005, "completionPrice": 0.001}
        elif param_count_billions < 80.0:
            return {"promptPrice": 0.0008, "completionPrice": 0.0008}
        else:
            return {"promptPrice": 0.0012, "completionPrice": 0.0012}
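For orientation, a minimal usage sketch of the calculator above. The model names, token counts, and custom prices here are hypothetical (they are not taken from the bundled llm_pricing.json); the JSON shape and the per-1k-token semantics follow the docstrings in the class itself.

# Hypothetical usage sketch for CostCalculator; all values are illustrative.
import json

from genai_otel.cost_calculator import CostCalculator

# Custom pricing is merged over the bundled llm_pricing.json and takes precedence.
custom = json.dumps(
    {"chat": {"my-fine-tune": {"promptPrice": 0.001, "completionPrice": 0.002}}}
)
calc = CostCalculator(custom_pricing_json=custom)

# Prices are per 1k tokens, so 500 prompt + 200 completion tokens cost
# (500 / 1000) * 0.001 + (200 / 1000) * 0.002 = 0.0009 USD.
breakdown = calc.calculate_granular_cost(
    model="my-fine-tune",
    usage={"prompt_tokens": 500, "completion_tokens": 200},
    call_type="chat",
)
print(breakdown["total"], breakdown["prompt"], breakdown["completion"])

# An unpriced local model name (assuming it matches no bundled pricing key)
# falls back to the parameter-count tiers: "7b" parses as 7B params, which
# lands in the 1-10B tier at $0.0003 / $0.0006 per 1k tokens.
fallback = calc.calculate_granular_cost(
    model="my-local-llm:7b",
    usage={"prompt_tokens": 1000, "completion_tokens": 1000},
    call_type="chat",
)
print(fallback["total"])  # (1000/1000)*0.0003 + (1000/1000)*0.0006 = 0.0009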
genai_otel/cost_enriching_exporter.py
@@ -0,0 +1,207 @@
"""Custom SpanExporter that enriches spans with cost attributes before export.

This exporter wraps another exporter (like OTLPSpanExporter) and adds cost
attributes to spans before passing them to the wrapped exporter.
"""

import logging
from typing import Optional, Sequence

from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

from .cost_calculator import CostCalculator

logger = logging.getLogger(__name__)


class CostEnrichingSpanExporter(SpanExporter):
    """Wraps a SpanExporter and enriches spans with cost attributes before export.

    This exporter:
    1. Receives ReadableSpan objects from the SDK
    2. Extracts model name and token usage from span attributes
    3. Calculates cost using CostCalculator
    4. Creates enriched span data with cost attributes
    5. Exports to the wrapped exporter (e.g., OTLP)
    """

    def __init__(
        self, wrapped_exporter: SpanExporter, cost_calculator: Optional[CostCalculator] = None
    ):
        """Initialize the cost enriching exporter.

        Args:
            wrapped_exporter: The underlying exporter to send enriched spans to.
            cost_calculator: CostCalculator instance to use for cost calculations.
                If None, creates a new instance.
        """
        self.wrapped_exporter = wrapped_exporter
        self.cost_calculator = cost_calculator or CostCalculator()
        logger.info(
            f"CostEnrichingSpanExporter initialized, wrapping {type(wrapped_exporter).__name__}"
        )

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Export spans after enriching them with cost attributes.

        Args:
            spans: Sequence of ReadableSpan objects to export.

        Returns:
            SpanExportResult from the wrapped exporter.
        """
        try:
            # Enrich spans with cost attributes
            enriched_spans = []
            for span in spans:
                enriched_span = self._enrich_span(span)
                enriched_spans.append(enriched_span)

            # Export to wrapped exporter
            return self.wrapped_exporter.export(enriched_spans)

        except Exception as e:
            logger.error(f"Failed to export spans: {e}", exc_info=True)
            return SpanExportResult.FAILURE

    def _enrich_span(self, span: ReadableSpan) -> ReadableSpan:
        """Enrich a span with cost attributes if applicable.

        Args:
            span: The original ReadableSpan.

        Returns:
            A new ReadableSpan with cost attributes added (or the original if not applicable).
        """
        try:
            # Check if span has LLM-related attributes
            if not span.attributes:
                return span

            attributes = dict(span.attributes)  # Make a mutable copy

            # Check for model name - support both GenAI and OpenInference conventions
            model = (
                attributes.get("gen_ai.request.model")
                or attributes.get("llm.model_name")
                or attributes.get("embedding.model_name")
            )
            if not model:
                return span

            # Skip if cost attributes are already present
            if "gen_ai.usage.cost.total" in attributes:
                logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
                return span

            # Extract token usage - support GenAI, OpenInference, and legacy conventions
            prompt_tokens = (
                attributes.get("gen_ai.usage.prompt_tokens")
                or attributes.get("gen_ai.usage.input_tokens")
                or attributes.get("llm.token_count.prompt")  # OpenInference
                or 0
            )
            completion_tokens = (
                attributes.get("gen_ai.usage.completion_tokens")
                or attributes.get("gen_ai.usage.output_tokens")
                or attributes.get("llm.token_count.completion")  # OpenInference
                or 0
            )

            # Skip if no tokens recorded
            if prompt_tokens == 0 and completion_tokens == 0:
                return span

            # Get call type - support both GenAI and OpenInference conventions
            span_kind = attributes.get("openinference.span.kind", "").upper()
            call_type = attributes.get("gen_ai.operation.name") or span_kind.lower() or "chat"

            # Map operation names to call types
            call_type_mapping = {
                "chat": "chat",
                "completion": "chat",
                "embedding": "embedding",
                "embeddings": "embedding",
                "text_generation": "chat",
                "image_generation": "image",
                "audio": "audio",
                "llm": "chat",
                "chain": "chat",
                "retriever": "embedding",
                "reranker": "embedding",
                "tool": "chat",
                "agent": "chat",
            }
            normalized_call_type = call_type_mapping.get(str(call_type).lower(), "chat")

            # Calculate cost
            usage = {
                "prompt_tokens": int(prompt_tokens),
                "completion_tokens": int(completion_tokens),
                "total_tokens": int(prompt_tokens) + int(completion_tokens),
            }

            cost_info = self.cost_calculator.calculate_granular_cost(
                model=str(model),
                usage=usage,
                call_type=normalized_call_type,
            )

            if cost_info and cost_info.get("total", 0.0) > 0:
                # Add cost attributes to the mutable copy
                attributes["gen_ai.usage.cost.total"] = cost_info["total"]

                if cost_info.get("prompt", 0.0) > 0:
                    attributes["gen_ai.usage.cost.prompt"] = cost_info["prompt"]
                if cost_info.get("completion", 0.0) > 0:
                    attributes["gen_ai.usage.cost.completion"] = cost_info["completion"]

                logger.info(
                    f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
                    f"for model {model} ({usage['total_tokens']} tokens)"
                )

            # Create a new ReadableSpan carrying the enriched attributes.
            # ReadableSpan instances are immutable, so we build a replacement
            # rather than mutating the original in place.
            enriched_span = ReadableSpan(
                name=span.name,
                context=span.context,
                kind=span.kind,
                parent=span.parent,
                start_time=span.start_time,
                end_time=span.end_time,
                status=span.status,
                attributes=attributes,  # Use enriched attributes
                events=span.events,
                links=span.links,
                resource=span.resource,
                instrumentation_scope=span.instrumentation_scope,
            )
            return enriched_span

        except Exception as e:
            logger.warning(
                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
                exc_info=True,
            )

        return span

    def shutdown(self) -> None:
        """Shutdown the wrapped exporter."""
        logger.info("CostEnrichingSpanExporter shutting down")
        self.wrapped_exporter.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush the wrapped exporter.

        Args:
            timeout_millis: Timeout in milliseconds.

        Returns:
            True if flush succeeded.
        """
        return self.wrapped_exporter.force_flush(timeout_millis)
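For orientation, a sketch of one plausible way to wire the exporter into an OpenTelemetry pipeline. The service name and OTLP endpoint are placeholders; TracerProvider, BatchSpanProcessor, and OTLPSpanExporter are standard OpenTelemetry Python SDK APIs, and the genai_otel imports match the file paths in the listing above.

# Hypothetical wiring sketch: enrich spans with cost attributes before OTLP export.
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from genai_otel.cost_calculator import CostCalculator
from genai_otel.cost_enriching_exporter import CostEnrichingSpanExporter

provider = TracerProvider(resource=Resource.create({"service.name": "my-llm-app"}))

# Wrap the real exporter; each exported batch passes through _enrich_span() first.
exporter = CostEnrichingSpanExporter(
    wrapped_exporter=OTLPSpanExporter(endpoint="http://localhost:4317"),
    cost_calculator=CostCalculator(),
)
provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(provider)

# Any span that records a model name (gen_ai.request.model or llm.model_name)
# plus token counts (e.g., gen_ai.usage.prompt_tokens / completion_tokens)
# now leaves the process carrying gen_ai.usage.cost.* attributes.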