genai-otel-instrument 0.1.7.dev0__py3-none-any.whl → 0.1.10.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genai-otel-instrument might be problematic. Click here for more details.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +7 -3
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +2 -3
- genai_otel/instrumentors/huggingface_instrumentor.py +178 -5
- genai_otel/llm_pricing.json +305 -7
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/METADATA +80 -35
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/RECORD +12 -11
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/top_level.txt +0 -0
genai_otel/__version__.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.7.dev0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 7, 'dev0')
|
|
31
|
+
__version__ = version = '0.1.10.dev0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 10, 'dev0')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
genai_otel/auto_instrument.py
CHANGED
|
@@ -19,6 +19,7 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExport
|
|
|
19
19
|
from .config import OTelConfig
|
|
20
20
|
from .cost_calculator import CostCalculator
|
|
21
21
|
from .cost_enrichment_processor import CostEnrichmentSpanProcessor
|
|
22
|
+
from .cost_enriching_exporter import CostEnrichingSpanExporter
|
|
22
23
|
from .gpu_metrics import GPUMetricsCollector
|
|
23
24
|
from .mcp_instrumentors import MCPInstrumentorManager
|
|
24
25
|
from .metrics import (
|
|
@@ -169,14 +170,17 @@ def setup_auto_instrumentation(config: OTelConfig):
|
|
|
169
170
|
|
|
170
171
|
set_global_textmap(TraceContextTextMapPropagator())
|
|
171
172
|
|
|
172
|
-
# Add cost enrichment processor for
|
|
173
|
-
#
|
|
173
|
+
# Add cost enrichment processor for custom instrumentors (OpenAI, Ollama, etc.)
|
|
174
|
+
# These instrumentors set cost attributes directly, so processor is mainly for logging
|
|
175
|
+
# Also attempts to enrich OpenInference spans (smolagents, litellm, mcp), though
|
|
176
|
+
# the processor can't modify ReadableSpan - the exporter below handles that
|
|
177
|
+
cost_calculator = None
|
|
174
178
|
if config.enable_cost_tracking:
|
|
175
179
|
try:
|
|
176
180
|
cost_calculator = CostCalculator()
|
|
177
181
|
cost_processor = CostEnrichmentSpanProcessor(cost_calculator)
|
|
178
182
|
tracer_provider.add_span_processor(cost_processor)
|
|
179
|
-
logger.info("Cost enrichment processor added
|
|
183
|
+
logger.info("Cost enrichment processor added")
|
|
180
184
|
except Exception as e:
|
|
181
185
|
logger.warning(f"Failed to add cost enrichment processor: {e}", exc_info=True)
|
|
182
186
|
|
|
genai_otel/cost_enriching_exporter.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Custom SpanExporter that enriches spans with cost attributes before export.
|
|
2
|
+
|
|
3
|
+
This exporter wraps another exporter (like OTLPSpanExporter) and adds cost
|
|
4
|
+
attributes to spans before passing them to the wrapped exporter.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Optional, Sequence
|
|
9
|
+
|
|
10
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
11
|
+
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
12
|
+
|
|
13
|
+
from .cost_calculator import CostCalculator
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CostEnrichingSpanExporter(SpanExporter):
    """Wrap a SpanExporter and add cost attributes to spans before export.

    For every span handed to :meth:`export` the exporter:

    1. Reads the model name and token usage from the span attributes
       (GenAI and OpenInference attribute names are both recognised).
    2. Asks the CostCalculator for a granular cost.
    3. Builds a new ReadableSpan whose attributes include the cost.
    4. Forwards the resulting spans to the wrapped exporter (e.g., OTLP).

    Spans without attributes, without a model name, without token usage, or
    that already carry ``gen_ai.usage.cost.total`` are forwarded unchanged.
    """

    # Maps lower-cased GenAI operation names and OpenInference span kinds to
    # the call type passed to the cost calculator. Kept at class level so the
    # table is built once rather than for every exported span.
    _CALL_TYPE_MAPPING = {
        "chat": "chat",
        "completion": "chat",
        "embedding": "embedding",
        "embeddings": "embedding",
        "text_generation": "chat",
        "image_generation": "image",
        "audio": "audio",
        "llm": "chat",
        "chain": "chat",
        "retriever": "embedding",
        "reranker": "embedding",
        "tool": "chat",
        "agent": "chat",
    }

    def __init__(
        self, wrapped_exporter: SpanExporter, cost_calculator: Optional[CostCalculator] = None
    ):
        """Initialize the cost enriching exporter.

        Args:
            wrapped_exporter: The underlying exporter to send enriched spans to.
            cost_calculator: CostCalculator instance to use for cost calculations.
                If None, creates a new instance.
        """
        self.wrapped_exporter = wrapped_exporter
        # Compare against None explicitly so a caller-supplied calculator is
        # never replaced just because it happens to evaluate as falsy.
        self.cost_calculator = cost_calculator if cost_calculator is not None else CostCalculator()
        logger.info(
            f"CostEnrichingSpanExporter initialized, wrapping {type(wrapped_exporter).__name__}"
        )

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Export spans after enriching them with cost attributes.

        Args:
            spans: Sequence of ReadableSpan objects to export.

        Returns:
            The SpanExportResult returned by the wrapped exporter, or
            ``SpanExportResult.FAILURE`` if enrichment or the wrapped export
            raises.
        """
        try:
            # _enrich_span handles its own errors and falls back to the
            # original span, so one bad span does not drop the whole batch.
            enriched_spans = [self._enrich_span(span) for span in spans]
            return self.wrapped_exporter.export(enriched_spans)
        except Exception as e:
            logger.error(f"Failed to export spans: {e}", exc_info=True)
            return SpanExportResult.FAILURE

    def _enrich_span(self, span: ReadableSpan) -> ReadableSpan:
        """Enrich a span with cost attributes if applicable.

        Args:
            span: The original ReadableSpan.

        Returns:
            A new ReadableSpan with cost attributes added, or the original
            span when it has no attributes, no model name, no token usage,
            already has a total cost, the calculated cost is not positive,
            or enrichment raises.
        """
        try:
            if not span.attributes:
                return span

            attributes = dict(span.attributes)  # Mutable copy; the span itself is left untouched

            # Model name - support both GenAI and OpenInference conventions
            model = (
                attributes.get("gen_ai.request.model")
                or attributes.get("llm.model_name")
                or attributes.get("embedding.model_name")
            )
            if not model:
                return span

            # Skip if cost attributes are already present
            if "gen_ai.usage.cost.total" in attributes:
                logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
                return span

            # Token usage - support GenAI, OpenInference, and legacy conventions.
            # Converted to int once; a non-numeric value raises here and is
            # handled by the except clause below.
            prompt_tokens = int(
                attributes.get("gen_ai.usage.prompt_tokens")
                or attributes.get("gen_ai.usage.input_tokens")
                or attributes.get("llm.token_count.prompt")  # OpenInference
                or 0
            )
            completion_tokens = int(
                attributes.get("gen_ai.usage.completion_tokens")
                or attributes.get("gen_ai.usage.output_tokens")
                or attributes.get("llm.token_count.completion")  # OpenInference
                or 0
            )

            # Skip if no tokens recorded
            if prompt_tokens == 0 and completion_tokens == 0:
                return span

            # Call type - GenAI operation name first, then the OpenInference
            # span kind, then "chat". str() guards against non-string values.
            span_kind = str(attributes.get("openinference.span.kind") or "")
            call_type = attributes.get("gen_ai.operation.name") or span_kind or "chat"
            normalized_call_type = self._CALL_TYPE_MAPPING.get(str(call_type).lower(), "chat")

            usage = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            }

            cost_info = self.cost_calculator.calculate_granular_cost(
                model=str(model),
                usage=usage,
                call_type=normalized_call_type,
            )

            if cost_info and cost_info.get("total", 0.0) > 0:
                attributes["gen_ai.usage.cost.total"] = cost_info["total"]

                if cost_info.get("prompt", 0.0) > 0:
                    attributes["gen_ai.usage.cost.prompt"] = cost_info["prompt"]
                if cost_info.get("completion", 0.0) > 0:
                    attributes["gen_ai.usage.cost.completion"] = cost_info["completion"]

                logger.info(
                    f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
                    f"for model {model} ({usage['total_tokens']} tokens)"
                )

                # The enriched attributes live in a separate dict, so build a
                # new ReadableSpan that copies every other field from the
                # original and carries the enriched attributes.
                return ReadableSpan(
                    name=span.name,
                    context=span.context,
                    kind=span.kind,
                    parent=span.parent,
                    start_time=span.start_time,
                    end_time=span.end_time,
                    status=span.status,
                    attributes=attributes,  # Use enriched attributes
                    events=span.events,
                    links=span.links,
                    resource=span.resource,
                    instrumentation_scope=span.instrumentation_scope,
                )

        except Exception as e:
            logger.warning(
                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
                exc_info=True,
            )

        # Reached when the cost is not positive or enrichment failed.
        return span

    def shutdown(self) -> None:
        """Shutdown the wrapped exporter."""
        logger.info("CostEnrichingSpanExporter shutting down")
        self.wrapped_exporter.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush the wrapped exporter.

        Args:
            timeout_millis: Timeout in milliseconds.

        Returns:
            Whatever the wrapped exporter's ``force_flush`` returns.
        """
        return self.wrapped_exporter.force_flush(timeout_millis)
|
|
genai_otel/cost_enrichment_processor.py
CHANGED
|
@@ -132,9 +132,8 @@ class CostEnrichmentSpanProcessor(SpanProcessor):
|
|
|
132
132
|
|
|
133
133
|
if cost_info and cost_info.get("total", 0.0) > 0:
|
|
134
134
|
# Add cost attributes to the span
|
|
135
|
-
#
|
|
136
|
-
|
|
137
|
-
if isinstance(span, Span):
|
|
135
|
+
# Use duck typing to check if span supports set_attribute
|
|
136
|
+
if hasattr(span, "set_attribute") and callable(getattr(span, "set_attribute")):
|
|
138
137
|
span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
|
|
139
138
|
|
|
140
139
|
if cost_info.get("prompt", 0.0) > 0:
|
|
genai_otel/instrumentors/huggingface_instrumentor.py
CHANGED
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
This instrumentor automatically traces:
|
|
4
4
|
1. HuggingFace Transformers pipelines (local model execution)
|
|
5
5
|
2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
|
|
6
|
+
3. Direct model usage via AutoModelForCausalLM.generate() and forward()
|
|
6
7
|
|
|
7
8
|
Note: Transformers runs models locally (no API costs), but InferenceClient makes
|
|
8
9
|
API calls to HuggingFace endpoints which may have costs based on usage.
|
|
10
|
+
Local model costs are estimated based on parameter count and token usage.
|
|
9
11
|
"""
|
|
10
12
|
|
|
11
13
|
import logging
|
|
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
|
|
|
20
22
|
class HuggingFaceInstrumentor(BaseInstrumentor):
|
|
21
23
|
"""Instrumentor for HuggingFace Transformers and Inference API.
|
|
22
24
|
|
|
23
|
-
Instruments
|
|
24
|
-
- transformers.pipeline (local execution,
|
|
25
|
+
Instruments:
|
|
26
|
+
- transformers.pipeline (local execution, estimated costs)
|
|
27
|
+
- transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
|
|
28
|
+
- transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
|
|
25
29
|
- huggingface_hub.InferenceClient (API calls, may have costs)
|
|
26
30
|
"""
|
|
27
31
|
|
|
@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
|
|
|
30
34
|
super().__init__()
|
|
31
35
|
self._transformers_available = False
|
|
32
36
|
self._inference_client_available = False
|
|
37
|
+
self._model_classes_instrumented = False
|
|
33
38
|
self._check_availability()
|
|
34
39
|
|
|
35
40
|
def _check_availability(self):
|
|
@@ -55,13 +60,14 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
|
|
|
55
60
|
self._inference_client_available = False
|
|
56
61
|
|
|
57
62
|
def instrument(self, config: OTelConfig):
|
|
58
|
-
"""Instrument HuggingFace Transformers pipelines and InferenceClient."""
|
|
59
|
-
self.config
|
|
63
|
+
"""Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
|
|
64
|
+
self._setup_config(config)
|
|
60
65
|
|
|
61
66
|
instrumented_count = 0
|
|
62
67
|
|
|
63
|
-
# Instrument transformers
|
|
68
|
+
# Instrument transformers components if available
|
|
64
69
|
if self._transformers_available:
|
|
70
|
+
# Instrument pipeline
|
|
65
71
|
try:
|
|
66
72
|
self._instrument_transformers()
|
|
67
73
|
instrumented_count += 1
|
|
@@ -70,6 +76,15 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
|
|
|
70
76
|
if config.fail_on_error:
|
|
71
77
|
raise
|
|
72
78
|
|
|
79
|
+
# Instrument model classes (AutoModelForCausalLM, etc.)
|
|
80
|
+
try:
|
|
81
|
+
self._instrument_model_classes()
|
|
82
|
+
instrumented_count += 1
|
|
83
|
+
except Exception as e:
|
|
84
|
+
logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
|
|
85
|
+
if config.fail_on_error:
|
|
86
|
+
raise
|
|
87
|
+
|
|
73
88
|
# Instrument InferenceClient if available
|
|
74
89
|
if self._inference_client_available:
|
|
75
90
|
try:
|
|
@@ -166,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
|
|
|
166
181
|
InferenceClient.text_generation = wrapped_text_generation
|
|
167
182
|
logger.debug("HuggingFace InferenceClient instrumented")
|
|
168
183
|
|
|
184
|
+
def _instrument_model_classes(self):
    """Instrument HuggingFace model classes for direct model usage.

    Replaces ``GenerationMixin.generate`` with a wrapt-decorated wrapper. The
    wrapper opens a ``huggingface.model.generate`` span around each call,
    records request parameters and token usage as span attributes, updates
    the instrumentor's request/token/latency/cost metrics when they are
    configured, and returns the original result unchanged.

    If ``wrapt`` or ``GenerationMixin`` cannot be imported, the failure is
    logged at debug level and nothing is instrumented. Any other exception
    propagates to the caller (``instrument()``).
    """
    try:
        import time

        import wrapt

        # GenerationMixin is the base class that provides generate(); all
        # generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)
        # inherit from it.
        try:
            from transformers.generation.utils import GenerationMixin
        except ImportError:
            # Fallback for older transformers versions
            from transformers.generation import GenerationMixin
    except ImportError as e:
        logger.debug(f"Could not import model classes for instrumentation: {e}")
        return

    # Store reference to instrumentor for use in wrapper
    instrumentor = self

    def _count_prompt_tokens(args, kwargs):
        """Return the prompt length taken from ``input_ids`` / first positional arg, or 0."""
        # Never use `or` here: truth-testing a multi-element tensor raises,
        # which would break every generate(**inputs) call.
        input_ids = kwargs.get("input_ids")
        if input_ids is None and args:
            input_ids = args[0]
        if input_ids is None:
            return 0
        try:
            if hasattr(input_ids, "shape"):
                return int(input_ids.shape[-1])
            if isinstance(input_ids, (list, tuple)) and input_ids:
                first = input_ids[0]
                # Nested sequence: length of the first row. Flat sequence:
                # length of the sequence itself.
                if isinstance(first, (list, tuple)):
                    return len(first)
                return len(input_ids)
        except (TypeError, IndexError, ValueError):
            # Telemetry must never break the user's generate() call.
            pass
        return 0

    def _count_completion_tokens(result, prompt_tokens):
        """Return the number of generated tokens inferred from the result, or 0."""
        # With return_dict_in_generate=True the token ids are on `.sequences`.
        sequences = getattr(result, "sequences", result)
        if isinstance(sequences, (list, tuple)):
            sequences = sequences[0] if sequences else None
        shape = getattr(sequences, "shape", None)
        if shape is None:
            return 0
        try:
            total_length = int(shape[-1])
        except (TypeError, IndexError, ValueError):
            return 0
        # NOTE(review): assumes the output contains the prompt followed by the
        # completion; for encoder-decoder models this likely under-counts -
        # confirm before relying on these numbers for such models.
        return max(0, total_length - prompt_tokens)

    @wrapt.decorator
    def generate_wrapper(wrapped, instance, args, kwargs):
        """Wrapper for model.generate() method."""
        # Extract model info; config._name_or_path wins when present.
        model_name = getattr(instance, "name_or_path", "unknown")
        model_config = getattr(instance, "config", None)
        if hasattr(model_config, "_name_or_path"):
            model_name = model_config._name_or_path

        prompt_tokens = _count_prompt_tokens(args, kwargs)

        with instrumentor.tracer.start_as_current_span("huggingface.model.generate") as span:
            span.set_attribute("gen_ai.system", "huggingface")
            span.set_attribute("gen_ai.request.model", model_name)
            span.set_attribute("gen_ai.operation.name", "text_generation")
            span.set_attribute("gen_ai.request.type", "chat")

            # Generation parameters. max_new_tokens is written last, so it
            # overrides max_length when both are passed.
            if "max_length" in kwargs:
                span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
            if "max_new_tokens" in kwargs:
                span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
            if "temperature" in kwargs:
                span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
            if "top_p" in kwargs:
                span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])

            # Call original generate and time it
            start_time = time.time()
            result = wrapped(*args, **kwargs)
            duration = time.time() - start_time

            completion_tokens = _count_completion_tokens(result, prompt_tokens)
            total_tokens = prompt_tokens + completion_tokens

            # Set token usage attributes
            if prompt_tokens > 0:
                span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
            if completion_tokens > 0:
                span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
            if total_tokens > 0:
                span.set_attribute("gen_ai.usage.total_tokens", total_tokens)

            # Record metrics
            if instrumentor.request_counter:
                instrumentor.request_counter.add(
                    1, {"model": model_name, "provider": "huggingface"}
                )

            if instrumentor.token_counter and total_tokens > 0:
                if prompt_tokens > 0:
                    instrumentor.token_counter.add(
                        prompt_tokens, {"token_type": "prompt", "operation": span.name}
                    )
                if completion_tokens > 0:
                    instrumentor.token_counter.add(
                        completion_tokens,
                        {"token_type": "completion", "operation": span.name},
                    )

            if instrumentor.latency_histogram:
                instrumentor.latency_histogram.record(duration, {"operation": span.name})

            # Calculate and record cost if enabled
            if (
                instrumentor.config
                and instrumentor.config.enable_cost_tracking
                and total_tokens > 0
            ):
                try:
                    usage = {
                        "prompt_tokens": prompt_tokens,
                        "completion_tokens": completion_tokens,
                        "total_tokens": total_tokens,
                    }

                    costs = instrumentor.cost_calculator.calculate_granular_cost(
                        model=model_name, usage=usage, call_type="chat"
                    )

                    if costs["total"] > 0:
                        if instrumentor.cost_counter:
                            instrumentor.cost_counter.add(costs["total"], {"model": model_name})
                        span.set_attribute("gen_ai.usage.cost.total", costs["total"])
                        if costs["prompt"] > 0:
                            span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
                        if costs["completion"] > 0:
                            span.set_attribute(
                                "gen_ai.usage.cost.completion", costs["completion"]
                            )

                        logger.debug(
                            f"HuggingFace model {model_name}: {total_tokens} tokens, "
                            f"cost: ${costs['total']:.6f}"
                        )
                except Exception as e:
                    # Cost tracking is best-effort; never fail the generation.
                    logger.warning(f"Failed to calculate cost: {e}")

            return result

    # Apply wrapper to GenerationMixin.generate (all models inherit this)
    GenerationMixin.generate = generate_wrapper(GenerationMixin.generate)

    self._model_classes_instrumented = True
    logger.debug(
        "HuggingFace GenerationMixin.generate() instrumented "
        "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
    )
|
|
341
|
+
|
|
169
342
|
def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
|
|
170
343
|
"""Extract attributes from Inference API call."""
|
|
171
344
|
attrs = {}
|
genai_otel/llm_pricing.json
CHANGED
|
@@ -109,6 +109,16 @@
|
|
|
109
109
|
"512x512": 0.01,
|
|
110
110
|
"1024x1024": 0.012
|
|
111
111
|
}
|
|
112
|
+
},
|
|
113
|
+
"grok-image": {
|
|
114
|
+
"standard": {
|
|
115
|
+
"per_image": 0.07
|
|
116
|
+
}
|
|
117
|
+
},
|
|
118
|
+
"xai-grok-image": {
|
|
119
|
+
"standard": {
|
|
120
|
+
"per_image": 0.07
|
|
121
|
+
}
|
|
112
122
|
}
|
|
113
123
|
},
|
|
114
124
|
"audio": {
|
|
@@ -149,6 +159,22 @@
|
|
|
149
159
|
"promptPrice": 0.002,
|
|
150
160
|
"completionPrice": 0.008
|
|
151
161
|
},
|
|
162
|
+
"gpt-5": {
|
|
163
|
+
"promptPrice": 0.00125,
|
|
164
|
+
"completionPrice": 0.01
|
|
165
|
+
},
|
|
166
|
+
"gpt-5-2025-08-07": {
|
|
167
|
+
"promptPrice": 0.00125,
|
|
168
|
+
"completionPrice": 0.01
|
|
169
|
+
},
|
|
170
|
+
"gpt-5-mini": {
|
|
171
|
+
"promptPrice": 0.00025,
|
|
172
|
+
"completionPrice": 0.002
|
|
173
|
+
},
|
|
174
|
+
"gpt-5-nano": {
|
|
175
|
+
"promptPrice": 0.0001,
|
|
176
|
+
"completionPrice": 0.0004
|
|
177
|
+
},
|
|
152
178
|
"gpt-4o": {
|
|
153
179
|
"promptPrice": 0.0005,
|
|
154
180
|
"completionPrice": 0.0015
|
|
@@ -249,6 +275,22 @@
|
|
|
249
275
|
"promptPrice": 0.015,
|
|
250
276
|
"completionPrice": 0.075
|
|
251
277
|
},
|
|
278
|
+
"claude-4-opus": {
|
|
279
|
+
"promptPrice": 0.015,
|
|
280
|
+
"completionPrice": 0.075
|
|
281
|
+
},
|
|
282
|
+
"claude-opus-4": {
|
|
283
|
+
"promptPrice": 0.015,
|
|
284
|
+
"completionPrice": 0.075
|
|
285
|
+
},
|
|
286
|
+
"claude-opus-4-1": {
|
|
287
|
+
"promptPrice": 0.015,
|
|
288
|
+
"completionPrice": 0.075
|
|
289
|
+
},
|
|
290
|
+
"claude-opus-4.1": {
|
|
291
|
+
"promptPrice": 0.015,
|
|
292
|
+
"completionPrice": 0.075
|
|
293
|
+
},
|
|
252
294
|
"claude-3-sonnet-20240229": {
|
|
253
295
|
"promptPrice": 0.003,
|
|
254
296
|
"completionPrice": 0.015
|
|
@@ -257,6 +299,34 @@
|
|
|
257
299
|
"promptPrice": 0.00025,
|
|
258
300
|
"completionPrice": 0.00125
|
|
259
301
|
},
|
|
302
|
+
"claude-3-5-sonnet-20240620": {
|
|
303
|
+
"promptPrice": 0.003,
|
|
304
|
+
"completionPrice": 0.015
|
|
305
|
+
},
|
|
306
|
+
"claude-3-5-sonnet-20241022": {
|
|
307
|
+
"promptPrice": 0.003,
|
|
308
|
+
"completionPrice": 0.015
|
|
309
|
+
},
|
|
310
|
+
"claude-3-5-haiku-20241022": {
|
|
311
|
+
"promptPrice": 0.0008,
|
|
312
|
+
"completionPrice": 0.004
|
|
313
|
+
},
|
|
314
|
+
"claude-sonnet-4-5": {
|
|
315
|
+
"promptPrice": 0.003,
|
|
316
|
+
"completionPrice": 0.015
|
|
317
|
+
},
|
|
318
|
+
"claude-sonnet-4-5-20250929": {
|
|
319
|
+
"promptPrice": 0.003,
|
|
320
|
+
"completionPrice": 0.015
|
|
321
|
+
},
|
|
322
|
+
"claude-3-7-sonnet": {
|
|
323
|
+
"promptPrice": 0.003,
|
|
324
|
+
"completionPrice": 0.015
|
|
325
|
+
},
|
|
326
|
+
"claude-haiku-4-5": {
|
|
327
|
+
"promptPrice": 0.001,
|
|
328
|
+
"completionPrice": 0.005
|
|
329
|
+
},
|
|
260
330
|
"command": {
|
|
261
331
|
"promptPrice": 0.001,
|
|
262
332
|
"completionPrice": 0.002
|
|
@@ -441,6 +511,14 @@
|
|
|
441
511
|
"promptPrice": 0.00125,
|
|
442
512
|
"completionPrice": 0.01
|
|
443
513
|
},
|
|
514
|
+
"gemini-2-5-flash-image": {
|
|
515
|
+
"promptPrice": 0.0003,
|
|
516
|
+
"completionPrice": 0.03
|
|
517
|
+
},
|
|
518
|
+
"nano-banana": {
|
|
519
|
+
"promptPrice": 0.0003,
|
|
520
|
+
"completionPrice": 0.03
|
|
521
|
+
},
|
|
444
522
|
"text-bison": {
|
|
445
523
|
"promptPrice": 0.001,
|
|
446
524
|
"completionPrice": 0.002
|
|
@@ -537,6 +615,38 @@
|
|
|
537
615
|
"promptPrice": 0.0005,
|
|
538
616
|
"completionPrice": 0.0015
|
|
539
617
|
},
|
|
618
|
+
"grok-2-1212": {
|
|
619
|
+
"promptPrice": 0.002,
|
|
620
|
+
"completionPrice": 0.01
|
|
621
|
+
},
|
|
622
|
+
"grok-2-vision-1212": {
|
|
623
|
+
"promptPrice": 0.002,
|
|
624
|
+
"completionPrice": 0.01
|
|
625
|
+
},
|
|
626
|
+
"grok-3": {
|
|
627
|
+
"promptPrice": 0.003,
|
|
628
|
+
"completionPrice": 0.015
|
|
629
|
+
},
|
|
630
|
+
"grok-3-mini": {
|
|
631
|
+
"promptPrice": 0.0003,
|
|
632
|
+
"completionPrice": 0.0005
|
|
633
|
+
},
|
|
634
|
+
"grok-3-fast": {
|
|
635
|
+
"promptPrice": 0.005,
|
|
636
|
+
"completionPrice": 0.025
|
|
637
|
+
},
|
|
638
|
+
"grok-3-mini-fast": {
|
|
639
|
+
"promptPrice": 0.0006,
|
|
640
|
+
"completionPrice": 0.004
|
|
641
|
+
},
|
|
642
|
+
"grok-4": {
|
|
643
|
+
"promptPrice": 0.003,
|
|
644
|
+
"completionPrice": 0.015
|
|
645
|
+
},
|
|
646
|
+
"grok-4-fast": {
|
|
647
|
+
"promptPrice": 0.0002,
|
|
648
|
+
"completionPrice": 0.0005
|
|
649
|
+
},
|
|
540
650
|
"jamba-1.5-mini": {
|
|
541
651
|
"promptPrice": 0.0002,
|
|
542
652
|
"completionPrice": 0.0004
|
|
@@ -565,6 +675,26 @@
|
|
|
565
675
|
"promptPrice": 0.0012,
|
|
566
676
|
"completionPrice": 0.0012
|
|
567
677
|
},
|
|
678
|
+
"qwen3-next-80b-a3b-instruct": {
|
|
679
|
+
"promptPrice": 0.000525,
|
|
680
|
+
"completionPrice": 0.0021
|
|
681
|
+
},
|
|
682
|
+
"qwen3-next-80b-a3b-thinking": {
|
|
683
|
+
"promptPrice": 0.000525,
|
|
684
|
+
"completionPrice": 0.0063
|
|
685
|
+
},
|
|
686
|
+
"qwen3-coder-480b-a35b-instruct": {
|
|
687
|
+
"promptPrice": 0.001,
|
|
688
|
+
"completionPrice": 0.005
|
|
689
|
+
},
|
|
690
|
+
"qwen3-max": {
|
|
691
|
+
"promptPrice": 0.0012,
|
|
692
|
+
"completionPrice": 0.006
|
|
693
|
+
},
|
|
694
|
+
"qwen-qwen3-max": {
|
|
695
|
+
"promptPrice": 0.0012,
|
|
696
|
+
"completionPrice": 0.006
|
|
697
|
+
},
|
|
568
698
|
"codellama/CodeLlama-34b-Instruct-hf": {
|
|
569
699
|
"promptPrice": 0.0008,
|
|
570
700
|
"completionPrice": 0.0008
|
|
@@ -613,13 +743,29 @@
|
|
|
613
743
|
"promptPrice": 0.00059,
|
|
614
744
|
"completionPrice": 0.00079
|
|
615
745
|
},
|
|
746
|
+
"llama-4-scout": {
|
|
747
|
+
"promptPrice": 0.00015,
|
|
748
|
+
"completionPrice": 0.0005
|
|
749
|
+
},
|
|
616
750
|
"llama-4-scout-17bx16e-128k": {
|
|
617
|
-
"promptPrice": 0.
|
|
618
|
-
"completionPrice": 0.
|
|
751
|
+
"promptPrice": 0.00015,
|
|
752
|
+
"completionPrice": 0.0005
|
|
753
|
+
},
|
|
754
|
+
"llama-4-maverick": {
|
|
755
|
+
"promptPrice": 0.00022,
|
|
756
|
+
"completionPrice": 0.00085
|
|
619
757
|
},
|
|
620
758
|
"llama-4-maverick-17bx128e-128k": {
|
|
621
|
-
"promptPrice": 0.
|
|
622
|
-
"completionPrice": 0.
|
|
759
|
+
"promptPrice": 0.00022,
|
|
760
|
+
"completionPrice": 0.00085
|
|
761
|
+
},
|
|
762
|
+
"meta-llama/Llama-4-Scout": {
|
|
763
|
+
"promptPrice": 0.00015,
|
|
764
|
+
"completionPrice": 0.0005
|
|
765
|
+
},
|
|
766
|
+
"meta-llama/Llama-4-Maverick": {
|
|
767
|
+
"promptPrice": 0.00022,
|
|
768
|
+
"completionPrice": 0.00085
|
|
623
769
|
},
|
|
624
770
|
"llama-guard-4-12b": {
|
|
625
771
|
"promptPrice": 0.0002,
|
|
@@ -773,7 +919,7 @@
|
|
|
773
919
|
"promptPrice": 0.0003,
|
|
774
920
|
"completionPrice": 0.0006
|
|
775
921
|
},
|
|
776
|
-
"qwen3:
|
|
922
|
+
"qwen3:4b": {
|
|
777
923
|
"promptPrice": 0.0003,
|
|
778
924
|
"completionPrice": 0.0006
|
|
779
925
|
},
|
|
@@ -857,9 +1003,161 @@
|
|
|
857
1003
|
"promptPrice": 0.0008,
|
|
858
1004
|
"completionPrice": 0.0008
|
|
859
1005
|
},
|
|
1006
|
+
"ibm-granite-3-1-8b-instruct": {
|
|
1007
|
+
"promptPrice": 0.0002,
|
|
1008
|
+
"completionPrice": 0.0002
|
|
1009
|
+
},
|
|
1010
|
+
"ibm-granite-3-8b-instruct": {
|
|
1011
|
+
"promptPrice": 0.0002,
|
|
1012
|
+
"completionPrice": 0.0002
|
|
1013
|
+
},
|
|
1014
|
+
"granite-3-8b-instruct": {
|
|
1015
|
+
"promptPrice": 0.0002,
|
|
1016
|
+
"completionPrice": 0.0002
|
|
1017
|
+
},
|
|
1018
|
+
"granite-embedding-107m-multilingual": {
|
|
1019
|
+
"promptPrice": 0.0001,
|
|
1020
|
+
"completionPrice": 0.0001
|
|
1021
|
+
},
|
|
1022
|
+
"granite-embedding-278m-multilingual": {
|
|
1023
|
+
"promptPrice": 0.0001,
|
|
1024
|
+
"completionPrice": 0.0001
|
|
1025
|
+
},
|
|
860
1026
|
"deepseek-v3.1": {
|
|
861
|
-
"promptPrice": 0.
|
|
862
|
-
"completionPrice": 0.
|
|
1027
|
+
"promptPrice": 0.00056,
|
|
1028
|
+
"completionPrice": 0.00168
|
|
1029
|
+
},
|
|
1030
|
+
"sarvam-m": {
|
|
1031
|
+
"promptPrice": 0,
|
|
1032
|
+
"completionPrice": 0
|
|
1033
|
+
},
|
|
1034
|
+
"sarvamai/sarvam-m": {
|
|
1035
|
+
"promptPrice": 0,
|
|
1036
|
+
"completionPrice": 0
|
|
1037
|
+
},
|
|
1038
|
+
"sarvam-chat": {
|
|
1039
|
+
"promptPrice": 0,
|
|
1040
|
+
"completionPrice": 0
|
|
1041
|
+
},
|
|
1042
|
+
"granite-4-0-h-small": {
|
|
1043
|
+
"promptPrice": 0.0002,
|
|
1044
|
+
"completionPrice": 0.0002
|
|
1045
|
+
},
|
|
1046
|
+
"granite-4-0-h-tiny": {
|
|
1047
|
+
"promptPrice": 0.0002,
|
|
1048
|
+
"completionPrice": 0.0002
|
|
1049
|
+
},
|
|
1050
|
+
"granite-4-0-h-micro": {
|
|
1051
|
+
"promptPrice": 0.0002,
|
|
1052
|
+
"completionPrice": 0.0002
|
|
1053
|
+
},
|
|
1054
|
+
"granite-4-0-micro": {
|
|
1055
|
+
"promptPrice": 0.0002,
|
|
1056
|
+
"completionPrice": 0.0002
|
|
1057
|
+
},
|
|
1058
|
+
"ibm-granite/granite-4.0-h-small": {
|
|
1059
|
+
"promptPrice": 0.0002,
|
|
1060
|
+
"completionPrice": 0.0002
|
|
1061
|
+
},
|
|
1062
|
+
"ibm-granite/granite-4.0-h-tiny": {
|
|
1063
|
+
"promptPrice": 0.0002,
|
|
1064
|
+
"completionPrice": 0.0002
|
|
1065
|
+
},
|
|
1066
|
+
"ibm-granite/granite-4.0-h-micro": {
|
|
1067
|
+
"promptPrice": 0.0002,
|
|
1068
|
+
"completionPrice": 0.0002
|
|
1069
|
+
},
|
|
1070
|
+
"granite:3b": {
|
|
1071
|
+
"promptPrice": 0.0002,
|
|
1072
|
+
"completionPrice": 0.0002
|
|
1073
|
+
},
|
|
1074
|
+
"granite:8b": {
|
|
1075
|
+
"promptPrice": 0.0002,
|
|
1076
|
+
"completionPrice": 0.0002
|
|
1077
|
+
},
|
|
1078
|
+
"mistral-large-24-11": {
|
|
1079
|
+
"promptPrice": 0.008,
|
|
1080
|
+
"completionPrice": 0.024
|
|
1081
|
+
},
|
|
1082
|
+
"mistral-large-2411": {
|
|
1083
|
+
"promptPrice": 0.008,
|
|
1084
|
+
"completionPrice": 0.024
|
|
1085
|
+
},
|
|
1086
|
+
"mistral-small-3-1": {
|
|
1087
|
+
"promptPrice": 0.001,
|
|
1088
|
+
"completionPrice": 0.003
|
|
1089
|
+
},
|
|
1090
|
+
"mistral-small-3.1": {
|
|
1091
|
+
"promptPrice": 0.001,
|
|
1092
|
+
"completionPrice": 0.003
|
|
1093
|
+
},
|
|
1094
|
+
"mistral-medium-3": {
|
|
1095
|
+
"promptPrice": 0.0004,
|
|
1096
|
+
"completionPrice": 0.002
|
|
1097
|
+
},
|
|
1098
|
+
"mistral-medium-2025": {
|
|
1099
|
+
"promptPrice": 0.0004,
|
|
1100
|
+
"completionPrice": 0.002
|
|
1101
|
+
},
|
|
1102
|
+
"magistral-small": {
|
|
1103
|
+
"promptPrice": 0.001,
|
|
1104
|
+
"completionPrice": 0.003
|
|
1105
|
+
},
|
|
1106
|
+
"magistral-medium": {
|
|
1107
|
+
"promptPrice": 0.003,
|
|
1108
|
+
"completionPrice": 0.009
|
|
1109
|
+
},
|
|
1110
|
+
"codestral-25-01": {
|
|
1111
|
+
"promptPrice": 0.001,
|
|
1112
|
+
"completionPrice": 0.003
|
|
1113
|
+
},
|
|
1114
|
+
"codestral-2501": {
|
|
1115
|
+
"promptPrice": 0.001,
|
|
1116
|
+
"completionPrice": 0.003
|
|
1117
|
+
},
|
|
1118
|
+
"lfm-7b": {
|
|
1119
|
+
"promptPrice": 0.0003,
|
|
1120
|
+
"completionPrice": 0.0006
|
|
1121
|
+
},
|
|
1122
|
+
"liquid/lfm-7b": {
|
|
1123
|
+
"promptPrice": 0.0003,
|
|
1124
|
+
"completionPrice": 0.0006
|
|
1125
|
+
},
|
|
1126
|
+
"snowflake-arctic": {
|
|
1127
|
+
"promptPrice": 0.0008,
|
|
1128
|
+
"completionPrice": 0.0024
|
|
1129
|
+
},
|
|
1130
|
+
"snowflake-arctic-instruct": {
|
|
1131
|
+
"promptPrice": 0.0008,
|
|
1132
|
+
"completionPrice": 0.0024
|
|
1133
|
+
},
|
|
1134
|
+
"snowflake/snowflake-arctic-instruct": {
|
|
1135
|
+
"promptPrice": 0.0008,
|
|
1136
|
+
"completionPrice": 0.0024
|
|
1137
|
+
},
|
|
1138
|
+
"snowflake-arctic-embed-l-v2.0": {
|
|
1139
|
+
"promptPrice": 0.00005,
|
|
1140
|
+
"completionPrice": 0.00005
|
|
1141
|
+
},
|
|
1142
|
+
"nvidia-nemotron-4-340b-instruct": {
|
|
1143
|
+
"promptPrice": 0.003,
|
|
1144
|
+
"completionPrice": 0.009
|
|
1145
|
+
},
|
|
1146
|
+
"nvidia/nemotron-4-340b-instruct": {
|
|
1147
|
+
"promptPrice": 0.003,
|
|
1148
|
+
"completionPrice": 0.009
|
|
1149
|
+
},
|
|
1150
|
+
"nvidia-nemotron-mini": {
|
|
1151
|
+
"promptPrice": 0.0002,
|
|
1152
|
+
"completionPrice": 0.0004
|
|
1153
|
+
},
|
|
1154
|
+
"nvidia/llama-3.1-nemotron-70b-instruct": {
|
|
1155
|
+
"promptPrice": 0.0008,
|
|
1156
|
+
"completionPrice": 0.0008
|
|
1157
|
+
},
|
|
1158
|
+
"servicenow-now-assist": {
|
|
1159
|
+
"promptPrice": 0.001,
|
|
1160
|
+
"completionPrice": 0.003
|
|
863
1161
|
},
|
|
864
1162
|
"llama3.1:405b": {
|
|
865
1163
|
"promptPrice": 0.0012,
|
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: genai-otel-instrument
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.10.dev0
|
|
4
4
|
Summary: Comprehensive OpenTelemetry auto-instrumentation for LLM/GenAI applications
|
|
5
5
|
Author-email: Kshitij Thakkar <kshitijthakkar@rocketmail.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -180,6 +180,12 @@ Dynamic: license-file
|
|
|
180
180
|
|
|
181
181
|
# GenAI OpenTelemetry Auto-Instrumentation
|
|
182
182
|
|
|
183
|
+
<div align="center">
|
|
184
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Logo.jpg" alt="GenAI OpenTelemetry Instrumentation Logo" width="400"/>
|
|
185
|
+
</div>
|
|
186
|
+
|
|
187
|
+
<br/>
|
|
188
|
+
|
|
183
189
|
[](https://badge.fury.io/py/genai-otel-instrument)
|
|
184
190
|
[](https://pypi.org/project/genai-otel-instrument/)
|
|
185
191
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
@@ -200,6 +206,14 @@ Dynamic: license-file
|
|
|
200
206
|
[](https://opentelemetry.io/docs/specs/semconv/gen-ai/)
|
|
201
207
|
[](https://github.com/Mandark-droid/genai_otel_instrument/actions)
|
|
202
208
|
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
<div align="center">
|
|
212
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Landing_Page.jpg" alt="GenAI OpenTelemetry Instrumentation Overview" width="800"/>
|
|
213
|
+
</div>
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
203
217
|
Production-ready OpenTelemetry instrumentation for GenAI/LLM applications with zero-code setup.
|
|
204
218
|
|
|
205
219
|
## Features
|
|
@@ -257,7 +271,8 @@ For a more comprehensive demonstration of various LLM providers and MCP tools, r
|
|
|
257
271
|
|
|
258
272
|
### LLM Providers (Auto-detected)
|
|
259
273
|
- **With Full Cost Tracking**: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure OpenAI, Cohere, Mistral AI, Together AI, Groq, Ollama, Vertex AI
|
|
260
|
-
- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution
|
|
274
|
+
- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution with estimated costs)
|
|
275
|
+
- **HuggingFace Support**: `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`, `InferenceClient` API calls
|
|
261
276
|
- **Other Providers**: Anyscale
|
|
262
277
|
|
|
263
278
|
### Frameworks
|
|
@@ -288,6 +303,65 @@ The processor supports OpenInference semantic conventions:
|
|
|
288
303
|
pip install genai-otel-instrument[openinference]
|
|
289
304
|
```
|
|
290
305
|
|
|
306
|
+
## Screenshots
|
|
307
|
+
|
|
308
|
+
See the instrumentation in action across different LLM providers and observability backends.
|
|
309
|
+
|
|
310
|
+
### OpenAI Instrumentation
|
|
311
|
+
Full trace capture for OpenAI API calls with token usage, costs, and latency metrics.
|
|
312
|
+
|
|
313
|
+
<div align="center">
|
|
314
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Screenshots/Traces_OpenAI.png" alt="OpenAI Traces" width="900"/>
|
|
315
|
+
</div>
|
|
316
|
+
|
|
317
|
+
### Ollama (Local LLM) Instrumentation
|
|
318
|
+
Zero-code instrumentation for local models running on Ollama with comprehensive observability.
|
|
319
|
+
|
|
320
|
+
<div align="center">
|
|
321
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Screenshots/Traces_Ollama.png" alt="Ollama Traces" width="900"/>
|
|
322
|
+
</div>
|
|
323
|
+
|
|
324
|
+
### HuggingFace Transformers
|
|
325
|
+
Direct instrumentation of HuggingFace Transformers with automatic token counting and cost estimation.
|
|
326
|
+
|
|
327
|
+
<div align="center">
|
|
328
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Screenshots/Trace_HuggingFace_Transformer_Models.png" alt="HuggingFace Transformer Traces" width="900"/>
|
|
329
|
+
</div>
|
|
330
|
+
|
|
331
|
+
### SmolAgents Framework
|
|
332
|
+
Complete agent workflow tracing with tool calls, iterations, and cost breakdown.
|
|
333
|
+
|
|
334
|
+
<div align="center">
|
|
335
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Screenshots/Traces_SmolAgent_with_tool_calls.png" alt="SmolAgent Traces with Tool Calls" width="900"/>
|
|
336
|
+
</div>
|
|
337
|
+
|
|
338
|
+
### GPU Metrics Collection
|
|
339
|
+
Real-time GPU utilization, memory, temperature, and power consumption metrics.
|
|
340
|
+
|
|
341
|
+
<div align="center">
|
|
342
|
+
<img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/HEAD/.github/images/Screenshots/GPU_Metrics.png" alt="GPU Metrics Dashboard" width="900"/>
|
|
343
|
+
</div>
|
|
344
|
+
|
|
345
|
+
### Additional Screenshots
|
|
346
|
+
|
|
347
|
+
- **[Token Cost Breakdown](https://github.com/Mandark-droid/genai_otel_instrument/blob/HEAD/.github/images/Screenshots/Traces_SmolAgent_Token_Cost_breakdown.png)** - Detailed token usage and cost analysis for SmolAgent workflows
|
|
348
|
+
- **[OpenSearch Dashboard](https://github.com/Mandark-droid/genai_otel_instrument/blob/HEAD/.github/images/Screenshots/GENAI_OpenSearch_output.png)** - GenAI metrics visualization in OpenSearch/Kibana
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
352
|
+
## Demo Video
|
|
353
|
+
|
|
354
|
+
Watch a comprehensive walkthrough of GenAI OpenTelemetry Auto-Instrumentation in action, demonstrating setup, configuration, and real-time observability across multiple LLM providers.
|
|
355
|
+
|
|
356
|
+
<div align="center">
|
|
357
|
+
|
|
358
|
+
**🎥 Watch Demo Video**
|
|
359
|
+
*(Coming Soon)*
|
|
360
|
+
|
|
361
|
+
</div>
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
291
365
|
## Cost Tracking Coverage
|
|
292
366
|
|
|
293
367
|
The library includes comprehensive cost tracking with pricing data for **145+ models** across **11 providers**:
|
|
@@ -307,7 +381,10 @@ The library includes comprehensive cost tracking with pricing data for **145+ mo
|
|
|
307
381
|
|
|
308
382
|
### Special Pricing Models
|
|
309
383
|
- **Replicate**: Hardware-based pricing ($/second of GPU/CPU time) - not token-based
|
|
310
|
-
- **HuggingFace Transformers**: Local execution
|
|
384
|
+
- **HuggingFace Transformers**: Local model execution with estimated costs based on parameter count
|
|
385
|
+
- Supports `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`
|
|
386
|
+
- Cost estimation uses GPU/compute resource pricing tiers (tiny/small/medium/large)
|
|
387
|
+
- Automatic token counting from tensor shapes
|
|
311
388
|
|
|
312
389
|
### Pricing Features
|
|
313
390
|
- **Differential Pricing**: Separate rates for prompt tokens vs. completion tokens
|
|
@@ -836,38 +913,6 @@ genai_otel.instrument(
|
|
|
836
913
|
- `gen_ai.eval.bias_categories` - Detected bias types (array)
|
|
837
914
|
- `gen_ai.eval.toxicity_categories` - Toxicity categories (array)
|
|
838
915
|
|
|
839
|
-
#### 📊 Enhanced OpenTelemetry Compliance
|
|
840
|
-
|
|
841
|
-
Completing remaining items from [OTEL_SEMANTIC_GAP_ANALYSIS_AND_IMPLEMENTATION_PLAN.md](OTEL_SEMANTIC_GAP_ANALYSIS_AND_IMPLEMENTATION_PLAN.md):
|
|
842
|
-
|
|
843
|
-
**Phase 4: Optional Enhancements (✅ COMPLETED)**
|
|
844
|
-
|
|
845
|
-
All Phase 4 features are now available! See the [Advanced Features](#advanced-features) section for detailed documentation.
|
|
846
|
-
|
|
847
|
-
- ✅ **Session & User Tracking** - Track sessions and users across requests with custom extractor functions
|
|
848
|
-
- Configurable via `session_id_extractor` and `user_id_extractor` in `OTelConfig`
|
|
849
|
-
- Automatically adds `session.id` and `user.id` span attributes
|
|
850
|
-
- See [Session and User Tracking](#session-and-user-tracking) for usage examples
|
|
851
|
-
|
|
852
|
-
- ✅ **RAG/Embedding Attributes** - Enhanced observability for retrieval-augmented generation
|
|
853
|
-
- Helper methods: `add_embedding_attributes()` and `add_retrieval_attributes()`
|
|
854
|
-
- Embedding attributes: `embedding.model_name`, `embedding.text`, `embedding.vector.dimension`
|
|
855
|
-
- Retrieval attributes: `retrieval.query`, `retrieval.document_count`, `retrieval.documents.{i}.document.*`
|
|
856
|
-
- See [RAG and Embedding Attributes](#rag-and-embedding-attributes) for usage examples
|
|
857
|
-
- Complete example: `examples/phase4_session_rag_tracking.py`
|
|
858
|
-
|
|
859
|
-
**Note on Agent Workflow Tracking:**
|
|
860
|
-
|
|
861
|
-
Agent workflow observability is already provided by the OpenInference Smolagents instrumentor (included when `smolagents` is in `enabled_instrumentors`). This is not a new Phase 4 feature, but an existing capability:
|
|
862
|
-
|
|
863
|
-
- `openinference.span.kind: "AGENT"` - Identifies agent spans
|
|
864
|
-
- `agent.name` - Agent identifier (via OpenInference)
|
|
865
|
-
- `agent.iteration` - Current iteration number (via OpenInference)
|
|
866
|
-
- `agent.action` - Action taken (via OpenInference)
|
|
867
|
-
- `agent.observation` - Observation received (via OpenInference)
|
|
868
|
-
|
|
869
|
-
Agent tracking requires Python >= 3.10 and the `smolagents` library. See [OpenInference Integration](#openinference-optional---python-310-only) for details.
|
|
870
|
-
|
|
871
916
|
#### 🔄 Migration Support
|
|
872
917
|
|
|
873
918
|
**Backward Compatibility:**
|
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/RECORD
RENAMED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
genai_otel/__init__.py,sha256=OWgm1dihRkwBQU8fUPnVhE5XCZeF5f15UyH4w6LqGZU,4469
|
|
2
|
-
genai_otel/__version__.py,sha256=
|
|
3
|
-
genai_otel/auto_instrument.py,sha256=
|
|
2
|
+
genai_otel/__version__.py,sha256=TXc4a_5Wlqj1sa6bOI4rf0g8ARueANgrhEv_5PaXbcs,753
|
|
3
|
+
genai_otel/auto_instrument.py,sha256=uHJGTlSI4UO-sdFtWgxSmNkhd1_GTWvr3S-rY8MQ4E4,16513
|
|
4
4
|
genai_otel/cli.py,sha256=mbhaTU0WIAkvPKdIing-guIxPDjEKQftChWQUtPFzkY,3170
|
|
5
5
|
genai_otel/config.py,sha256=2CIbZH8WKkVzr73y9AOWmscvEW-kUwMLSAyOy9BFqGI,7871
|
|
6
6
|
genai_otel/cost_calculator.py,sha256=BOW-TC41lJ1GcL4hIGZ4NySyV8aro4_juMOe2IqtJ-A,18115
|
|
7
|
-
genai_otel/
|
|
7
|
+
genai_otel/cost_enriching_exporter.py,sha256=iED7njK21UBKlxRElGfqSs66gMkzDCr8fm-4ZkJBiLU,7874
|
|
8
|
+
genai_otel/cost_enrichment_processor.py,sha256=fQoVosBUgshD9ZRxWpwqqPWYnyhrvKBTJAW0S2H7t1E,7090
|
|
8
9
|
genai_otel/exceptions.py,sha256=gIRvbI7c4V-M-PG9jS0o4ESRwHUWCm6DVihjfyJI1yg,429
|
|
9
10
|
genai_otel/gpu_metrics.py,sha256=hBawkm-NErviwiLzb7z92INstFHec2pREn945rYgrT4,13408
|
|
10
|
-
genai_otel/llm_pricing.json,sha256=
|
|
11
|
+
genai_otel/llm_pricing.json,sha256=f3WNQwiby6rLOQaPX_jJharftHgclNQM3bzibnDiiEQ,27879
|
|
11
12
|
genai_otel/logging_config.py,sha256=S8apGf93nBjoi_Bhce-LxwTwGTaJUeduPXKiWZ5SIa8,1418
|
|
12
13
|
genai_otel/metrics.py,sha256=Vngwtc1MAMAE7JVpbT_KfiCQ5TdIAKIs_0oztjJdDTg,2671
|
|
13
14
|
genai_otel/py.typed,sha256=WJtVGe64tcQSssSo4RD7zCf_3u7X2BmFCWDCroWOcaQ,88
|
|
@@ -20,7 +21,7 @@ genai_otel/instrumentors/base.py,sha256=5N0eMDoPT49PedhoDM0EGu8NE9UvseaiWhqfb9UH
|
|
|
20
21
|
genai_otel/instrumentors/cohere_instrumentor.py,sha256=fsKvHaWvMRAGRbOtybVJVVz-FS_-wmgTJo3Q_F86BOY,5074
|
|
21
22
|
genai_otel/instrumentors/google_ai_instrumentor.py,sha256=ExNo0_OxfCxaRpuUXYU8UZ-ClQRHRLUvf7-kMC6zdc8,2984
|
|
22
23
|
genai_otel/instrumentors/groq_instrumentor.py,sha256=bCm7IDmDyvg0-XuzcCSO5xf9QvDlQGwb7bdQ_ooS6QI,3398
|
|
23
|
-
genai_otel/instrumentors/huggingface_instrumentor.py,sha256=
|
|
24
|
+
genai_otel/instrumentors/huggingface_instrumentor.py,sha256=wvolJZnq9YKfJsvNvUnoOpL1tbeGy0DuxVmmmI1_BoA,17815
|
|
24
25
|
genai_otel/instrumentors/langchain_instrumentor.py,sha256=002ZrKP04l7VaYxo7nAAwl-uvMVwpzVehO2oS23ed-o,2685
|
|
25
26
|
genai_otel/instrumentors/llamaindex_instrumentor.py,sha256=zZ1J7W4yQo1Ur6Y5y0UXpDdEx9oDnmsqNIin5Jrv9os,1206
|
|
26
27
|
genai_otel/instrumentors/mistralai_instrumentor.py,sha256=Blo8X4WV-xQe-xF-jhkaGPavkgayANf1F3zCTzuhuL0,12478
|
|
@@ -37,9 +38,9 @@ genai_otel/mcp_instrumentors/kafka_instrumentor.py,sha256=QJYJC1rvo_zZAIaw-cp_Ic
|
|
|
37
38
|
genai_otel/mcp_instrumentors/manager.py,sha256=1Pj5lkEOL8Yq1Oeud4ZExN6k6NLIVtTzKnFLNiFdJvw,5895
|
|
38
39
|
genai_otel/mcp_instrumentors/redis_instrumentor.py,sha256=KUbs0dMyfMzU4T0SS8u43I5fvr09lcBBM92I3KCsYUw,943
|
|
39
40
|
genai_otel/mcp_instrumentors/vector_db_instrumentor.py,sha256=2vhnk4PGpfYKr-XlRbnCIOap4BPKHOn--fh-ai2YXlM,9994
|
|
40
|
-
genai_otel_instrument-0.1.
|
|
41
|
-
genai_otel_instrument-0.1.
|
|
42
|
-
genai_otel_instrument-0.1.
|
|
43
|
-
genai_otel_instrument-0.1.
|
|
44
|
-
genai_otel_instrument-0.1.
|
|
45
|
-
genai_otel_instrument-0.1.
|
|
41
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
42
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/METADATA,sha256=kg9GntpKyP9CarQ53N3RJuzDIcdk2Gtp2Bbb2NC1LVQ,40024
|
|
43
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
44
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/entry_points.txt,sha256=E9UqoHA_fq69yNGAY3SRYf5HH94sZT5DiDueiU1v0KM,57
|
|
45
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/top_level.txt,sha256=cvCm8PUwvYUSQKruk-x6S-_YuDyhOBk8gD910XICcbg,11
|
|
46
|
+
genai_otel_instrument-0.1.10.dev0.dist-info/RECORD,,
|
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|