genai-otel-instrument 0.1.7.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of genai-otel-instrument has been flagged as potentially problematic.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +7 -3
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +2 -3
- genai_otel/instrumentors/huggingface_instrumentor.py +178 -5
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/METADATA +7 -3
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/RECORD +11 -10
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/top_level.txt +0 -0
genai_otel/__version__.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.1.7.dev0'
-__version_tuple__ = version_tuple = (0, 1, 7, 'dev0')
+__version__ = version = '0.1.9.dev0'
+__version_tuple__ = version_tuple = (0, 1, 9, 'dev0')

 __commit_id__ = commit_id = None
genai_otel/auto_instrument.py
CHANGED
@@ -19,6 +19,7 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
 from .config import OTelConfig
 from .cost_calculator import CostCalculator
 from .cost_enrichment_processor import CostEnrichmentSpanProcessor
+from .cost_enriching_exporter import CostEnrichingSpanExporter
 from .gpu_metrics import GPUMetricsCollector
 from .mcp_instrumentors import MCPInstrumentorManager
 from .metrics import (
@@ -169,14 +170,17 @@ def setup_auto_instrumentation(config: OTelConfig):

     set_global_textmap(TraceContextTextMapPropagator())

-    # Add cost enrichment processor for
-    #
+    # Add cost enrichment processor for custom instrumentors (OpenAI, Ollama, etc.)
+    # These instrumentors set cost attributes directly, so processor is mainly for logging
+    # Also attempts to enrich OpenInference spans (smolagents, litellm, mcp), though
+    # the processor can't modify ReadableSpan - the exporter below handles that
+    cost_calculator = None
     if config.enable_cost_tracking:
         try:
             cost_calculator = CostCalculator()
             cost_processor = CostEnrichmentSpanProcessor(cost_calculator)
             tracer_provider.add_span_processor(cost_processor)
-            logger.info("Cost enrichment processor added
+            logger.info("Cost enrichment processor added")
         except Exception as e:
             logger.warning(f"Failed to add cost enrichment processor: {e}", exc_info=True)

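The new comments defer ReadableSpan enrichment to the exporter imported above. As a rough sketch of how that wiring could look (the actual hookup inside setup_auto_instrumentation is not part of this diff, and the OTLP endpoint below is a placeholder):

# Sketch only: plausible wiring for the exporter-based enrichment described
# above. The OTLP endpoint is a placeholder, not taken from this diff.
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

from genai_otel.cost_enriching_exporter import CostEnrichingSpanExporter

provider = TracerProvider()
# Wrap the real exporter so cost attributes are added at export time,
# after spans have ended and can no longer be mutated by a processor.
exporter = CostEnrichingSpanExporter(OTLPSpanExporter(endpoint="http://localhost:4317"))
provider.add_span_processor(BatchSpanProcessor(exporter))

Enriching at export time sidesteps the on_end() limitation: the exporter sees finished spans and can rebuild them before they leave the process.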
genai_otel/cost_enriching_exporter.py
ADDED
@@ -0,0 +1,207 @@
+"""Custom SpanExporter that enriches spans with cost attributes before export.
+
+This exporter wraps another exporter (like OTLPSpanExporter) and adds cost
+attributes to spans before passing them to the wrapped exporter.
+"""
+
+import logging
+from typing import Optional, Sequence
+
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+
+from .cost_calculator import CostCalculator
+
+logger = logging.getLogger(__name__)
+
+
+class CostEnrichingSpanExporter(SpanExporter):
+    """Wraps a SpanExporter and enriches spans with cost attributes before export.
+
+    This exporter:
+    1. Receives ReadableSpan objects from the SDK
+    2. Extracts model name and token usage from span attributes
+    3. Calculates cost using CostCalculator
+    4. Creates enriched span data with cost attributes
+    5. Exports to the wrapped exporter (e.g., OTLP)
+    """
+
+    def __init__(
+        self, wrapped_exporter: SpanExporter, cost_calculator: Optional[CostCalculator] = None
+    ):
+        """Initialize the cost enriching exporter.
+
+        Args:
+            wrapped_exporter: The underlying exporter to send enriched spans to.
+            cost_calculator: CostCalculator instance to use for cost calculations.
+                If None, creates a new instance.
+        """
+        self.wrapped_exporter = wrapped_exporter
+        self.cost_calculator = cost_calculator or CostCalculator()
+        logger.info(
+            f"CostEnrichingSpanExporter initialized, wrapping {type(wrapped_exporter).__name__}"
+        )
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        """Export spans after enriching them with cost attributes.
+
+        Args:
+            spans: Sequence of ReadableSpan objects to export.
+
+        Returns:
+            SpanExportResult from the wrapped exporter.
+        """
+        try:
+            # Enrich spans with cost attributes
+            enriched_spans = []
+            for span in spans:
+                enriched_span = self._enrich_span(span)
+                enriched_spans.append(enriched_span)
+
+            # Export to wrapped exporter
+            return self.wrapped_exporter.export(enriched_spans)
+
+        except Exception as e:
+            logger.error(f"Failed to export spans: {e}", exc_info=True)
+            return SpanExportResult.FAILURE
+
+    def _enrich_span(self, span: ReadableSpan) -> ReadableSpan:
+        """Enrich a span with cost attributes if applicable.
+
+        Args:
+            span: The original ReadableSpan.
+
+        Returns:
+            A new ReadableSpan with cost attributes added (or the original if not applicable).
+        """
+        try:
+            # Check if span has LLM-related attributes
+            if not span.attributes:
+                return span
+
+            attributes = dict(span.attributes)  # Make a mutable copy
+
+            # Check for model name - support both GenAI and OpenInference conventions
+            model = (
+                attributes.get("gen_ai.request.model")
+                or attributes.get("llm.model_name")
+                or attributes.get("embedding.model_name")
+            )
+            if not model:
+                return span
+
+            # Skip if cost attributes are already present
+            if "gen_ai.usage.cost.total" in attributes:
+                logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
+                return span
+
+            # Extract token usage - support GenAI, OpenInference, and legacy conventions
+            prompt_tokens = (
+                attributes.get("gen_ai.usage.prompt_tokens")
+                or attributes.get("gen_ai.usage.input_tokens")
+                or attributes.get("llm.token_count.prompt")  # OpenInference
+                or 0
+            )
+            completion_tokens = (
+                attributes.get("gen_ai.usage.completion_tokens")
+                or attributes.get("gen_ai.usage.output_tokens")
+                or attributes.get("llm.token_count.completion")  # OpenInference
+                or 0
+            )
+
+            # Skip if no tokens recorded
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return span
+
+            # Get call type - support both GenAI and OpenInference conventions
+            span_kind = attributes.get("openinference.span.kind", "").upper()
+            call_type = attributes.get("gen_ai.operation.name") or span_kind.lower() or "chat"
+
+            # Map operation names to call types
+            call_type_mapping = {
+                "chat": "chat",
+                "completion": "chat",
+                "embedding": "embedding",
+                "embeddings": "embedding",
+                "text_generation": "chat",
+                "image_generation": "image",
+                "audio": "audio",
+                "llm": "chat",
+                "chain": "chat",
+                "retriever": "embedding",
+                "reranker": "embedding",
+                "tool": "chat",
+                "agent": "chat",
+            }
+            normalized_call_type = call_type_mapping.get(str(call_type).lower(), "chat")
+
+            # Calculate cost
+            usage = {
+                "prompt_tokens": int(prompt_tokens),
+                "completion_tokens": int(completion_tokens),
+                "total_tokens": int(prompt_tokens) + int(completion_tokens),
+            }
+
+            cost_info = self.cost_calculator.calculate_granular_cost(
+                model=str(model),
+                usage=usage,
+                call_type=normalized_call_type,
+            )
+
+            if cost_info and cost_info.get("total", 0.0) > 0:
+                # Add cost attributes to the mutable copy
+                attributes["gen_ai.usage.cost.total"] = cost_info["total"]
+
+                if cost_info.get("prompt", 0.0) > 0:
+                    attributes["gen_ai.usage.cost.prompt"] = cost_info["prompt"]
+                if cost_info.get("completion", 0.0) > 0:
+                    attributes["gen_ai.usage.cost.completion"] = cost_info["completion"]
+
+                logger.info(
+                    f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
+                    f"for model {model} ({usage['total_tokens']} tokens)"
+                )
+
+                # Create a new ReadableSpan with enriched attributes
+                # ReadableSpan is immutable once ended, so build a replacement
+                from opentelemetry.sdk.trace import ReadableSpan as RS
+
+                enriched_span = RS(
+                    name=span.name,
+                    context=span.context,
+                    kind=span.kind,
+                    parent=span.parent,
+                    start_time=span.start_time,
+                    end_time=span.end_time,
+                    status=span.status,
+                    attributes=attributes,  # Use enriched attributes
+                    events=span.events,
+                    links=span.links,
+                    resource=span.resource,
+                    instrumentation_scope=span.instrumentation_scope,
+                )
+                return enriched_span
+
+        except Exception as e:
+            logger.warning(
+                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
+                exc_info=True,
+            )
+
+        return span
+
+    def shutdown(self) -> None:
+        """Shutdown the wrapped exporter."""
+        logger.info("CostEnrichingSpanExporter shutting down")
+        self.wrapped_exporter.shutdown()
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush the wrapped exporter.
+
+        Args:
+            timeout_millis: Timeout in milliseconds.
+
+        Returns:
+            True if flush succeeded.
+        """
+        return self.wrapped_exporter.force_flush(timeout_millis)
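A minimal end-to-end sketch of this exporter, assuming only the OpenTelemetry SDK and this package are installed; the model name and token counts are arbitrary, and the cost attribute only appears when the model exists in the pricing table:

# Sketch: exercise the exporter with an in-memory backend and a span that
# carries OpenInference-style attributes (values here are arbitrary).
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

from genai_otel.cost_enriching_exporter import CostEnrichingSpanExporter

memory = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(CostEnrichingSpanExporter(memory)))

tracer = provider.get_tracer("demo")
with tracer.start_as_current_span("llm.call") as span:
    span.set_attribute("llm.model_name", "gpt-4o")         # OpenInference model key
    span.set_attribute("llm.token_count.prompt", 120)      # OpenInference token keys
    span.set_attribute("llm.token_count.completion", 40)

exported = memory.get_finished_spans()[0]
# None if "gpt-4o" is missing from the pricing table; a float otherwise.
print(exported.attributes.get("gen_ai.usage.cost.total"))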
genai_otel/cost_enrichment_processor.py
CHANGED
@@ -132,9 +132,8 @@ class CostEnrichmentSpanProcessor(SpanProcessor):

         if cost_info and cost_info.get("total", 0.0) > 0:
             # Add cost attributes to the span
-            #
-
-            if isinstance(span, Span):
+            # Use duck typing to check if span supports set_attribute
+            if hasattr(span, "set_attribute") and callable(getattr(span, "set_attribute")):
                 span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])

                 if cost_info.get("prompt", 0.0) > 0:
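A toy illustration of the new guard, with hypothetical stand-in classes (not from the package): an ended ReadableSpan has no set_attribute, so it is skipped silently, while a writable span is updated.

# Hypothetical stand-ins for a writable Span and an ended ReadableSpan.
class WritableSpan:
    def set_attribute(self, key, value):
        print(f"set {key}={value}")

class EndedSpan:
    pass  # no set_attribute, like a ReadableSpan seen in on_end()

for span in (WritableSpan(), EndedSpan()):
    # Same guard as the new code: attribute presence, not isinstance.
    if hasattr(span, "set_attribute") and callable(getattr(span, "set_attribute")):
        span.set_attribute("gen_ai.usage.cost.total", 0.0042)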
genai_otel/instrumentors/huggingface_instrumentor.py
CHANGED
@@ -3,9 +3,11 @@
 This instrumentor automatically traces:
 1. HuggingFace Transformers pipelines (local model execution)
 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+3. Direct model usage via AutoModelForCausalLM.generate() and forward()

 Note: Transformers runs models locally (no API costs), but InferenceClient makes
 API calls to HuggingFace endpoints which may have costs based on usage.
+Local model costs are estimated based on parameter count and token usage.
 """

 import logging
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
 class HuggingFaceInstrumentor(BaseInstrumentor):
     """Instrumentor for HuggingFace Transformers and Inference API.

-    Instruments
-    - transformers.pipeline (local execution,
+    Instruments:
+    - transformers.pipeline (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
     - huggingface_hub.InferenceClient (API calls, may have costs)
     """

@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         super().__init__()
         self._transformers_available = False
         self._inference_client_available = False
+        self._model_classes_instrumented = False
         self._check_availability()

     def _check_availability(self):
@@ -55,13 +60,14 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
             self._inference_client_available = False

     def instrument(self, config: OTelConfig):
-        """Instrument HuggingFace Transformers pipelines and InferenceClient."""
-        self.config
+        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+        self._setup_config(config)

         instrumented_count = 0

-        # Instrument transformers
+        # Instrument transformers components if available
         if self._transformers_available:
+            # Instrument pipeline
             try:
                 self._instrument_transformers()
                 instrumented_count += 1
@@ -70,6 +76,15 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
                 if config.fail_on_error:
                     raise

+            # Instrument model classes (AutoModelForCausalLM, etc.)
+            try:
+                self._instrument_model_classes()
+                instrumented_count += 1
+            except Exception as e:
+                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                if config.fail_on_error:
+                    raise
+
         # Instrument InferenceClient if available
         if self._inference_client_available:
             try:
@@ -166,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         InferenceClient.text_generation = wrapped_text_generation
         logger.debug("HuggingFace InferenceClient instrumented")

+    def _instrument_model_classes(self):
+        """Instrument HuggingFace model classes for direct model usage."""
+        try:
+            import wrapt
+
+            # Import GenerationMixin - the base class that provides generate() method
+            # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it
+            try:
+                from transformers.generation.utils import GenerationMixin
+            except ImportError:
+                # Fallback for older transformers versions
+                from transformers.generation import GenerationMixin
+
+            # Store reference to instrumentor for use in wrapper
+            instrumentor = self
+
+            # Wrap the generate() method at GenerationMixin level (all models inherit from this)
+            original_generate = GenerationMixin.generate
+
+            @wrapt.decorator
+            def generate_wrapper(wrapped, instance, args, kwargs):
+                """Wrapper for model.generate() method."""
+                # Extract model info
+                model_name = getattr(instance, "name_or_path", "unknown")
+                if hasattr(instance.config, "_name_or_path"):
+                    model_name = instance.config._name_or_path
+
+                # Get input token count
+                input_ids = kwargs.get("input_ids") or (args[0] if args else None)
+                prompt_tokens = 0
+                if input_ids is not None:
+                    if hasattr(input_ids, "shape"):
+                        prompt_tokens = int(input_ids.shape[-1])
+                    elif isinstance(input_ids, (list, tuple)):
+                        prompt_tokens = len(input_ids[0]) if input_ids else 0
+
+                # Create span
+                with instrumentor.tracer.start_as_current_span(
+                    "huggingface.model.generate"
+                ) as span:
+                    # Set attributes
+                    span.set_attribute("gen_ai.system", "huggingface")
+                    span.set_attribute("gen_ai.request.model", model_name)
+                    span.set_attribute("gen_ai.operation.name", "text_generation")
+                    span.set_attribute("gen_ai.request.type", "chat")
+
+                    # Extract generation parameters
+                    if "max_length" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
+                    if "max_new_tokens" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
+                    if "temperature" in kwargs:
+                        span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
+                    if "top_p" in kwargs:
+                        span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])
+
+                    # Call original generate
+                    import time
+
+                    start_time = time.time()
+                    result = wrapped(*args, **kwargs)
+                    duration = time.time() - start_time
+
+                    # Extract output token count
+                    completion_tokens = 0
+                    if hasattr(result, "shape"):
+                        # result is a tensor
+                        total_length = int(result.shape[-1])
+                        completion_tokens = max(0, total_length - prompt_tokens)
+                    elif isinstance(result, (list, tuple)):
+                        # result is a list of sequences
+                        if result and hasattr(result[0], "shape"):
+                            total_length = int(result[0].shape[-1])
+                            completion_tokens = max(0, total_length - prompt_tokens)
+
+                    total_tokens = prompt_tokens + completion_tokens
+
+                    # Set token usage attributes
+                    if prompt_tokens > 0:
+                        span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                    if completion_tokens > 0:
+                        span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                    if total_tokens > 0:
+                        span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+
+                    # Record metrics
+                    if instrumentor.request_counter:
+                        instrumentor.request_counter.add(
+                            1, {"model": model_name, "provider": "huggingface"}
+                        )
+
+                    if instrumentor.token_counter and total_tokens > 0:
+                        if prompt_tokens > 0:
+                            instrumentor.token_counter.add(
+                                prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                            )
+                        if completion_tokens > 0:
+                            instrumentor.token_counter.add(
+                                completion_tokens,
+                                {"token_type": "completion", "operation": span.name},
+                            )
+
+                    if instrumentor.latency_histogram:
+                        instrumentor.latency_histogram.record(duration, {"operation": span.name})
+
+                    # Calculate and record cost if enabled
+                    if (
+                        instrumentor.config
+                        and instrumentor.config.enable_cost_tracking
+                        and total_tokens > 0
+                    ):
+                        try:
+                            usage = {
+                                "prompt_tokens": prompt_tokens,
+                                "completion_tokens": completion_tokens,
+                                "total_tokens": total_tokens,
+                            }
+
+                            costs = instrumentor.cost_calculator.calculate_granular_cost(
+                                model=model_name, usage=usage, call_type="chat"
+                            )
+
+                            if costs["total"] > 0:
+                                if instrumentor.cost_counter:
+                                    instrumentor.cost_counter.add(
+                                        costs["total"], {"model": model_name}
+                                    )
+                                span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                                if costs["prompt"] > 0:
+                                    span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                                if costs["completion"] > 0:
+                                    span.set_attribute(
+                                        "gen_ai.usage.cost.completion", costs["completion"]
+                                    )
+
+                                logger.debug(
+                                    f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                    f"cost: ${costs['total']:.6f}"
+                                )
+                        except Exception as e:
+                            logger.warning(f"Failed to calculate cost: {e}")
+
+                return result
+
+            # Apply wrapper to GenerationMixin.generate (all models inherit this)
+            GenerationMixin.generate = generate_wrapper(original_generate)
+
+            self._model_classes_instrumented = True
+            logger.debug(
+                "HuggingFace GenerationMixin.generate() instrumented "
+                "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+            )
+
+        except ImportError as e:
+            logger.debug(f"Could not import model classes for instrumentation: {e}")
+        except Exception as e:
+            raise  # Re-raise to be caught by instrument() method
+
     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
         """Extract attributes from Inference API call."""
         attrs = {}
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: genai-otel-instrument
-Version: 0.1.7.dev0
+Version: 0.1.9.dev0
 Summary: Comprehensive OpenTelemetry auto-instrumentation for LLM/GenAI applications
 Author-email: Kshitij Thakkar <kshitijthakkar@rocketmail.com>
 License: Apache-2.0
@@ -257,7 +257,8 @@ For a more comprehensive demonstration of various LLM providers and MCP tools, r

 ### LLM Providers (Auto-detected)
 - **With Full Cost Tracking**: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure OpenAI, Cohere, Mistral AI, Together AI, Groq, Ollama, Vertex AI
-- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution
+- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution with estimated costs)
+- **HuggingFace Support**: `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`, `InferenceClient` API calls
 - **Other Providers**: Anyscale

 ### Frameworks
@@ -307,7 +308,10 @@ The library includes comprehensive cost tracking with pricing data for **145+ models

 ### Special Pricing Models
 - **Replicate**: Hardware-based pricing ($/second of GPU/CPU time) - not token-based
-- **HuggingFace Transformers**: Local execution
+- **HuggingFace Transformers**: Local model execution with estimated costs based on parameter count
+  - Supports `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`
+  - Cost estimation uses GPU/compute resource pricing tiers (tiny/small/medium/large)
+  - Automatic token counting from tensor shapes

 ### Pricing Features
 - **Differential Pricing**: Separate rates for prompt tokens vs. completion tokens
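The tiny/small/medium/large tiers above suggest bucketing by parameter count. A purely hypothetical illustration of that idea (the library's real thresholds and rates are not shown in this diff):

# Illustrative only - hypothetical thresholds, not genai_otel's real ones.
def estimate_tier(param_count: int) -> str:
    if param_count < 1_000_000_000:
        return "tiny"
    if param_count < 10_000_000_000:
        return "small"
    if param_count < 70_000_000_000:
        return "medium"
    return "large"

print(estimate_tier(124_000_000))  # a GPT-2-sized model -> "tiny"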
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/RECORD
RENAMED
@@ -1,10 +1,11 @@
 genai_otel/__init__.py,sha256=OWgm1dihRkwBQU8fUPnVhE5XCZeF5f15UyH4w6LqGZU,4469
-genai_otel/__version__.py,sha256
-genai_otel/auto_instrument.py,sha256=
+genai_otel/__version__.py,sha256=-7XfyoqjXB3n4CmJWgQNXd5cM7X1D0ZC3i_A8QhrN6c,751
+genai_otel/auto_instrument.py,sha256=uHJGTlSI4UO-sdFtWgxSmNkhd1_GTWvr3S-rY8MQ4E4,16513
 genai_otel/cli.py,sha256=mbhaTU0WIAkvPKdIing-guIxPDjEKQftChWQUtPFzkY,3170
 genai_otel/config.py,sha256=2CIbZH8WKkVzr73y9AOWmscvEW-kUwMLSAyOy9BFqGI,7871
 genai_otel/cost_calculator.py,sha256=BOW-TC41lJ1GcL4hIGZ4NySyV8aro4_juMOe2IqtJ-A,18115
-genai_otel/
+genai_otel/cost_enriching_exporter.py,sha256=iED7njK21UBKlxRElGfqSs66gMkzDCr8fm-4ZkJBiLU,7874
+genai_otel/cost_enrichment_processor.py,sha256=fQoVosBUgshD9ZRxWpwqqPWYnyhrvKBTJAW0S2H7t1E,7090
 genai_otel/exceptions.py,sha256=gIRvbI7c4V-M-PG9jS0o4ESRwHUWCm6DVihjfyJI1yg,429
 genai_otel/gpu_metrics.py,sha256=hBawkm-NErviwiLzb7z92INstFHec2pREn945rYgrT4,13408
 genai_otel/llm_pricing.json,sha256=ZQ1uILEdQ_yNzenvlPpKazo9NnYqEZgbL_tzQ6Mw2oc,20825
@@ -20,7 +21,7 @@ genai_otel/instrumentors/base.py,sha256=5N0eMDoPT49PedhoDM0EGu8NE9UvseaiWhqfb9UH
 genai_otel/instrumentors/cohere_instrumentor.py,sha256=fsKvHaWvMRAGRbOtybVJVVz-FS_-wmgTJo3Q_F86BOY,5074
 genai_otel/instrumentors/google_ai_instrumentor.py,sha256=ExNo0_OxfCxaRpuUXYU8UZ-ClQRHRLUvf7-kMC6zdc8,2984
 genai_otel/instrumentors/groq_instrumentor.py,sha256=bCm7IDmDyvg0-XuzcCSO5xf9QvDlQGwb7bdQ_ooS6QI,3398
-genai_otel/instrumentors/huggingface_instrumentor.py,sha256=
+genai_otel/instrumentors/huggingface_instrumentor.py,sha256=wvolJZnq9YKfJsvNvUnoOpL1tbeGy0DuxVmmmI1_BoA,17815
 genai_otel/instrumentors/langchain_instrumentor.py,sha256=002ZrKP04l7VaYxo7nAAwl-uvMVwpzVehO2oS23ed-o,2685
 genai_otel/instrumentors/llamaindex_instrumentor.py,sha256=zZ1J7W4yQo1Ur6Y5y0UXpDdEx9oDnmsqNIin5Jrv9os,1206
 genai_otel/instrumentors/mistralai_instrumentor.py,sha256=Blo8X4WV-xQe-xF-jhkaGPavkgayANf1F3zCTzuhuL0,12478
@@ -37,9 +38,9 @@ genai_otel/mcp_instrumentors/kafka_instrumentor.py,sha256=QJYJC1rvo_zZAIaw-cp_Ic
 genai_otel/mcp_instrumentors/manager.py,sha256=1Pj5lkEOL8Yq1Oeud4ZExN6k6NLIVtTzKnFLNiFdJvw,5895
 genai_otel/mcp_instrumentors/redis_instrumentor.py,sha256=KUbs0dMyfMzU4T0SS8u43I5fvr09lcBBM92I3KCsYUw,943
 genai_otel/mcp_instrumentors/vector_db_instrumentor.py,sha256=2vhnk4PGpfYKr-XlRbnCIOap4BPKHOn--fh-ai2YXlM,9994
-genai_otel_instrument-0.1.
-genai_otel_instrument-0.1.
-genai_otel_instrument-0.1.
-genai_otel_instrument-0.1.
-genai_otel_instrument-0.1.
-genai_otel_instrument-0.1.
+genai_otel_instrument-0.1.9.dev0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+genai_otel_instrument-0.1.9.dev0.dist-info/METADATA,sha256=0VWI_mIy84sv8pA0Ae_WY3X3XJKpJZTdjw_7n0o9-XQ,39613
+genai_otel_instrument-0.1.9.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+genai_otel_instrument-0.1.9.dev0.dist-info/entry_points.txt,sha256=E9UqoHA_fq69yNGAY3SRYf5HH94sZT5DiDueiU1v0KM,57
+genai_otel_instrument-0.1.9.dev0.dist-info/top_level.txt,sha256=cvCm8PUwvYUSQKruk-x6S-_YuDyhOBk8gD910XICcbg,11
+genai_otel_instrument-0.1.9.dev0.dist-info/RECORD,,
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/WHEEL
RENAMED
File without changes
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/entry_points.txt
RENAMED
File without changes
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/licenses/LICENSE
RENAMED
File without changes
{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/top_level.txt
RENAMED
File without changes