genai_otel_instrument-0.1.24-py3-none-any.whl
This diff reflects the contents of a publicly available package version as released to a supported registry; it is provided for informational purposes only and shows the changes between package versions as they appear in the public registry.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
genai_otel/instrumentors/huggingface_instrumentor.py
@@ -0,0 +1,399 @@
"""OpenTelemetry instrumentor for HuggingFace Transformers and Inference API.

This instrumentor automatically traces:
1. HuggingFace Transformers pipelines (local model execution)
2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
3. Direct model usage via AutoModelForCausalLM.generate() and forward()

Note: Transformers runs models locally (no API costs), but InferenceClient makes
API calls to HuggingFace endpoints which may have costs based on usage.
Local model costs are estimated based on parameter count and token usage.
"""

import logging
from typing import Dict, Optional

from ..config import OTelConfig
from .base import BaseInstrumentor

logger = logging.getLogger(__name__)


class HuggingFaceInstrumentor(BaseInstrumentor):
    """Instrumentor for HuggingFace Transformers and Inference API.

    Instruments:
    - transformers.pipeline (local execution, estimated costs)
    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
    - huggingface_hub.InferenceClient (API calls, may have costs)
    """

    def __init__(self):
        """Initialize the instrumentor."""
        super().__init__()
        self._transformers_available = False
        self._inference_client_available = False
        self._model_classes_instrumented = False
        self._check_availability()

    def _check_availability(self):
        """Check if Transformers and InferenceClient libraries are available."""
        try:
            import transformers

            self._transformers_available = True
            logger.debug("Transformers library detected and available for instrumentation")
        except ImportError:
            logger.debug("Transformers library not installed, instrumentation will be skipped")
            self._transformers_available = False

        try:
            from huggingface_hub import InferenceClient

            self._inference_client_available = True
            logger.debug("HuggingFace InferenceClient detected and available for instrumentation")
        except ImportError:
            logger.debug(
                "huggingface_hub not installed, InferenceClient instrumentation will be skipped"
            )
            self._inference_client_available = False

    def instrument(self, config: OTelConfig):
        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
        self._setup_config(config)

        instrumented_count = 0

        # Instrument transformers components if available
        if self._transformers_available:
            # Instrument pipeline
            try:
                self._instrument_transformers()
                instrumented_count += 1
            except Exception as e:
                logger.error("Failed to instrument HuggingFace Transformers: %s", e, exc_info=True)
                if config.fail_on_error:
                    raise

            # Instrument model classes (AutoModelForCausalLM, etc.)
            try:
                self._instrument_model_classes()
                instrumented_count += 1
            except Exception as e:
                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
                if config.fail_on_error:
                    raise

        # Instrument InferenceClient if available
        if self._inference_client_available:
            try:
                self._instrument_inference_client()
                instrumented_count += 1
            except Exception as e:
                logger.error(
                    "Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True
                )
                if config.fail_on_error:
                    raise

        if instrumented_count > 0:
            self._instrumented = True
            logger.info(f"HuggingFace instrumentation enabled ({instrumented_count} components)")

    def _instrument_transformers(self):
        """Instrument transformers.pipeline for local model execution."""
        try:
            import importlib

            transformers_module = importlib.import_module("transformers")
            original_pipeline = transformers_module.pipeline

            # Capture self reference for use in nested classes
            instrumentor = self

            def wrapped_pipeline(*args, **kwargs):
                pipe = original_pipeline(*args, **kwargs)

                class WrappedPipeline:
                    def __init__(self, original_pipe):
                        self._original_pipe = original_pipe

                    def __call__(self, *call_args, **call_kwargs):
                        # Use instrumentor.tracer instead of config.tracer
                        with instrumentor.tracer.start_span("huggingface.pipeline") as span:
                            task = getattr(self._original_pipe, "task", "unknown")
                            model = getattr(
                                getattr(self._original_pipe, "model", None),
                                "name_or_path",
                                "unknown",
                            )

                            span.set_attribute("gen_ai.system", "huggingface")
                            span.set_attribute("gen_ai.request.model", model)
                            span.set_attribute("gen_ai.operation.name", task)
                            span.set_attribute("huggingface.task", task)

                            if instrumentor.request_counter:
                                instrumentor.request_counter.add(
                                    1, {"model": model, "provider": "huggingface"}
                                )

                            result = self._original_pipe(*call_args, **call_kwargs)

                            # End span manually
                            span.end()
                            return result

                    def __getattr__(self, name):
                        # Delegate all other attribute access to the original pipe
                        return getattr(self._original_pipe, name)

                return WrappedPipeline(pipe)

            transformers_module.pipeline = wrapped_pipeline
            logger.debug("HuggingFace Transformers pipeline instrumented")

        except Exception as e:
            raise  # Re-raise to be caught by instrument() method

    def _instrument_inference_client(self):
        """Instrument HuggingFace InferenceClient for API calls."""
        from huggingface_hub import InferenceClient

        # Store original methods
        original_chat_completion = InferenceClient.chat_completion
        original_text_generation = InferenceClient.text_generation

        # Wrap chat_completion method
        wrapped_chat_completion = self.create_span_wrapper(
            span_name="huggingface.inference.chat_completion",
            extract_attributes=self._extract_inference_client_attributes,
        )(original_chat_completion)

        # Wrap text_generation method
        wrapped_text_generation = self.create_span_wrapper(
            span_name="huggingface.inference.text_generation",
            extract_attributes=self._extract_inference_client_attributes,
        )(original_text_generation)

        InferenceClient.chat_completion = wrapped_chat_completion
        InferenceClient.text_generation = wrapped_text_generation
        logger.debug("HuggingFace InferenceClient instrumented")

    def _instrument_model_classes(self):
        """Instrument HuggingFace model classes for direct model usage."""
        try:
            import wrapt

            # Import GenerationMixin - the base class that provides generate() method
            # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it
            try:
                from transformers.generation.utils import GenerationMixin
            except ImportError:
                # Fallback for older transformers versions
                from transformers.generation import GenerationMixin

            # Store reference to instrumentor for use in wrapper
            instrumentor = self

            # Wrap the generate() method at GenerationMixin level (all models inherit from this)
            original_generate = GenerationMixin.generate

            @wrapt.decorator
            def generate_wrapper(wrapped, instance, args, kwargs):
                """Wrapper for model.generate() method."""
                # Extract model info
                model_name = getattr(instance, "name_or_path", "unknown")
                if hasattr(instance.config, "_name_or_path"):
                    model_name = instance.config._name_or_path

                # Get input token count
                input_ids = kwargs.get("input_ids") or (args[0] if args else None)
                prompt_tokens = 0
                if input_ids is not None:
                    if hasattr(input_ids, "shape"):
                        prompt_tokens = int(input_ids.shape[-1])
                    elif isinstance(input_ids, (list, tuple)):
                        prompt_tokens = len(input_ids[0]) if input_ids else 0

                # Create span
                with instrumentor.tracer.start_as_current_span(
                    "huggingface.model.generate"
                ) as span:
                    # Set attributes
                    span.set_attribute("gen_ai.system", "huggingface")
                    span.set_attribute("gen_ai.request.model", model_name)
                    span.set_attribute("gen_ai.operation.name", "text_generation")
                    span.set_attribute("gen_ai.request.type", "chat")

                    # Extract generation parameters
                    if "max_length" in kwargs:
                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
                    if "max_new_tokens" in kwargs:
                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
                    if "temperature" in kwargs:
                        span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
                    if "top_p" in kwargs:
                        span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])

                    # Call original generate
                    import time

                    start_time = time.time()
                    result = wrapped(*args, **kwargs)
                    duration = time.time() - start_time

                    # Extract output token count
                    completion_tokens = 0
                    if hasattr(result, "shape"):
                        # result is a tensor
                        total_length = int(result.shape[-1])
                        completion_tokens = max(0, total_length - prompt_tokens)
                    elif isinstance(result, (list, tuple)):
                        # result is a list of sequences
                        if result and hasattr(result[0], "shape"):
                            total_length = int(result[0].shape[-1])
                            completion_tokens = max(0, total_length - prompt_tokens)

                    total_tokens = prompt_tokens + completion_tokens

                    # Set token usage attributes
                    if prompt_tokens > 0:
                        span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
                    if completion_tokens > 0:
                        span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
                    if total_tokens > 0:
                        span.set_attribute("gen_ai.usage.total_tokens", total_tokens)

                    # Record metrics
                    if instrumentor.request_counter:
                        instrumentor.request_counter.add(
                            1, {"model": model_name, "provider": "huggingface"}
                        )

                    if instrumentor.token_counter and total_tokens > 0:
                        if prompt_tokens > 0:
                            instrumentor.token_counter.add(
                                prompt_tokens, {"token_type": "prompt", "operation": span.name}
                            )
                        if completion_tokens > 0:
                            instrumentor.token_counter.add(
                                completion_tokens,
                                {"token_type": "completion", "operation": span.name},
                            )

                    if instrumentor.latency_histogram:
                        instrumentor.latency_histogram.record(duration, {"operation": span.name})

                    # Calculate and record cost if enabled
                    if (
                        instrumentor.config
                        and instrumentor.config.enable_cost_tracking
                        and total_tokens > 0
                    ):
                        try:
                            usage = {
                                "prompt_tokens": prompt_tokens,
                                "completion_tokens": completion_tokens,
                                "total_tokens": total_tokens,
                            }

                            costs = instrumentor.cost_calculator.calculate_granular_cost(
                                model=model_name, usage=usage, call_type="chat"
                            )

                            if costs["total"] > 0:
                                if instrumentor.cost_counter:
                                    instrumentor.cost_counter.add(
                                        costs["total"], {"model": model_name}
                                    )
                                span.set_attribute("gen_ai.usage.cost.total", costs["total"])
                                if costs["prompt"] > 0:
                                    span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
                                if costs["completion"] > 0:
                                    span.set_attribute(
                                        "gen_ai.usage.cost.completion", costs["completion"]
                                    )

                                logger.debug(
                                    f"HuggingFace model {model_name}: {total_tokens} tokens, "
                                    f"cost: ${costs['total']:.6f}"
                                )
                        except Exception as e:
                            logger.warning(f"Failed to calculate cost: {e}")

                    return result

            # Apply wrapper to GenerationMixin.generate (all models inherit this)
            GenerationMixin.generate = generate_wrapper(original_generate)

            self._model_classes_instrumented = True
            logger.debug(
                "HuggingFace GenerationMixin.generate() instrumented "
                "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
            )

        except ImportError as e:
            logger.debug(f"Could not import model classes for instrumentation: {e}")
        except Exception as e:
            raise  # Re-raise to be caught by instrument() method

    def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
        """Extract attributes from Inference API call."""
        attrs = {}
        model = kwargs.get("model") or (args[0] if args else "unknown")

        attrs["gen_ai.system"] = "huggingface"
        attrs["gen_ai.request.model"] = str(model)
        attrs["gen_ai.operation.name"] = "chat"  # Default to chat

        # Extract parameters if available
        if "max_tokens" in kwargs:
            attrs["gen_ai.request.max_tokens"] = kwargs["max_tokens"]
        if "temperature" in kwargs:
            attrs["gen_ai.request.temperature"] = kwargs["temperature"]
        if "top_p" in kwargs:
            attrs["gen_ai.request.top_p"] = kwargs["top_p"]

        return attrs

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        """Extract token usage from HuggingFace response.

        Handles both:
        1. Transformers pipeline (local execution) - returns None
        2. InferenceClient API calls - extracts token usage from response

        Args:
            result: The pipeline output or InferenceClient response.

        Returns:
            Dict with token counts for InferenceClient calls, None for local execution.
        """
        # Check if this is an InferenceClient API response
        if result is not None and hasattr(result, "usage"):
            usage = result.usage

            # Extract token counts from usage object
            prompt_tokens = getattr(usage, "prompt_tokens", None)
            completion_tokens = getattr(usage, "completion_tokens", None)
            total_tokens = getattr(usage, "total_tokens", None)

            # If usage is a dict instead of object
            if isinstance(usage, dict):
                prompt_tokens = usage.get("prompt_tokens")
                completion_tokens = usage.get("completion_tokens")
                total_tokens = usage.get("total_tokens")

            # Return token counts if available
            if prompt_tokens is not None or completion_tokens is not None:
                return {
                    "prompt_tokens": prompt_tokens or 0,
                    "completion_tokens": completion_tokens or 0,
                    "total_tokens": total_tokens or (prompt_tokens or 0) + (completion_tokens or 0),
                }

        # HuggingFace Transformers is free (local execution)
        # No token-based costs to track
        return None
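The transparent-proxy pattern above is what keeps user code unaware of the instrumentation: wrapped_pipeline returns a WrappedPipeline whose __getattr__ forwards any attribute it does not define to the original pipe, so only __call__ is intercepted. Below is a minimal, self-contained sketch of the same pattern; Engine, make_engine, and the print calls are hypothetical stand-ins for the pipeline and the span lifecycle, not part of genai_otel.

class Engine:
    """Hypothetical stand-in for a transformers pipeline."""

    task = "text-generation"

    def __call__(self, prompt):
        return f"echo: {prompt}"


def make_engine():
    return Engine()


original_factory = make_engine


def traced_factory(*args, **kwargs):
    engine = original_factory(*args, **kwargs)

    class Wrapped:
        def __init__(self, inner):
            self._inner = inner

        def __call__(self, *a, **kw):
            # A real instrumentor would open a span here; print stands in.
            print(f"start span (task={self._inner.task})")
            try:
                return self._inner(*a, **kw)
            finally:
                print("end span")

        def __getattr__(self, name):
            # Only reached for names Wrapped does not define itself,
            # so the proxy stays transparent to callers.
            return getattr(self._inner, name)

    return Wrapped(engine)


make_engine = traced_factory

pipe = make_engine()
print(pipe.task)      # "text-generation", served by __getattr__
print(pipe("hello"))  # traced call through __call__

The token arithmetic in generate_wrapper is worth a worked example: causal-LM generate() returns the prompt concatenated with the new tokens, so a (1, 12) input that yields a (1, 48) output gives prompt_tokens=12 and completion_tokens=48-12=36. The max(0, ...) guard keeps the count sane for encoder-decoder models, whose outputs contain only the generated tokens.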
genai_otel/instrumentors/hyperbolic_instrumentor.py
@@ -0,0 +1,236 @@
"""OpenTelemetry instrumentor for Hyperbolic API calls.

This instrumentor automatically traces HTTP requests to Hyperbolic's API,
capturing relevant LLM attributes such as model name and token usage from
the raw HTTP response.
"""

import json
import logging
from typing import Any, Dict, Optional

import wrapt

from ..config import OTelConfig
from .base import BaseInstrumentor

logger = logging.getLogger(__name__)


class HyperbolicInstrumentor(BaseInstrumentor):
    """Instrumentor for Hyperbolic API (raw HTTP requests)"""

    HYPERBOLIC_API_BASE = "https://api.hyperbolic.xyz"

    def __init__(self):
        """Initialize the instrumentor."""
        super().__init__()
        self._requests_available = False
        self._check_availability()

    def _check_availability(self):
        """Check if requests library is available."""
        try:
            import requests

            self._requests_available = True
            logger.debug("Requests library detected, Hyperbolic instrumentation available")
        except ImportError:
            logger.debug("Requests library not installed, Hyperbolic instrumentation skipped")
            self._requests_available = False

    def instrument(self, config: OTelConfig):
        """Instrument requests library for Hyperbolic API calls.

        Args:
            config (OTelConfig): The OpenTelemetry configuration object.
        """
        if not self._requests_available:
            logger.debug("Skipping Hyperbolic instrumentation - requests library not available")
            return

        self.config = config

        try:
            import requests

            # Wrap requests.post to intercept Hyperbolic API calls
            original_post = requests.post

            @wrapt.decorator
            def hyperbolic_post_wrapper(wrapped, instance, args, kwargs):
                # Check if this is a Hyperbolic API call
                url = args[0] if args else kwargs.get("url", "")
                if not url.startswith(self.HYPERBOLIC_API_BASE):
                    # Not a Hyperbolic call, pass through
                    return wrapped(*args, **kwargs)

                # Extract attributes before the call
                request_data = kwargs.get("json", {})
                attrs = self._extract_request_attributes(request_data)

                # Create span wrapper
                with self.tracer.start_as_current_span("hyperbolic.chat.completion") as span:
                    # Set request attributes
                    for key, value in attrs.items():
                        span.set_attribute(key, value)

                    # Record request metric
                    model = attrs.get("gen_ai.request.model", "unknown")
                    if self.request_counter:
                        self.request_counter.add(1, {"model": model, "provider": "hyperbolic"})

                    try:
                        # Make the actual API call
                        response = wrapped(*args, **kwargs)

                        # Extract response attributes
                        if response.status_code == 200:
                            response_data = response.json()
                            self._extract_and_record_response(span, response_data)
                        else:
                            span.set_attribute("error", True)
                            span.set_attribute("http.status_code", response.status_code)

                        return response

                    except Exception as e:
                        span.set_attribute("error", True)
                        span.record_exception(e)
                        if self.error_counter:
                            self.error_counter.add(
                                1,
                                {
                                    "operation": "chat.completion",
                                    "error.type": type(e).__name__,
                                    "provider": "hyperbolic",
                                },
                            )
                        raise

            # Apply the wrapper
            requests.post = hyperbolic_post_wrapper(original_post)
            self._instrumented = True
            logger.info("Hyperbolic instrumentation enabled")

        except Exception as e:
            logger.error("Failed to instrument Hyperbolic: %s", e, exc_info=True)
            if config.fail_on_error:
                raise

    def _extract_request_attributes(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract attributes from Hyperbolic API request.

        Args:
            request_data: The JSON request payload.

        Returns:
            Dict[str, Any]: Dictionary of attributes to set on the span.
        """
        attrs = {}

        # Core attributes
        attrs["gen_ai.system"] = "hyperbolic"
        attrs["gen_ai.request.model"] = request_data.get("model", "unknown")
        attrs["gen_ai.operation.name"] = "chat"

        messages = request_data.get("messages", [])
        attrs["gen_ai.request.message_count"] = len(messages)

        # Request parameters
        if "temperature" in request_data:
            attrs["gen_ai.request.temperature"] = request_data["temperature"]
        if "top_p" in request_data:
            attrs["gen_ai.request.top_p"] = request_data["top_p"]
        if "max_tokens" in request_data:
            attrs["gen_ai.request.max_tokens"] = request_data["max_tokens"]

        # First message preview
        if messages:
            first_message = str(messages[0])[:200]
            attrs["gen_ai.request.first_message"] = first_message

        return attrs

    def _extract_and_record_response(self, span, response_data: Dict[str, Any]):
        """Extract response attributes and record metrics.

        Args:
            span: The OpenTelemetry span.
            response_data: The JSON response from Hyperbolic API.
        """
        # Response ID
        if "id" in response_data:
            span.set_attribute("gen_ai.response.id", response_data["id"])

        # Response model
        if "model" in response_data:
            span.set_attribute("gen_ai.response.model", response_data["model"])

        # Finish reasons
        choices = response_data.get("choices", [])
        if choices:
            finish_reasons = [
                choice.get("finish_reason") for choice in choices if "finish_reason" in choice
            ]
            if finish_reasons:
                span.set_attribute("gen_ai.response.finish_reasons", finish_reasons)

        # Extract token usage
        usage_data = response_data.get("usage", {})
        if usage_data:
            usage_dict = {
                "prompt_tokens": usage_data.get("prompt_tokens", 0),
                "completion_tokens": usage_data.get("completion_tokens", 0),
                "total_tokens": usage_data.get("total_tokens", 0),
            }

            # Record token usage as span attributes
            span.set_attribute("gen_ai.usage.prompt_tokens", usage_dict["prompt_tokens"])
            span.set_attribute("gen_ai.usage.completion_tokens", usage_dict["completion_tokens"])
            span.set_attribute("gen_ai.usage.total_tokens", usage_dict["total_tokens"])

            # Record token metrics
            if self.token_counter:
                model = span.attributes.get("gen_ai.request.model", "unknown")
                self.token_counter.add(
                    usage_dict["prompt_tokens"],
                    {"token_type": "prompt", "model": model, "provider": "hyperbolic"},
                )
                self.token_counter.add(
                    usage_dict["completion_tokens"],
                    {"token_type": "completion", "model": model, "provider": "hyperbolic"},
                )

            # Calculate and record cost
            if self.config.enable_cost_tracking:
                from ..cost_calculator import CostCalculator

                cost_calc = CostCalculator(custom_pricing_json=self.config.custom_pricing_json)
                model = span.attributes.get("gen_ai.request.model", "unknown")
                cost = cost_calc.calculate_cost(
                    model_name=model,
                    prompt_tokens=usage_dict["prompt_tokens"],
                    completion_tokens=usage_dict["completion_tokens"],
                    call_type="chat",
                )

                if cost > 0 and self.cost_counter:
                    span.set_attribute("gen_ai.cost.amount", cost)
                    self.cost_counter.add(
                        cost, {"model": model, "provider": "hyperbolic", "call_type": "chat"}
                    )

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        """Extract token usage from response.

        Note: This method is required by BaseInstrumentor but not used for HTTP-based
        instrumentation. Token extraction is handled in _extract_and_record_response.

        Args:
            result: The API response (unused for HTTP instrumentation).

        Returns:
            None
        """
        return None
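HyperbolicInstrumentor does not wrap an SDK; it patches requests.post itself and lets a URL-prefix check decide which calls pay the tracing cost. The sketch below reproduces just that guard with wrapt, the same library the instrumentor uses; the print calls are hypothetical placeholders for the span and metric recording, and nothing here is part of genai_otel.

import wrapt
import requests

API_BASE = "https://api.hyperbolic.xyz"  # mirrors HYPERBOLIC_API_BASE

_original_post = requests.post


@wrapt.decorator
def guarded_post(wrapped, instance, args, kwargs):
    # wrapt hands us the original callable plus its arguments explicitly.
    url = args[0] if args else kwargs.get("url", "")
    if not url.startswith(API_BASE):
        # Unrelated HTTP traffic passes through untouched.
        return wrapped(*args, **kwargs)

    payload = kwargs.get("json", {}) or {}
    print(f"tracing POST to {url}, model={payload.get('model', 'unknown')}")
    try:
        response = wrapped(*args, **kwargs)
        print(f"status={response.status_code}")
        return response
    except Exception as exc:
        print(f"error: {type(exc).__name__}")
        raise


# Patch the module-level helper; every caller of requests.post is now guarded.
requests.post = guarded_post(_original_post)

One trade-off of this design: because the patch sits on the module-level requests.post, code that sends Hyperbolic traffic through a requests.Session (session.post) bypasses it entirely, which is the cost of patching the convenience helper rather than Session.request.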