genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,399 @@
+ """OpenTelemetry instrumentor for HuggingFace Transformers and Inference API.
+
+ This instrumentor automatically traces:
+ 1. HuggingFace Transformers pipelines (local model execution)
+ 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+ 3. Direct model usage via AutoModelForCausalLM.generate() and forward()
+
+ Note: Transformers runs models locally (no API costs), but InferenceClient makes
+ API calls to HuggingFace endpoints which may have costs based on usage.
+ Local model costs are estimated based on parameter count and token usage.
+ """
+
+ import logging
+ from typing import Any, Dict, Optional
+
+ from ..config import OTelConfig
+ from .base import BaseInstrumentor
+
+ logger = logging.getLogger(__name__)
+
+
+ class HuggingFaceInstrumentor(BaseInstrumentor):
+     """Instrumentor for HuggingFace Transformers and Inference API.
+
+     Instruments:
+     - transformers.pipeline (local execution, estimated costs)
+     - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+     - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
+     - huggingface_hub.InferenceClient (API calls, may have costs)
+     """
+
+     def __init__(self):
+         """Initialize the instrumentor."""
+         super().__init__()
+         self._transformers_available = False
+         self._inference_client_available = False
+         self._model_classes_instrumented = False
+         self._check_availability()
+
+     def _check_availability(self):
+         """Check if Transformers and InferenceClient libraries are available."""
+         try:
+             import transformers
+
+             self._transformers_available = True
+             logger.debug("Transformers library detected and available for instrumentation")
+         except ImportError:
+             logger.debug("Transformers library not installed, instrumentation will be skipped")
+             self._transformers_available = False
+
+         try:
+             from huggingface_hub import InferenceClient
+
+             self._inference_client_available = True
+             logger.debug("HuggingFace InferenceClient detected and available for instrumentation")
+         except ImportError:
+             logger.debug(
+                 "huggingface_hub not installed, InferenceClient instrumentation will be skipped"
+             )
+             self._inference_client_available = False
+
+     def instrument(self, config: OTelConfig):
+         """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+         self._setup_config(config)
+
+         instrumented_count = 0
+
+         # Instrument transformers components if available
+         if self._transformers_available:
+             # Instrument pipeline
+             try:
+                 self._instrument_transformers()
+                 instrumented_count += 1
+             except Exception as e:
+                 logger.error("Failed to instrument HuggingFace Transformers: %s", e, exc_info=True)
+                 if config.fail_on_error:
+                     raise
+
+             # Instrument model classes (AutoModelForCausalLM, etc.)
+             try:
+                 self._instrument_model_classes()
+                 instrumented_count += 1
+             except Exception as e:
+                 logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                 if config.fail_on_error:
+                     raise
+
+         # Instrument InferenceClient if available
+         if self._inference_client_available:
+             try:
+                 self._instrument_inference_client()
+                 instrumented_count += 1
+             except Exception as e:
+                 logger.error(
+                     "Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True
+                 )
+                 if config.fail_on_error:
+                     raise
+
+         if instrumented_count > 0:
+             self._instrumented = True
+             logger.info(f"HuggingFace instrumentation enabled ({instrumented_count} components)")
+
+     def _instrument_transformers(self):
+         """Instrument transformers.pipeline for local model execution."""
+         import importlib
+
+         transformers_module = importlib.import_module("transformers")
+         original_pipeline = transformers_module.pipeline
+
+         # Capture self reference for use in nested classes
+         instrumentor = self
+
+         def wrapped_pipeline(*args, **kwargs):
+             pipe = original_pipeline(*args, **kwargs)
+
+             class WrappedPipeline:
+                 def __init__(self, original_pipe):
+                     self._original_pipe = original_pipe
+
+                 def __call__(self, *call_args, **call_kwargs):
+                     # Use instrumentor.tracer instead of config.tracer. The context
+                     # manager ends the span on exit, so no manual span.end() is needed.
+                     with instrumentor.tracer.start_as_current_span("huggingface.pipeline") as span:
+                         task = getattr(self._original_pipe, "task", "unknown")
+                         model = getattr(
+                             getattr(self._original_pipe, "model", None),
+                             "name_or_path",
+                             "unknown",
+                         )
+
+                         span.set_attribute("gen_ai.system", "huggingface")
+                         span.set_attribute("gen_ai.request.model", model)
+                         span.set_attribute("gen_ai.operation.name", task)
+                         span.set_attribute("huggingface.task", task)
+
+                         if instrumentor.request_counter:
+                             instrumentor.request_counter.add(
+                                 1, {"model": model, "provider": "huggingface"}
+                             )
+
+                         return self._original_pipe(*call_args, **call_kwargs)
+
+                 def __getattr__(self, name):
+                     # Delegate all other attribute access to the original pipe
+                     return getattr(self._original_pipe, name)
+
+             return WrappedPipeline(pipe)
+
+         transformers_module.pipeline = wrapped_pipeline
+         logger.debug("HuggingFace Transformers pipeline instrumented")
+
+     def _instrument_inference_client(self):
+         """Instrument HuggingFace InferenceClient for API calls."""
+         from huggingface_hub import InferenceClient
+
+         # Store original methods
+         original_chat_completion = InferenceClient.chat_completion
+         original_text_generation = InferenceClient.text_generation
+
+         # Wrap chat_completion method
+         wrapped_chat_completion = self.create_span_wrapper(
+             span_name="huggingface.inference.chat_completion",
+             extract_attributes=self._extract_inference_client_attributes,
+         )(original_chat_completion)
+
+         # Wrap text_generation method
+         wrapped_text_generation = self.create_span_wrapper(
+             span_name="huggingface.inference.text_generation",
+             extract_attributes=self._extract_inference_client_attributes,
+         )(original_text_generation)
+
+         InferenceClient.chat_completion = wrapped_chat_completion
+         InferenceClient.text_generation = wrapped_text_generation
+         logger.debug("HuggingFace InferenceClient instrumented")
+
+     def _instrument_model_classes(self):
+         """Instrument HuggingFace model classes for direct model usage."""
+         try:
+             import time
+
+             import wrapt
+
+             # Import GenerationMixin - the base class that provides the generate() method.
+             # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it.
+             try:
+                 from transformers.generation.utils import GenerationMixin
+             except ImportError:
+                 # Fallback for older transformers versions
+                 from transformers.generation import GenerationMixin
+
+             # Store reference to instrumentor for use in wrapper
+             instrumentor = self
+
+             # Wrap the generate() method at GenerationMixin level (all models inherit from this)
+             original_generate = GenerationMixin.generate
+
+             @wrapt.decorator
+             def generate_wrapper(wrapped, instance, args, kwargs):
+                 """Wrapper for model.generate() method."""
+                 # Extract model info
+                 model_name = getattr(instance, "name_or_path", "unknown")
+                 if hasattr(instance, "config") and hasattr(instance.config, "_name_or_path"):
+                     model_name = instance.config._name_or_path
+
+                 # Get input token count. Check for None explicitly: using `or` here
+                 # would trigger the ambiguous-truthiness error on multi-element tensors.
+                 input_ids = kwargs.get("input_ids")
+                 if input_ids is None and args:
+                     input_ids = args[0]
+                 prompt_tokens = 0
+                 if input_ids is not None:
+                     if hasattr(input_ids, "shape"):
+                         prompt_tokens = int(input_ids.shape[-1])
+                     elif isinstance(input_ids, (list, tuple)):
+                         prompt_tokens = len(input_ids[0]) if input_ids else 0
+
+                 # Create span
+                 with instrumentor.tracer.start_as_current_span(
+                     "huggingface.model.generate"
+                 ) as span:
+                     # Set attributes
+                     span.set_attribute("gen_ai.system", "huggingface")
+                     span.set_attribute("gen_ai.request.model", model_name)
+                     span.set_attribute("gen_ai.operation.name", "text_generation")
+                     span.set_attribute("gen_ai.request.type", "chat")
+
+                     # Extract generation parameters
+                     if "max_length" in kwargs:
+                         span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
+                     if "max_new_tokens" in kwargs:
+                         span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
+                     if "temperature" in kwargs:
+                         span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
+                     if "top_p" in kwargs:
+                         span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])
+
+                     # Call original generate
+                     start_time = time.time()
+                     result = wrapped(*args, **kwargs)
+                     duration = time.time() - start_time
+
+                     # Extract output token count
+                     completion_tokens = 0
+                     if hasattr(result, "shape"):
+                         # result is a tensor
+                         total_length = int(result.shape[-1])
+                         completion_tokens = max(0, total_length - prompt_tokens)
+                     elif isinstance(result, (list, tuple)):
+                         # result is a list of sequences
+                         if result and hasattr(result[0], "shape"):
+                             total_length = int(result[0].shape[-1])
+                             completion_tokens = max(0, total_length - prompt_tokens)
+
+                     total_tokens = prompt_tokens + completion_tokens
+
+                     # Set token usage attributes
+                     if prompt_tokens > 0:
+                         span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                     if completion_tokens > 0:
+                         span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                     if total_tokens > 0:
+                         span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+
+                     # Record metrics
+                     if instrumentor.request_counter:
+                         instrumentor.request_counter.add(
+                             1, {"model": model_name, "provider": "huggingface"}
+                         )
+
+                     if instrumentor.token_counter and total_tokens > 0:
+                         if prompt_tokens > 0:
+                             instrumentor.token_counter.add(
+                                 prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                             )
+                         if completion_tokens > 0:
+                             instrumentor.token_counter.add(
+                                 completion_tokens,
+                                 {"token_type": "completion", "operation": span.name},
+                             )
+
+                     if instrumentor.latency_histogram:
+                         instrumentor.latency_histogram.record(duration, {"operation": span.name})
+
+                     # Calculate and record cost if enabled
+                     if (
+                         instrumentor.config
+                         and instrumentor.config.enable_cost_tracking
+                         and total_tokens > 0
+                     ):
+                         try:
+                             usage = {
+                                 "prompt_tokens": prompt_tokens,
+                                 "completion_tokens": completion_tokens,
+                                 "total_tokens": total_tokens,
+                             }
+
+                             costs = instrumentor.cost_calculator.calculate_granular_cost(
+                                 model=model_name, usage=usage, call_type="chat"
+                             )
+
+                             if costs["total"] > 0:
+                                 if instrumentor.cost_counter:
+                                     instrumentor.cost_counter.add(
+                                         costs["total"], {"model": model_name}
+                                     )
+                                 span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                                 if costs["prompt"] > 0:
+                                     span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                                 if costs["completion"] > 0:
+                                     span.set_attribute(
+                                         "gen_ai.usage.cost.completion", costs["completion"]
+                                     )
+
+                             logger.debug(
+                                 f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                 f"cost: ${costs['total']:.6f}"
+                             )
+                         except Exception as e:
+                             logger.warning(f"Failed to calculate cost: {e}")
+
+                     return result
+
+             # Apply wrapper to GenerationMixin.generate (all models inherit this)
+             GenerationMixin.generate = generate_wrapper(original_generate)
+
+             self._model_classes_instrumented = True
+             logger.debug(
+                 "HuggingFace GenerationMixin.generate() instrumented "
+                 "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+             )
+
+         except ImportError as e:
+             logger.debug(f"Could not import model classes for instrumentation: {e}")
+
+     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, Any]:
+         """Extract attributes from an Inference API call."""
+         attrs = {}
+         model = kwargs.get("model") or (args[0] if args else "unknown")
+
+         attrs["gen_ai.system"] = "huggingface"
+         attrs["gen_ai.request.model"] = str(model)
+         attrs["gen_ai.operation.name"] = "chat"  # Default to chat
+
+         # Extract parameters if available
+         if "max_tokens" in kwargs:
+             attrs["gen_ai.request.max_tokens"] = kwargs["max_tokens"]
+         if "temperature" in kwargs:
+             attrs["gen_ai.request.temperature"] = kwargs["temperature"]
+         if "top_p" in kwargs:
+             attrs["gen_ai.request.top_p"] = kwargs["top_p"]
+
+         return attrs
+
+     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+         """Extract token usage from a HuggingFace response.
+
+         Handles both:
+         1. Transformers pipeline (local execution) - returns None
+         2. InferenceClient API calls - extracts token usage from the response
+
+         Args:
+             result: The pipeline output or InferenceClient response.
+
+         Returns:
+             Dict with token counts for InferenceClient calls, None for local execution.
+         """
+         # Check if this is an InferenceClient API response
+         if result is not None and hasattr(result, "usage"):
+             usage = result.usage
+
+             # Usage may be a plain dict or an attribute-style object
+             if isinstance(usage, dict):
+                 prompt_tokens = usage.get("prompt_tokens")
+                 completion_tokens = usage.get("completion_tokens")
+                 total_tokens = usage.get("total_tokens")
+             else:
+                 prompt_tokens = getattr(usage, "prompt_tokens", None)
+                 completion_tokens = getattr(usage, "completion_tokens", None)
+                 total_tokens = getattr(usage, "total_tokens", None)
+
+             # Return token counts if available
+             if prompt_tokens is not None or completion_tokens is not None:
+                 return {
+                     "prompt_tokens": prompt_tokens or 0,
+                     "completion_tokens": completion_tokens or 0,
+                     "total_tokens": total_tokens or (prompt_tokens or 0) + (completion_tokens or 0),
+                 }
+
+         # HuggingFace Transformers runs locally (no API usage payload);
+         # there are no token-based costs to track here.
+         return None
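
Usage note: below is a minimal sketch of how this instrumentor might be exercised, assuming OTelConfig can be constructed with defaults (its actual constructor arguments are not shown in this diff). Because instrument() replaces the transformers module's pipeline attribute, the factory should be imported after instrumentation so the patched version is picked up:

    from genai_otel.config import OTelConfig
    from genai_otel.instrumentors.huggingface_instrumentor import HuggingFaceInstrumentor

    instrumentor = HuggingFaceInstrumentor()
    instrumentor.instrument(OTelConfig())  # assumed default construction; patches pipeline and generate()

    from transformers import pipeline  # import after instrumenting to get the wrapped factory

    generator = pipeline("text-generation", model="gpt2")
    output = generator("Hello, world", max_new_tokens=8)  # emits a huggingface.pipeline span

Since GenerationMixin.generate is patched at the class level, direct model.generate() calls are traced even for models created before instrument() ran; only the pipeline factory is import-order sensitive.
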
@@ -0,0 +1,236 @@
+ """OpenTelemetry instrumentor for Hyperbolic API calls.
+
+ This instrumentor automatically traces HTTP requests to Hyperbolic's API,
+ capturing relevant LLM attributes such as model name and token usage from
+ the raw HTTP response.
+ """
+
+ import logging
+ from typing import Any, Dict, Optional
+
+ import wrapt
+
+ from ..config import OTelConfig
+ from .base import BaseInstrumentor
+
+ logger = logging.getLogger(__name__)
+
+
+ class HyperbolicInstrumentor(BaseInstrumentor):
+     """Instrumentor for the Hyperbolic API (raw HTTP requests)."""
+
+     HYPERBOLIC_API_BASE = "https://api.hyperbolic.xyz"
+
+     def __init__(self):
+         """Initialize the instrumentor."""
+         super().__init__()
+         self._requests_available = False
+         self._check_availability()
+
+     def _check_availability(self):
+         """Check if the requests library is available."""
+         try:
+             import requests
+
+             self._requests_available = True
+             logger.debug("Requests library detected, Hyperbolic instrumentation available")
+         except ImportError:
+             logger.debug("Requests library not installed, Hyperbolic instrumentation skipped")
+             self._requests_available = False
+
+     def instrument(self, config: OTelConfig):
+         """Instrument the requests library for Hyperbolic API calls.
+
+         Args:
+             config (OTelConfig): The OpenTelemetry configuration object.
+         """
+         if not self._requests_available:
+             logger.debug("Skipping Hyperbolic instrumentation - requests library not available")
+             return
+
+         self.config = config
+
+         try:
+             import requests
+
+             # Wrap requests.post to intercept Hyperbolic API calls
+             original_post = requests.post
+
+             @wrapt.decorator
+             def hyperbolic_post_wrapper(wrapped, instance, args, kwargs):
+                 # Check if this is a Hyperbolic API call
+                 url = args[0] if args else kwargs.get("url", "")
+                 if not str(url).startswith(self.HYPERBOLIC_API_BASE):
+                     # Not a Hyperbolic call, pass through
+                     return wrapped(*args, **kwargs)
+
+                 # Extract attributes before the call (json may be passed explicitly as None)
+                 request_data = kwargs.get("json", {}) or {}
+                 attrs = self._extract_request_attributes(request_data)
+
+                 # Create the span for this call
+                 with self.tracer.start_as_current_span("hyperbolic.chat.completion") as span:
+                     # Set request attributes
+                     for key, value in attrs.items():
+                         span.set_attribute(key, value)
+
+                     # Record request metric
+                     model = attrs.get("gen_ai.request.model", "unknown")
+                     if self.request_counter:
+                         self.request_counter.add(1, {"model": model, "provider": "hyperbolic"})
+
+                     try:
+                         # Make the actual API call
+                         response = wrapped(*args, **kwargs)
+
+                         # Extract response attributes
+                         if response.status_code == 200:
+                             response_data = response.json()
+                             self._extract_and_record_response(span, response_data)
+                         else:
+                             span.set_attribute("error", True)
+                             span.set_attribute("http.status_code", response.status_code)
+
+                         return response
+
+                     except Exception as e:
+                         span.set_attribute("error", True)
+                         span.record_exception(e)
+                         if self.error_counter:
+                             self.error_counter.add(
+                                 1,
+                                 {
+                                     "operation": "chat.completion",
+                                     "error.type": type(e).__name__,
+                                     "provider": "hyperbolic",
+                                 },
+                             )
+                         raise
+
+             # Apply the wrapper
+             requests.post = hyperbolic_post_wrapper(original_post)
+             self._instrumented = True
+             logger.info("Hyperbolic instrumentation enabled")
+
+         except Exception as e:
+             logger.error("Failed to instrument Hyperbolic: %s", e, exc_info=True)
+             if config.fail_on_error:
+                 raise
+
+     def _extract_request_attributes(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
+         """Extract attributes from a Hyperbolic API request.
+
+         Args:
+             request_data: The JSON request payload.
+
+         Returns:
+             Dict[str, Any]: Dictionary of attributes to set on the span.
+         """
+         attrs = {}
+
+         # Core attributes
+         attrs["gen_ai.system"] = "hyperbolic"
+         attrs["gen_ai.request.model"] = request_data.get("model", "unknown")
+         attrs["gen_ai.operation.name"] = "chat"
+
+         messages = request_data.get("messages", [])
+         attrs["gen_ai.request.message_count"] = len(messages)
+
+         # Request parameters
+         if "temperature" in request_data:
+             attrs["gen_ai.request.temperature"] = request_data["temperature"]
+         if "top_p" in request_data:
+             attrs["gen_ai.request.top_p"] = request_data["top_p"]
+         if "max_tokens" in request_data:
+             attrs["gen_ai.request.max_tokens"] = request_data["max_tokens"]
+
+         # First message preview
+         if messages:
+             first_message = str(messages[0])[:200]
+             attrs["gen_ai.request.first_message"] = first_message
+
+         return attrs
+
+     def _extract_and_record_response(self, span, response_data: Dict[str, Any]):
+         """Extract response attributes and record metrics.
+
+         Args:
+             span: The OpenTelemetry span.
+             response_data: The JSON response from the Hyperbolic API.
+         """
+         # Response ID
+         if "id" in response_data:
+             span.set_attribute("gen_ai.response.id", response_data["id"])
+
+         # Response model
+         if "model" in response_data:
+             span.set_attribute("gen_ai.response.model", response_data["model"])
+
+         # Finish reasons (skip None values, which are not valid span attribute entries)
+         choices = response_data.get("choices", [])
+         if choices:
+             finish_reasons = [
+                 choice.get("finish_reason")
+                 for choice in choices
+                 if choice.get("finish_reason") is not None
+             ]
+             if finish_reasons:
+                 span.set_attribute("gen_ai.response.finish_reasons", finish_reasons)
+
+         # Extract token usage
+         usage_data = response_data.get("usage", {})
+         if usage_data:
+             usage_dict = {
+                 "prompt_tokens": usage_data.get("prompt_tokens", 0),
+                 "completion_tokens": usage_data.get("completion_tokens", 0),
+                 "total_tokens": usage_data.get("total_tokens", 0),
+             }
+
+             # Record token usage as span attributes
+             span.set_attribute("gen_ai.usage.prompt_tokens", usage_dict["prompt_tokens"])
+             span.set_attribute("gen_ai.usage.completion_tokens", usage_dict["completion_tokens"])
+             span.set_attribute("gen_ai.usage.total_tokens", usage_dict["total_tokens"])
+
+             # Record token metrics
+             if self.token_counter:
+                 model = span.attributes.get("gen_ai.request.model", "unknown")
+                 self.token_counter.add(
+                     usage_dict["prompt_tokens"],
+                     {"token_type": "prompt", "model": model, "provider": "hyperbolic"},
+                 )
+                 self.token_counter.add(
+                     usage_dict["completion_tokens"],
+                     {"token_type": "completion", "model": model, "provider": "hyperbolic"},
+                 )
+
+             # Calculate and record cost
+             if self.config.enable_cost_tracking:
+                 from ..cost_calculator import CostCalculator
+
+                 cost_calc = CostCalculator(custom_pricing_json=self.config.custom_pricing_json)
+                 model = span.attributes.get("gen_ai.request.model", "unknown")
+                 cost = cost_calc.calculate_cost(
+                     model_name=model,
+                     prompt_tokens=usage_dict["prompt_tokens"],
+                     completion_tokens=usage_dict["completion_tokens"],
+                     call_type="chat",
+                 )
+
+                 if cost > 0 and self.cost_counter:
+                     span.set_attribute("gen_ai.cost.amount", cost)
+                     self.cost_counter.add(
+                         cost, {"model": model, "provider": "hyperbolic", "call_type": "chat"}
+                     )
+
+     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+         """Extract token usage from the response.
+
+         Note: This method is required by BaseInstrumentor but not used for HTTP-based
+         instrumentation. Token extraction is handled in _extract_and_record_response.
+
+         Args:
+             result: The API response (unused for HTTP instrumentation).
+
+         Returns:
+             None
+         """
+         return None
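
Usage note: below is a minimal sketch of a call this wrapper would intercept, assuming OTelConfig can be constructed with defaults; the /v1/chat/completions path and the model name are illustrative, since only the API base URL appears in this file:

    import requests

    from genai_otel.config import OTelConfig
    from genai_otel.instrumentors.hyperbolic_instrumentor import HyperbolicInstrumentor

    HyperbolicInstrumentor().instrument(OTelConfig())  # assumed defaults; patches module-level requests.post

    resp = requests.post(
        "https://api.hyperbolic.xyz/v1/chat/completions",  # matches the HYPERBOLIC_API_BASE prefix check
        headers={"Authorization": "Bearer <token>"},
        json={"model": "example-model", "messages": [{"role": "user", "content": "Hi"}]},
    )

Only the module-level requests.post function is wrapped, so calls made through a requests.Session (or any other HTTP client) bypass this instrumentation.
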