genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

The registry flags this release of genai-otel-instrument as potentially problematic.

@@ -3,9 +3,11 @@
 This instrumentor automatically traces:
 1. HuggingFace Transformers pipelines (local model execution)
 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+3. Direct model usage via AutoModelForCausalLM.generate() and forward()
 
 Note: Transformers runs models locally (no API costs), but InferenceClient makes
 API calls to HuggingFace endpoints which may have costs based on usage.
+Local model costs are estimated based on parameter count and token usage.
 """
 
 import logging
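For orientation, the direct-model path that 0.1.9.dev0 starts tracing looks like this in user code. This is a hedged usage sketch, not package code; the model name is illustrative and the instrumentor is assumed to be active process-wide:

# Hypothetical user code: once the instrumentor is active, this direct
# generate() call is traced, in addition to pipeline() and InferenceClient.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative model
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, world", return_tensors="pt")
# generate() is inherited from GenerationMixin, which is what gets wrapped.
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0]))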
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
 class HuggingFaceInstrumentor(BaseInstrumentor):
     """Instrumentor for HuggingFace Transformers and Inference API.
 
-    Instruments both:
-    - transformers.pipeline (local execution, no API costs)
+    Instruments:
+    - transformers.pipeline (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
     - huggingface_hub.InferenceClient (API calls, may have costs)
     """
 
@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         super().__init__()
         self._transformers_available = False
         self._inference_client_available = False
+        self._model_classes_instrumented = False
         self._check_availability()
 
     def _check_availability(self):
@@ -49,17 +54,20 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
             self._inference_client_available = True
             logger.debug("HuggingFace InferenceClient detected and available for instrumentation")
         except ImportError:
-            logger.debug("huggingface_hub not installed, InferenceClient instrumentation will be skipped")
+            logger.debug(
+                "huggingface_hub not installed, InferenceClient instrumentation will be skipped"
+            )
             self._inference_client_available = False
 
     def instrument(self, config: OTelConfig):
-        """Instrument HuggingFace Transformers pipelines and InferenceClient."""
-        self.config = config
+        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+        self._setup_config(config)
 
         instrumented_count = 0
 
-        # Instrument transformers.pipeline if available
+        # Instrument transformers components if available
         if self._transformers_available:
+            # Instrument pipeline
             try:
                 self._instrument_transformers()
                 instrumented_count += 1
@@ -68,13 +76,24 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
                 if config.fail_on_error:
                     raise
 
+            # Instrument model classes (AutoModelForCausalLM, etc.)
+            try:
+                self._instrument_model_classes()
+                instrumented_count += 1
+            except Exception as e:
+                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                if config.fail_on_error:
+                    raise
+
         # Instrument InferenceClient if available
         if self._inference_client_available:
             try:
                 self._instrument_inference_client()
                 instrumented_count += 1
             except Exception as e:
-                logger.error("Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True)
+                logger.error(
+                    "Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True
+                )
                 if config.fail_on_error:
                     raise
 
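The new model-class block follows the same graceful-degradation pattern as the surrounding code: each sub-instrumentation failure is logged and swallowed unless config.fail_on_error is set. Reduced to a standalone sketch (the function and step list are illustrative, not the package's API):

import logging

logger = logging.getLogger(__name__)

def instrument_all(steps, fail_on_error=False):
    """Run each (name, callable) step; log failures, re-raise only on request."""
    count = 0
    for name, step in steps:
        try:
            step()
            count += 1
        except Exception as e:
            logger.error("Failed to instrument %s: %s", name, e, exc_info=True)
            if fail_on_error:
                raise
    return count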
@@ -162,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         InferenceClient.text_generation = wrapped_text_generation
         logger.debug("HuggingFace InferenceClient instrumented")
 
+    def _instrument_model_classes(self):
+        """Instrument HuggingFace model classes for direct model usage."""
+        try:
+            import wrapt
+
+            # Import GenerationMixin - the base class that provides generate() method
+            # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it
+            try:
+                from transformers.generation.utils import GenerationMixin
+            except ImportError:
+                # Fallback for older transformers versions
+                from transformers.generation import GenerationMixin
+
+            # Store reference to instrumentor for use in wrapper
+            instrumentor = self
+
+            # Wrap the generate() method at GenerationMixin level (all models inherit from this)
+            original_generate = GenerationMixin.generate
+
+            @wrapt.decorator
+            def generate_wrapper(wrapped, instance, args, kwargs):
+                """Wrapper for model.generate() method."""
+                # Extract model info
+                model_name = getattr(instance, "name_or_path", "unknown")
+                if hasattr(instance.config, "_name_or_path"):
+                    model_name = instance.config._name_or_path
+
+                # Get input token count
+                input_ids = kwargs.get("input_ids") or (args[0] if args else None)
+                prompt_tokens = 0
+                if input_ids is not None:
+                    if hasattr(input_ids, "shape"):
+                        prompt_tokens = int(input_ids.shape[-1])
+                    elif isinstance(input_ids, (list, tuple)):
+                        prompt_tokens = len(input_ids[0]) if input_ids else 0
+
+                # Create span
+                with instrumentor.tracer.start_as_current_span(
+                    "huggingface.model.generate"
+                ) as span:
+                    # Set attributes
+                    span.set_attribute("gen_ai.system", "huggingface")
+                    span.set_attribute("gen_ai.request.model", model_name)
+                    span.set_attribute("gen_ai.operation.name", "text_generation")
+                    span.set_attribute("gen_ai.request.type", "chat")
+
+                    # Extract generation parameters
+                    if "max_length" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
+                    if "max_new_tokens" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
+                    if "temperature" in kwargs:
+                        span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
+                    if "top_p" in kwargs:
+                        span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])
+
+                    # Call original generate
+                    import time
+
+                    start_time = time.time()
+                    result = wrapped(*args, **kwargs)
+                    duration = time.time() - start_time
+
+                    # Extract output token count
+                    completion_tokens = 0
+                    if hasattr(result, "shape"):
+                        # result is a tensor
+                        total_length = int(result.shape[-1])
+                        completion_tokens = max(0, total_length - prompt_tokens)
+                    elif isinstance(result, (list, tuple)):
+                        # result is a list of sequences
+                        if result and hasattr(result[0], "shape"):
+                            total_length = int(result[0].shape[-1])
+                            completion_tokens = max(0, total_length - prompt_tokens)
+
+                    total_tokens = prompt_tokens + completion_tokens
+
+                    # Set token usage attributes
+                    if prompt_tokens > 0:
+                        span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                    if completion_tokens > 0:
+                        span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                    if total_tokens > 0:
+                        span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+
+                    # Record metrics
+                    if instrumentor.request_counter:
+                        instrumentor.request_counter.add(
+                            1, {"model": model_name, "provider": "huggingface"}
+                        )
+
+                    if instrumentor.token_counter and total_tokens > 0:
+                        if prompt_tokens > 0:
+                            instrumentor.token_counter.add(
+                                prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                            )
+                        if completion_tokens > 0:
+                            instrumentor.token_counter.add(
+                                completion_tokens,
+                                {"token_type": "completion", "operation": span.name},
+                            )
+
+                    if instrumentor.latency_histogram:
+                        instrumentor.latency_histogram.record(duration, {"operation": span.name})
+
+                    # Calculate and record cost if enabled
+                    if (
+                        instrumentor.config
+                        and instrumentor.config.enable_cost_tracking
+                        and total_tokens > 0
+                    ):
+                        try:
+                            usage = {
+                                "prompt_tokens": prompt_tokens,
+                                "completion_tokens": completion_tokens,
+                                "total_tokens": total_tokens,
+                            }
+
+                            costs = instrumentor.cost_calculator.calculate_granular_cost(
+                                model=model_name, usage=usage, call_type="chat"
+                            )
+
+                            if costs["total"] > 0:
+                                if instrumentor.cost_counter:
+                                    instrumentor.cost_counter.add(
+                                        costs["total"], {"model": model_name}
+                                    )
+                                span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                                if costs["prompt"] > 0:
+                                    span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                                if costs["completion"] > 0:
+                                    span.set_attribute(
+                                        "gen_ai.usage.cost.completion", costs["completion"]
+                                    )
+
+                            logger.debug(
+                                f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                f"cost: ${costs['total']:.6f}"
+                            )
+                        except Exception as e:
+                            logger.warning(f"Failed to calculate cost: {e}")
+
+                    return result
+
+            # Apply wrapper to GenerationMixin.generate (all models inherit this)
+            GenerationMixin.generate = generate_wrapper(original_generate)
+
+            self._model_classes_instrumented = True
+            logger.debug(
+                "HuggingFace GenerationMixin.generate() instrumented "
+                "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+            )
+
+        except ImportError as e:
+            logger.debug(f"Could not import model classes for instrumentation: {e}")
+        except Exception as e:
+            raise  # Re-raise to be caught by instrument() method
+
     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
         """Extract attributes from Inference API call."""
         attrs = {}
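The wrapper above infers usage from tensor shapes rather than from an API-reported usage object: a decoder-only generate() returns the prompt concatenated with the new tokens, so the completion count is output length minus input length. A self-contained sketch of that arithmetic (assumes PyTorch tensors; names are illustrative):

import torch

def estimate_usage(input_ids: torch.Tensor, output_ids: torch.Tensor):
    """Estimate prompt/completion/total token counts from generate() shapes."""
    prompt_tokens = int(input_ids.shape[-1])
    # Decoder-only generate() output = prompt + new tokens.
    completion_tokens = max(0, int(output_ids.shape[-1]) - prompt_tokens)
    return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens

# A prompt of 5 tokens that generated 7 new tokens:
inp = torch.zeros((1, 5), dtype=torch.long)
out = torch.zeros((1, 12), dtype=torch.long)
print(estimate_usage(inp, out))  # (5, 7, 12)

One caveat: encoder-decoder models (AutoModelForSeq2SeqLM) return only decoder tokens from generate(), so this subtraction undercounts completions there; the max(0, ...) clamp merely prevents negative values.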
@@ -1,75 +1,75 @@
-"""OpenTelemetry instrumentor for the LangChain framework.
-
-This instrumentor automatically traces various components within LangChain,
-including chains and agents, capturing relevant attributes for observability.
-"""
-
-import logging
-from typing import Dict, Optional
-
-from ..config import OTelConfig
-from .base import BaseInstrumentor
-
-logger = logging.getLogger(__name__)
-
-
-class LangChainInstrumentor(BaseInstrumentor):
-    """Instrumentor for LangChain"""
-
-    def __init__(self):
-        """Initialize the instrumentor."""
-        super().__init__()
-        self._langchain_available = False
-        self._check_availability()
-
-    def _check_availability(self):
-        """Check if langchain library is available."""
-        try:
-            import langchain
-
-            self._langchain_available = True
-            logger.debug("langchain library detected and available for instrumentation")
-        except ImportError:
-            logger.debug("langchain library not installed, instrumentation will be skipped")
-            self._langchain_available = False
-
-    def instrument(self, config: OTelConfig):
-        """Instrument langchain available if available."""
-        if not self._langchain_available:
-            logger.debug("Skipping instrumentation - library not available")
-            return
-
-        self.config = config
-        try:
-            from langchain.agents.agent import AgentExecutor
-            from langchain.chains.base import Chain
-
-            # Instrument Chains
-            original_call = Chain.__call__
-
-            def wrapped_call(instance, *args, **kwargs):
-                chain_type = instance.__class__.__name__
-                with self.tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
-                    span.set_attribute("langchain.chain.type", chain_type)
-                    result = original_call(instance, *args, **kwargs)
-                    return result
-
-            Chain.__call__ = wrapped_call
-
-            # Instrument Agents
-            original_agent_call = AgentExecutor.__call__
-
-            def wrapped_agent_call(instance, *args, **kwargs):
-                with self.tracer.start_as_current_span("langchain.agent.execute") as span:
-                    agent_name = getattr(instance, "agent", {}).get("name", "unknown")
-                    span.set_attribute("langchain.agent.name", agent_name)
-                    result = original_agent_call(instance, *args, **kwargs)
-                    return result
-
-            AgentExecutor.__call__ = wrapped_agent_call
-
-        except ImportError:
-            pass
-
-    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
-        return None
+"""OpenTelemetry instrumentor for the LangChain framework.
+
+This instrumentor automatically traces various components within LangChain,
+including chains and agents, capturing relevant attributes for observability.
+"""
+
+import logging
+from typing import Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+logger = logging.getLogger(__name__)
+
+
+class LangChainInstrumentor(BaseInstrumentor):
+    """Instrumentor for LangChain"""
+
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._langchain_available = False
+        self._check_availability()
+
+    def _check_availability(self):
+        """Check if langchain library is available."""
+        try:
+            import langchain
+
+            self._langchain_available = True
+            logger.debug("langchain library detected and available for instrumentation")
+        except ImportError:
+            logger.debug("langchain library not installed, instrumentation will be skipped")
+            self._langchain_available = False
+
+    def instrument(self, config: OTelConfig):
+        """Instrument langchain available if available."""
+        if not self._langchain_available:
+            logger.debug("Skipping instrumentation - library not available")
+            return
+
+        self.config = config
+        try:
+            from langchain.agents.agent import AgentExecutor
+            from langchain.chains.base import Chain
+
+            # Instrument Chains
+            original_call = Chain.__call__
+
+            def wrapped_call(instance, *args, **kwargs):
+                chain_type = instance.__class__.__name__
+                with self.tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
+                    span.set_attribute("langchain.chain.type", chain_type)
+                    result = original_call(instance, *args, **kwargs)
+                    return result
+
+            Chain.__call__ = wrapped_call
+
+            # Instrument Agents
+            original_agent_call = AgentExecutor.__call__
+
+            def wrapped_agent_call(instance, *args, **kwargs):
+                with self.tracer.start_as_current_span("langchain.agent.execute") as span:
+                    agent_name = getattr(instance, "agent", {}).get("name", "unknown")
+                    span.set_attribute("langchain.agent.name", agent_name)
+                    result = original_agent_call(instance, *args, **kwargs)
+                    return result
+
+            AgentExecutor.__call__ = wrapped_agent_call
+
+        except ImportError:
+            pass
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        return None
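Both sides of this file are textually identical, so the change is presumably whitespace or line endings only. The tracing technique the file uses is plain method replacement on the class; a self-contained sketch of that pattern (the Chain class here is a local stand-in, not LangChain's):

from opentelemetry import trace

tracer = trace.get_tracer("demo")

class Chain:
    def __call__(self, query: str) -> str:
        return f"result for {query}"

original_call = Chain.__call__  # keep a reference to the unwrapped method

def wrapped_call(instance, *args, **kwargs):
    chain_type = instance.__class__.__name__
    with tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
        span.set_attribute("langchain.chain.type", chain_type)
        return original_call(instance, *args, **kwargs)

Chain.__call__ = wrapped_call  # every Chain instance is now traced
print(Chain()("hello"))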
@@ -32,9 +32,8 @@ class MistralAIInstrumentor(BaseInstrumentor):
         # In Mistral SDK v1.0+, structure is:
         # - Mistral client has .chat and .embeddings properties
         # - These are bound methods that call internal APIs
-
         # Store original methods at module level before any instances are created
-        if not hasattr(Mistral, '_genai_otel_instrumented'):
+        if not hasattr(Mistral, "_genai_otel_instrumented"):
             self._wrap_mistral_methods(Mistral, wrapt)
             Mistral._genai_otel_instrumented = True
             logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
@@ -54,29 +53,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
         from mistralai.embeddings import Embeddings
 
         # Wrap Chat.complete method
-        if hasattr(Chat, 'complete'):
+        if hasattr(Chat, "complete"):
             wrapt.wrap_function_wrapper(
-                'mistralai.chat',
-                'Chat.complete',
-                self._wrap_chat_complete
+                "mistralai.chat", "Chat.complete", self._wrap_chat_complete
             )
             logger.debug("Wrapped Mistral Chat.complete")
 
         # Wrap Chat.stream method
-        if hasattr(Chat, 'stream'):
-            wrapt.wrap_function_wrapper(
-                'mistralai.chat',
-                'Chat.stream',
-                self._wrap_chat_stream
-            )
+        if hasattr(Chat, "stream"):
+            wrapt.wrap_function_wrapper("mistralai.chat", "Chat.stream", self._wrap_chat_stream)
             logger.debug("Wrapped Mistral Chat.stream")
 
         # Wrap Embeddings.create method
-        if hasattr(Embeddings, 'create'):
+        if hasattr(Embeddings, "create"):
             wrapt.wrap_function_wrapper(
-                'mistralai.embeddings',
-                'Embeddings.create',
-                self._wrap_embeddings_create
+                "mistralai.embeddings", "Embeddings.create", self._wrap_embeddings_create
             )
             logger.debug("Wrapped Mistral Embeddings.create")
 
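All three patches go through wrapt.wrap_function_wrapper(target, name, wrapper), where the wrapper receives (wrapped, instance, args, kwargs). A self-contained sketch of the same mechanism, applied to a local stand-in class rather than the Mistral SDK:

import sys
import wrapt

class Chat:
    def complete(self, prompt: str) -> str:
        return f"echo: {prompt}"

def traced(wrapped, instance, args, kwargs):
    """wrapt-style wrapper: wrapped is the original method, instance its object."""
    print(f"before {wrapped.__name__}")
    result = wrapped(*args, **kwargs)
    print(f"after {wrapped.__name__}")
    return result

# Same call the instrumentor makes, pointed at this module instead of mistralai.chat.
wrapt.wrap_function_wrapper(sys.modules[__name__], "Chat.complete", traced)

print(Chat().complete("hi"))  # before/after lines print around "echo: hi"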
@@ -140,15 +131,11 @@ class MistralAIInstrumentor(BaseInstrumentor):
             stream = wrapped(*args, **kwargs)
 
             # Wrap the stream with our tracking wrapper
-            return self._StreamWrapper(
-                stream, span, self, model, start_time, span_name
-            )
+            return self._StreamWrapper(stream, span, self, model, start_time, span_name)
 
         except Exception as e:
             if self.error_counter:
-                self.error_counter.add(
-                    1, {"operation": span_name, "error.type": type(e).__name__}
-                )
+                self.error_counter.add(1, {"operation": span_name, "error.type": type(e).__name__})
             span.record_exception(e)
             span.end()
             raise
@@ -240,10 +227,7 @@ class MistralAIInstrumentor(BaseInstrumentor):
 
                     mock_response = MockResponse(self._usage)
                     self._instrumentor._record_result_metrics(
-                        self._span,
-                        mock_response,
-                        self._start_time,
-                        {"model": self._model}
+                        self._span, mock_response, self._start_time, {"model": self._model}
                     )
 
             finally:
@@ -255,21 +239,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
             """Process a streaming chunk to extract usage."""
             try:
                 # Mistral streaming chunks have: data.choices[0].delta.content
-                if hasattr(chunk, 'data'):
+                if hasattr(chunk, "data"):
                     data = chunk.data
-                    if hasattr(data, 'choices') and len(data.choices) > 0:
+                    if hasattr(data, "choices") and len(data.choices) > 0:
                         delta = data.choices[0].delta
-                        if hasattr(delta, 'content') and delta.content:
+                        if hasattr(delta, "content") and delta.content:
                             self._response_text += delta.content
 
                     # Extract usage if available on final chunk
-                    if hasattr(data, 'usage') and data.usage:
+                    if hasattr(data, "usage") and data.usage:
                         usage = data.usage
-                        if hasattr(usage, 'prompt_tokens'):
+                        if hasattr(usage, "prompt_tokens"):
                             self._usage["prompt_tokens"] = usage.prompt_tokens
-                        if hasattr(usage, 'completion_tokens'):
+                        if hasattr(usage, "completion_tokens"):
                             self._usage["completion_tokens"] = usage.completion_tokens
-                        if hasattr(usage, 'total_tokens'):
+                        if hasattr(usage, "total_tokens"):
                             self._usage["total_tokens"] = usage.total_tokens
 
             except Exception as e:
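These chunk-processing changes feed a stream wrapper that accumulates usage while the caller iterates and records metrics once the stream is exhausted. A minimal sketch of that shape (illustrative; the real _StreamWrapper also owns the span, latency, and mock-response bookkeeping):

class StreamUsageWrapper:
    """Iterate a streaming response, accumulate usage, report at exhaustion."""

    def __init__(self, stream, on_finish):
        self._stream = stream
        self._on_finish = on_finish  # called once with the final usage dict
        self._usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self._stream)
        except StopIteration:
            self._on_finish(self._usage)  # record metrics once, at stream end
            raise
        data = getattr(chunk, "data", None)
        usage = getattr(data, "usage", None)  # Mistral sets usage on the final chunk
        if usage is not None:
            for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
                value = getattr(usage, key, None)
                if value is not None:
                    self._usage[key] = value
        return chunk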