genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

The registry flags this release of genai-otel-instrument as potentially problematic.

@@ -3,9 +3,11 @@
 This instrumentor automatically traces:
 1. HuggingFace Transformers pipelines (local model execution)
 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+3. Direct model usage via AutoModelForCausalLM.generate() and forward()
 
 Note: Transformers runs models locally (no API costs), but InferenceClient makes
 API calls to HuggingFace endpoints which may have costs based on usage.
+Local model costs are estimated based on parameter count and token usage.
 """
 
 import logging
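For orientation, the direct-model path that 0.1.9.dev0 starts tracing looks like this in user code. This is a hedged usage sketch, not package code; the model name is illustrative and the instrumentor is assumed to be active process-wide:

# Hypothetical user code: once the instrumentor is active, this direct
# generate() call is traced, in addition to pipeline() and InferenceClient.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative model
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, world", return_tensors="pt")
# generate() is inherited from GenerationMixin, which is what gets wrapped.
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0]))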
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
 class HuggingFaceInstrumentor(BaseInstrumentor):
     """Instrumentor for HuggingFace Transformers and Inference API.
 
-    Instruments both:
-    - transformers.pipeline (local execution, no API costs)
+    Instruments:
+    - transformers.pipeline (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
     - huggingface_hub.InferenceClient (API calls, may have costs)
     """
 
@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         super().__init__()
         self._transformers_available = False
         self._inference_client_available = False
+        self._model_classes_instrumented = False
         self._check_availability()
 
     def _check_availability(self):
@@ -49,17 +54,20 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
             self._inference_client_available = True
             logger.debug("HuggingFace InferenceClient detected and available for instrumentation")
         except ImportError:
-            logger.debug("huggingface_hub not installed, InferenceClient instrumentation will be skipped")
+            logger.debug(
+                "huggingface_hub not installed, InferenceClient instrumentation will be skipped"
+            )
             self._inference_client_available = False
 
     def instrument(self, config: OTelConfig):
-        """Instrument HuggingFace Transformers pipelines and InferenceClient."""
-        self.config = config
+        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+        self._setup_config(config)
 
         instrumented_count = 0
 
-        # Instrument transformers.pipeline if available
+        # Instrument transformers components if available
         if self._transformers_available:
+            # Instrument pipeline
             try:
                 self._instrument_transformers()
                 instrumented_count += 1
@@ -68,13 +76,24 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
                 if config.fail_on_error:
                     raise
 
+            # Instrument model classes (AutoModelForCausalLM, etc.)
+            try:
+                self._instrument_model_classes()
+                instrumented_count += 1
+            except Exception as e:
+                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                if config.fail_on_error:
+                    raise
+
         # Instrument InferenceClient if available
         if self._inference_client_available:
             try:
                 self._instrument_inference_client()
                 instrumented_count += 1
             except Exception as e:
-                logger.error("Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True)
+                logger.error(
+                    "Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True
+                )
                 if config.fail_on_error:
                     raise
 
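The new model-class block follows the same graceful-degradation pattern as the surrounding code: each sub-instrumentation failure is logged and swallowed unless config.fail_on_error is set. Reduced to a standalone sketch (the function and step list are illustrative, not the package's API):

import logging

logger = logging.getLogger(__name__)

def instrument_all(steps, fail_on_error=False):
    """Run each (name, callable) step; log failures, re-raise only on request."""
    count = 0
    for name, step in steps:
        try:
            step()
            count += 1
        except Exception as e:
            logger.error("Failed to instrument %s: %s", name, e, exc_info=True)
            if fail_on_error:
                raise
    return count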
@@ -162,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         InferenceClient.text_generation = wrapped_text_generation
         logger.debug("HuggingFace InferenceClient instrumented")
 
+    def _instrument_model_classes(self):
+        """Instrument HuggingFace model classes for direct model usage."""
+        try:
+            import wrapt
+
+            # Import GenerationMixin - the base class that provides generate() method
+            # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it
+            try:
+                from transformers.generation.utils import GenerationMixin
+            except ImportError:
+                # Fallback for older transformers versions
+                from transformers.generation import GenerationMixin
+
+            # Store reference to instrumentor for use in wrapper
+            instrumentor = self
+
+            # Wrap the generate() method at GenerationMixin level (all models inherit from this)
+            original_generate = GenerationMixin.generate
+
+            @wrapt.decorator
+            def generate_wrapper(wrapped, instance, args, kwargs):
+                """Wrapper for model.generate() method."""
+                # Extract model info
+                model_name = getattr(instance, "name_or_path", "unknown")
+                if hasattr(instance.config, "_name_or_path"):
+                    model_name = instance.config._name_or_path
+
+                # Get input token count
+                input_ids = kwargs.get("input_ids") or (args[0] if args else None)
+                prompt_tokens = 0
+                if input_ids is not None:
+                    if hasattr(input_ids, "shape"):
+                        prompt_tokens = int(input_ids.shape[-1])
+                    elif isinstance(input_ids, (list, tuple)):
+                        prompt_tokens = len(input_ids[0]) if input_ids else 0
+
+                # Create span
+                with instrumentor.tracer.start_as_current_span(
+                    "huggingface.model.generate"
+                ) as span:
+                    # Set attributes
+                    span.set_attribute("gen_ai.system", "huggingface")
+                    span.set_attribute("gen_ai.request.model", model_name)
+                    span.set_attribute("gen_ai.operation.name", "text_generation")
+                    span.set_attribute("gen_ai.request.type", "chat")
+
+                    # Extract generation parameters
+                    if "max_length" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
+                    if "max_new_tokens" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
+                    if "temperature" in kwargs:
+                        span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
+                    if "top_p" in kwargs:
+                        span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])
+
+                    # Call original generate
+                    import time
+
+                    start_time = time.time()
+                    result = wrapped(*args, **kwargs)
+                    duration = time.time() - start_time
+
+                    # Extract output token count
+                    completion_tokens = 0
+                    if hasattr(result, "shape"):
+                        # result is a tensor
+                        total_length = int(result.shape[-1])
+                        completion_tokens = max(0, total_length - prompt_tokens)
+                    elif isinstance(result, (list, tuple)):
+                        # result is a list of sequences
+                        if result and hasattr(result[0], "shape"):
+                            total_length = int(result[0].shape[-1])
+                            completion_tokens = max(0, total_length - prompt_tokens)
+
+                    total_tokens = prompt_tokens + completion_tokens
+
+                    # Set token usage attributes
+                    if prompt_tokens > 0:
+                        span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                    if completion_tokens > 0:
+                        span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                    if total_tokens > 0:
+                        span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+
+                    # Record metrics
+                    if instrumentor.request_counter:
+                        instrumentor.request_counter.add(
+                            1, {"model": model_name, "provider": "huggingface"}
+                        )
+
+                    if instrumentor.token_counter and total_tokens > 0:
+                        if prompt_tokens > 0:
+                            instrumentor.token_counter.add(
+                                prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                            )
+                        if completion_tokens > 0:
+                            instrumentor.token_counter.add(
+                                completion_tokens,
+                                {"token_type": "completion", "operation": span.name},
+                            )
+
+                    if instrumentor.latency_histogram:
+                        instrumentor.latency_histogram.record(duration, {"operation": span.name})
+
+                    # Calculate and record cost if enabled
+                    if (
+                        instrumentor.config
+                        and instrumentor.config.enable_cost_tracking
+                        and total_tokens > 0
+                    ):
+                        try:
+                            usage = {
+                                "prompt_tokens": prompt_tokens,
+                                "completion_tokens": completion_tokens,
+                                "total_tokens": total_tokens,
+                            }
+
+                            costs = instrumentor.cost_calculator.calculate_granular_cost(
+                                model=model_name, usage=usage, call_type="chat"
+                            )
+
+                            if costs["total"] > 0:
+                                if instrumentor.cost_counter:
+                                    instrumentor.cost_counter.add(
+                                        costs["total"], {"model": model_name}
+                                    )
+                                span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                                if costs["prompt"] > 0:
+                                    span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                                if costs["completion"] > 0:
+                                    span.set_attribute(
+                                        "gen_ai.usage.cost.completion", costs["completion"]
+                                    )
+
+                            logger.debug(
+                                f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                f"cost: ${costs['total']:.6f}"
+                            )
+                        except Exception as e:
+                            logger.warning(f"Failed to calculate cost: {e}")
+
+                    return result
+
+            # Apply wrapper to GenerationMixin.generate (all models inherit this)
+            GenerationMixin.generate = generate_wrapper(original_generate)
+
+            self._model_classes_instrumented = True
+            logger.debug(
+                "HuggingFace GenerationMixin.generate() instrumented "
+                "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+            )
+
+        except ImportError as e:
+            logger.debug(f"Could not import model classes for instrumentation: {e}")
+        except Exception as e:
+            raise  # Re-raise to be caught by instrument() method
+
     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
         """Extract attributes from Inference API call."""
         attrs = {}
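The wrapper above infers usage from tensor shapes rather than from an API-reported usage object: a decoder-only generate() returns the prompt concatenated with the new tokens, so the completion count is output length minus input length. A self-contained sketch of that arithmetic (assumes PyTorch tensors; names are illustrative):

import torch

def estimate_usage(input_ids: torch.Tensor, output_ids: torch.Tensor):
    """Estimate prompt/completion/total token counts from generate() shapes."""
    prompt_tokens = int(input_ids.shape[-1])
    # Decoder-only generate() output = prompt + new tokens.
    completion_tokens = max(0, int(output_ids.shape[-1]) - prompt_tokens)
    return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens

# A prompt of 5 tokens that generated 7 new tokens:
inp = torch.zeros((1, 5), dtype=torch.long)
out = torch.zeros((1, 12), dtype=torch.long)
print(estimate_usage(inp, out))  # (5, 7, 12)

One caveat: encoder-decoder models (AutoModelForSeq2SeqLM) return only decoder tokens from generate(), so this subtraction undercounts completions there; the max(0, ...) clamp merely prevents negative values.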
@@ -1,75 +1,75 @@
-"""OpenTelemetry instrumentor for the LangChain framework.
-
-This instrumentor automatically traces various components within LangChain,
-including chains and agents, capturing relevant attributes for observability.
-"""
-
-import logging
-from typing import Dict, Optional
-
-from ..config import OTelConfig
-from .base import BaseInstrumentor
-
-logger = logging.getLogger(__name__)
-
-
-class LangChainInstrumentor(BaseInstrumentor):
-    """Instrumentor for LangChain"""
-
-    def __init__(self):
-        """Initialize the instrumentor."""
-        super().__init__()
-        self._langchain_available = False
-        self._check_availability()
-
-    def _check_availability(self):
-        """Check if langchain library is available."""
-        try:
-            import langchain
-
-            self._langchain_available = True
-            logger.debug("langchain library detected and available for instrumentation")
-        except ImportError:
-            logger.debug("langchain library not installed, instrumentation will be skipped")
-            self._langchain_available = False
-
-    def instrument(self, config: OTelConfig):
-        """Instrument langchain available if available."""
-        if not self._langchain_available:
-            logger.debug("Skipping instrumentation - library not available")
-            return
-
-        self.config = config
-        try:
-            from langchain.agents.agent import AgentExecutor
-            from langchain.chains.base import Chain
-
-            # Instrument Chains
-            original_call = Chain.__call__
-
-            def wrapped_call(instance, *args, **kwargs):
-                chain_type = instance.__class__.__name__
-                with self.tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
-                    span.set_attribute("langchain.chain.type", chain_type)
-                    result = original_call(instance, *args, **kwargs)
-                    return result
-
-            Chain.__call__ = wrapped_call
-
-            # Instrument Agents
-            original_agent_call = AgentExecutor.__call__
-
-            def wrapped_agent_call(instance, *args, **kwargs):
-                with self.tracer.start_as_current_span("langchain.agent.execute") as span:
-                    agent_name = getattr(instance, "agent", {}).get("name", "unknown")
-                    span.set_attribute("langchain.agent.name", agent_name)
-                    result = original_agent_call(instance, *args, **kwargs)
-                    return result
-
-            AgentExecutor.__call__ = wrapped_agent_call
-
-        except ImportError:
-            pass
-
-    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
-        return None
+"""OpenTelemetry instrumentor for the LangChain framework.
+
+This instrumentor automatically traces various components within LangChain,
+including chains and agents, capturing relevant attributes for observability.
+"""
+
+import logging
+from typing import Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+logger = logging.getLogger(__name__)
+
+
+class LangChainInstrumentor(BaseInstrumentor):
+    """Instrumentor for LangChain"""
+
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._langchain_available = False
+        self._check_availability()
+
+    def _check_availability(self):
+        """Check if langchain library is available."""
+        try:
+            import langchain
+
+            self._langchain_available = True
+            logger.debug("langchain library detected and available for instrumentation")
+        except ImportError:
+            logger.debug("langchain library not installed, instrumentation will be skipped")
+            self._langchain_available = False
+
+    def instrument(self, config: OTelConfig):
+        """Instrument langchain available if available."""
+        if not self._langchain_available:
+            logger.debug("Skipping instrumentation - library not available")
+            return
+
+        self.config = config
+        try:
+            from langchain.agents.agent import AgentExecutor
+            from langchain.chains.base import Chain
+
+            # Instrument Chains
+            original_call = Chain.__call__
+
+            def wrapped_call(instance, *args, **kwargs):
+                chain_type = instance.__class__.__name__
+                with self.tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
+                    span.set_attribute("langchain.chain.type", chain_type)
+                    result = original_call(instance, *args, **kwargs)
+                    return result
+
+            Chain.__call__ = wrapped_call
+
+            # Instrument Agents
+            original_agent_call = AgentExecutor.__call__
+
+            def wrapped_agent_call(instance, *args, **kwargs):
+                with self.tracer.start_as_current_span("langchain.agent.execute") as span:
+                    agent_name = getattr(instance, "agent", {}).get("name", "unknown")
+                    span.set_attribute("langchain.agent.name", agent_name)
+                    result = original_agent_call(instance, *args, **kwargs)
+                    return result
+
+            AgentExecutor.__call__ = wrapped_agent_call
+
+        except ImportError:
+            pass
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        return None
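Both sides of this file are textually identical, so the change is presumably whitespace or line endings only. The tracing technique the file uses is plain method replacement on the class; a self-contained sketch of that pattern (the Chain class here is a local stand-in, not LangChain's):

from opentelemetry import trace

tracer = trace.get_tracer("demo")

class Chain:
    def __call__(self, query: str) -> str:
        return f"result for {query}"

original_call = Chain.__call__  # keep a reference to the unwrapped method

def wrapped_call(instance, *args, **kwargs):
    chain_type = instance.__class__.__name__
    with tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
        span.set_attribute("langchain.chain.type", chain_type)
        return original_call(instance, *args, **kwargs)

Chain.__call__ = wrapped_call  # every Chain instance is now traced
print(Chain()("hello"))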
@@ -32,9 +32,8 @@ class MistralAIInstrumentor(BaseInstrumentor):
         # In Mistral SDK v1.0+, structure is:
         # - Mistral client has .chat and .embeddings properties
         # - These are bound methods that call internal APIs
-
         # Store original methods at module level before any instances are created
-        if not hasattr(Mistral, '_genai_otel_instrumented'):
+        if not hasattr(Mistral, "_genai_otel_instrumented"):
             self._wrap_mistral_methods(Mistral, wrapt)
             Mistral._genai_otel_instrumented = True
             logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
@@ -54,29 +53,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
         from mistralai.embeddings import Embeddings
 
         # Wrap Chat.complete method
-        if hasattr(Chat, 'complete'):
+        if hasattr(Chat, "complete"):
             wrapt.wrap_function_wrapper(
-                'mistralai.chat',
-                'Chat.complete',
-                self._wrap_chat_complete
+                "mistralai.chat", "Chat.complete", self._wrap_chat_complete
             )
             logger.debug("Wrapped Mistral Chat.complete")
 
         # Wrap Chat.stream method
-        if hasattr(Chat, 'stream'):
-            wrapt.wrap_function_wrapper(
-                'mistralai.chat',
-                'Chat.stream',
-                self._wrap_chat_stream
-            )
+        if hasattr(Chat, "stream"):
+            wrapt.wrap_function_wrapper("mistralai.chat", "Chat.stream", self._wrap_chat_stream)
             logger.debug("Wrapped Mistral Chat.stream")
 
         # Wrap Embeddings.create method
-        if hasattr(Embeddings, 'create'):
+        if hasattr(Embeddings, "create"):
             wrapt.wrap_function_wrapper(
-                'mistralai.embeddings',
-                'Embeddings.create',
-                self._wrap_embeddings_create
+                "mistralai.embeddings", "Embeddings.create", self._wrap_embeddings_create
             )
             logger.debug("Wrapped Mistral Embeddings.create")
 
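All three patches go through wrapt.wrap_function_wrapper(target, name, wrapper), where the wrapper receives (wrapped, instance, args, kwargs). A self-contained sketch of the same mechanism, applied to a local stand-in class rather than the Mistral SDK:

import sys
import wrapt

class Chat:
    def complete(self, prompt: str) -> str:
        return f"echo: {prompt}"

def traced(wrapped, instance, args, kwargs):
    """wrapt-style wrapper: wrapped is the original method, instance its object."""
    print(f"before {wrapped.__name__}")
    result = wrapped(*args, **kwargs)
    print(f"after {wrapped.__name__}")
    return result

# Same call the instrumentor makes, pointed at this module instead of mistralai.chat.
wrapt.wrap_function_wrapper(sys.modules[__name__], "Chat.complete", traced)

print(Chat().complete("hi"))  # before/after lines print around "echo: hi"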
@@ -140,15 +131,11 @@ class MistralAIInstrumentor(BaseInstrumentor):
             stream = wrapped(*args, **kwargs)
 
             # Wrap the stream with our tracking wrapper
-            return self._StreamWrapper(
-                stream, span, self, model, start_time, span_name
-            )
+            return self._StreamWrapper(stream, span, self, model, start_time, span_name)
 
         except Exception as e:
             if self.error_counter:
-                self.error_counter.add(
-                    1, {"operation": span_name, "error.type": type(e).__name__}
-                )
+                self.error_counter.add(1, {"operation": span_name, "error.type": type(e).__name__})
             span.record_exception(e)
             span.end()
             raise
@@ -240,10 +227,7 @@ class MistralAIInstrumentor(BaseInstrumentor):
 
                     mock_response = MockResponse(self._usage)
                     self._instrumentor._record_result_metrics(
-                        self._span,
-                        mock_response,
-                        self._start_time,
-                        {"model": self._model}
+                        self._span, mock_response, self._start_time, {"model": self._model}
                     )
 
             finally:
@@ -255,21 +239,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
             """Process a streaming chunk to extract usage."""
             try:
                 # Mistral streaming chunks have: data.choices[0].delta.content
-                if hasattr(chunk, 'data'):
+                if hasattr(chunk, "data"):
                     data = chunk.data
-                    if hasattr(data, 'choices') and len(data.choices) > 0:
+                    if hasattr(data, "choices") and len(data.choices) > 0:
                         delta = data.choices[0].delta
-                        if hasattr(delta, 'content') and delta.content:
+                        if hasattr(delta, "content") and delta.content:
                             self._response_text += delta.content
 
                     # Extract usage if available on final chunk
-                    if hasattr(data, 'usage') and data.usage:
+                    if hasattr(data, "usage") and data.usage:
                         usage = data.usage
-                        if hasattr(usage, 'prompt_tokens'):
+                        if hasattr(usage, "prompt_tokens"):
                             self._usage["prompt_tokens"] = usage.prompt_tokens
-                        if hasattr(usage, 'completion_tokens'):
+                        if hasattr(usage, "completion_tokens"):
                             self._usage["completion_tokens"] = usage.completion_tokens
-                        if hasattr(usage, 'total_tokens'):
+                        if hasattr(usage, "total_tokens"):
                             self._usage["total_tokens"] = usage.total_tokens
 
             except Exception as e:
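These chunk-processing changes feed a stream wrapper that accumulates usage while the caller iterates and records metrics once the stream is exhausted. A minimal sketch of that shape (illustrative; the real _StreamWrapper also owns the span, latency, and mock-response bookkeeping):

class StreamUsageWrapper:
    """Iterate a streaming response, accumulate usage, report at exhaustion."""

    def __init__(self, stream, on_finish):
        self._stream = stream
        self._on_finish = on_finish  # called once with the final usage dict
        self._usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self._stream)
        except StopIteration:
            self._on_finish(self._usage)  # record metrics once, at stream end
            raise
        data = getattr(chunk, "data", None)
        usage = getattr(data, "usage", None)  # Mistral sets usage on the final chunk
        if usage is not None:
            for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
                value = getattr(usage, key, None)
                if value is not None:
                    self._usage[key] = value
        return chunk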