PyPI - genai-otel-instrument - Versions diffs - 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl - Mend - Supply Chain Defender

genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of genai-otel-instrument might be problematic. Click here for more details.

Files changed (21) hide show

genai_otel/instrumentors/base.py CHANGED Viewed

@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
 and handling configuration and cost calculation.
 """
+import json
 import logging
 import threading
 import time
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, List, Optional
 import wrapt
 from opentelemetry import metrics, trace
@@ -97,7 +98,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         self.tracer = trace.get_tracer(__name__)
         self.meter = metrics.get_meter(__name__)
         self.config: Optional[OTelConfig] = None
-        self.cost_calculator = CostCalculator()
+        self.cost_calculator = CostCalculator()  # Will be updated when instrument() is called
         self._instrumented = False
         # Use shared metrics to avoid duplicate warnings
@@ -205,10 +206,25 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                 cls._shared_ttft_histogram = None
                 cls._shared_tbt_histogram = None
+    def _setup_config(self, config: OTelConfig):
+        """Set up configuration and reinitialize cost calculator with custom pricing if provided.
+        Args:
+            config (OTelConfig): The OpenTelemetry configuration object.
+        """
+        self.config = config
+        # Reinitialize cost calculator with custom pricing if provided
+        if config.custom_pricing_json:
+            self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
+            logger.info("Cost calculator reinitialized with custom pricing")
     @abstractmethod
     def instrument(self, config: OTelConfig):
         """Abstract method to implement library-specific instrumentation.
+        Implementers should call self._setup_config(config) at the beginning of this method
+        to ensure custom pricing is loaded.
         Args:
             config (OTelConfig): The OpenTelemetry configuration object.
         """
@@ -248,6 +264,26 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                 span = self.tracer.start_span(span_name, attributes=initial_attributes)
                 start_time = time.time()
+                # Extract session and user context (Phase 4.1)
+                if self.config:
+                    if self.config.session_id_extractor:
+                        try:
+                            session_id = self.config.session_id_extractor(instance, args, kwargs)
+                            if session_id:
+                                span.set_attribute("session.id", session_id)
+                                logger.debug("Set session.id: %s", session_id)
+                        except Exception as e:
+                            logger.debug("Failed to extract session ID: %s", e)
+                    if self.config.user_id_extractor:
+                        try:
+                            user_id = self.config.user_id_extractor(instance, args, kwargs)
+                            if user_id:
+                                span.set_attribute("user.id", user_id)
+                                logger.debug("Set user.id: %s", user_id)
+                        except Exception as e:
+                            logger.debug("Failed to extract user ID: %s", e)
                 try:
                     # Call the original function
                     result = wrapped(*args, **kwargs)
@@ -419,9 +455,13 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                                     self.cost_counter.add(total_cost, {"model": str(model)})
                                 # Always set span attributes (needed for cost tracking)
                                 span.set_attribute("gen_ai.usage.cost.total", total_cost)
-                                logger.debug(f"Set cost attribute: gen_ai.usage.cost.total={total_cost}")
+                                logger.debug(
+                                    f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
+                                )
                             else:
-                                logger.debug(f"Cost is zero, not setting attributes. Costs: {costs}")
+                                logger.debug(
+                                    f"Cost is zero, not setting attributes. Costs: {costs}"
+                                )
                             # Record and set attributes for granular costs
                             # Note: Metrics recording is optional, span attributes are always set
@@ -502,6 +542,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         first_token = True
         last_token_time = start_time
         token_count = 0
+        last_chunk = None  # Store last chunk to extract usage
         try:
             for chunk in stream:
@@ -523,6 +564,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                         self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
                 last_token_time = current_time
+                last_chunk = chunk  # Keep track of last chunk for usage extraction
                 yield chunk
             # Stream completed successfully
@@ -530,6 +572,123 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             if self.latency_histogram:
                 self.latency_histogram.record(duration, {"operation": span.name})
             span.set_attribute("gen_ai.streaming.token_count", token_count)
+            # Extract usage from last chunk and calculate cost
+            # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
+            try:
+                if last_chunk is not None:
+                    usage = self._extract_usage(last_chunk)
+                    if usage and isinstance(usage, dict):
+                        # Record token usage metrics and calculate cost
+                        # This will set span attributes and record cost metrics
+                        prompt_tokens = usage.get("prompt_tokens", 0)
+                        completion_tokens = usage.get("completion_tokens", 0)
+                        total_tokens = usage.get("total_tokens", 0)
+                        # Record token counts
+                        if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                                )
+                            span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
+                        if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    completion_tokens,
+                                    {"token_type": "completion", "operation": span.name},
+                                )
+                            span.set_attribute(
+                                "gen_ai.usage.completion_tokens", int(completion_tokens)
+                            )
+                        if isinstance(total_tokens, (int, float)) and total_tokens > 0:
+                            span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
+                        # Calculate and record cost if enabled
+                        if self.config and self.config.enable_cost_tracking:
+                            try:
+                                # Get call_type from span attributes or default to "chat"
+                                call_type = span.attributes.get("gen_ai.request.type", "chat")
+                                # Use granular cost calculation for chat requests
+                                if call_type == "chat":
+                                    costs = self.cost_calculator.calculate_granular_cost(
+                                        model, usage, call_type
+                                    )
+                                    total_cost = costs["total"]
+                                    # Record total cost
+                                    if total_cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(total_cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                        logger.debug(f"Streaming cost: {total_cost} USD")
+                                    # Record granular costs
+                                    if costs["prompt"] > 0:
+                                        if self.prompt_cost_counter:
+                                            self.prompt_cost_counter.add(
+                                                costs["prompt"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.prompt", costs["prompt"]
+                                        )
+                                    if costs["completion"] > 0:
+                                        if self.completion_cost_counter:
+                                            self.completion_cost_counter.add(
+                                                costs["completion"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.completion", costs["completion"]
+                                        )
+                                    if costs["reasoning"] > 0:
+                                        if self.reasoning_cost_counter:
+                                            self.reasoning_cost_counter.add(
+                                                costs["reasoning"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.reasoning", costs["reasoning"]
+                                        )
+                                    if costs["cache_read"] > 0:
+                                        if self.cache_read_cost_counter:
+                                            self.cache_read_cost_counter.add(
+                                                costs["cache_read"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_read", costs["cache_read"]
+                                        )
+                                    if costs["cache_write"] > 0:
+                                        if self.cache_write_cost_counter:
+                                            self.cache_write_cost_counter.add(
+                                                costs["cache_write"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_write", costs["cache_write"]
+                                        )
+                                else:
+                                    # For non-chat requests, use simple cost calculation
+                                    cost = self.cost_calculator.calculate_cost(
+                                        model, usage, call_type
+                                    )
+                                    if cost and cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", cost)
+                            except Exception as e:
+                                logger.warning(
+                                    "Failed to calculate cost for streaming response: %s", e
+                                )
+                    else:
+                        logger.debug("No usage information found in streaming response")
+            except Exception as e:
+                logger.warning("Failed to extract usage from streaming response: %s", e)
             span.set_status(Status(StatusCode.OK))
             span.end()  # Close the span when streaming completes
             logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
@@ -544,6 +703,71 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             logger.warning(f"Error in streaming wrapper: {e}")
             raise
+    # Phase 4.2: RAG/Embedding Helper Methods
+    def add_embedding_attributes(
+        self, span, model: str, input_text: str, vector: Optional[List[float]] = None
+    ):
+        """Add embedding-specific attributes to a span.
+        Args:
+            span: The OpenTelemetry span
+            model: The embedding model name
+            input_text: The text being embedded (will be truncated to 500 chars)
+            vector: Optional embedding vector (use with caution - can be large!)
+        """
+        span.set_attribute("embedding.model_name", model)
+        span.set_attribute("embedding.text", input_text[:500])  # Truncate to avoid large spans
+        if vector and self.config and hasattr(self.config, "capture_embedding_vectors"):
+            # Only capture vectors if explicitly enabled (they can be very large)
+            span.set_attribute("embedding.vector", json.dumps(vector))
+            span.set_attribute("embedding.vector.dimension", len(vector))
+    def add_retrieval_attributes(
+        self,
+        span,
+        documents: List[Dict[str, Any]],
+        query: Optional[str] = None,
+        max_docs: int = 5,
+    ):
+        """Add retrieval/RAG-specific attributes to a span.
+        Args:
+            span: The OpenTelemetry span
+            documents: List of retrieved documents. Each dict should have:
+                - id: Document identifier
+                - score: Relevance score
+                - content: Document content
+                - metadata: Optional metadata dict
+            query: Optional query string
+            max_docs: Maximum number of documents to include in attributes (default: 5)
+        """
+        if query:
+            span.set_attribute("retrieval.query", query[:500])  # Truncate
+        # Limit to first N documents to avoid attribute explosion
+        for i, doc in enumerate(documents[:max_docs]):
+            prefix = f"retrieval.documents.{i}.document"
+            if "id" in doc:
+                span.set_attribute(f"{prefix}.id", str(doc["id"]))
+            if "score" in doc:
+                span.set_attribute(f"{prefix}.score", float(doc["score"]))
+            if "content" in doc:
+                # Truncate content to avoid large attributes
+                content = str(doc["content"])[:500]
+                span.set_attribute(f"{prefix}.content", content)
+            # Add metadata if present
+            if "metadata" in doc and isinstance(doc["metadata"], dict):
+                for key, value in doc["metadata"].items():
+                    # Flatten metadata, limit key names to avoid explosion
+                    safe_key = str(key)[:50]  # Limit key length
+                    safe_value = str(value)[:200]  # Limit value length
+                    span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
+        span.set_attribute("retrieval.document_count", len(documents))
     @abstractmethod
     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
         """Abstract method to extract token usage information from a function result.

genai_otel/instrumentors/cohere_instrumentor.py CHANGED Viewed

@@ -1,140 +1,140 @@
-"""OpenTelemetry instrumentor for the Cohere SDK.
-This instrumentor automatically traces calls to Cohere models, capturing
-relevant attributes such as the model name and token usage.
-"""
-import logging
-from typing import Any, Dict, Optional
-from ..config import OTelConfig
-from .base import BaseInstrumentor
-logger = logging.getLogger(__name__)
-class CohereInstrumentor(BaseInstrumentor):
-    """Instrumentor for Cohere"""
-    def __init__(self):
-        """Initialize the instrumentor."""
-        super().__init__()
-        self._cohere_available = False
-        self._check_availability()
-    def _check_availability(self):
-        """Check if cohere library is available."""
-        try:
-            import cohere
-            self._cohere_available = True
-            logger.debug("cohere library detected and available for instrumentation")
-        except ImportError:
-            logger.debug("cohere library not installed, instrumentation will be skipped")
-            self._cohere_available = False
-    def instrument(self, config: OTelConfig):
-        """Instrument cohere if available."""
-        if not self._cohere_available:
-            logger.debug("Skipping instrumentation - library not available")
-            return
-        self.config = config
-        try:
-            import cohere
-            original_init = cohere.Client.__init__
-            def wrapped_init(instance, *args, **kwargs):
-                original_init(instance, *args, **kwargs)
-                self._instrument_client(instance)
-            cohere.Client.__init__ = wrapped_init
-            self._instrumented = True
-            logger.info("Cohere instrumentation enabled")
-        except Exception as e:
-            logger.error("Failed to instrument Cohere: %s", e, exc_info=True)
-            if config.fail_on_error:
-                raise
-    def _instrument_client(self, client):
-        """Instrument Cohere client methods."""
-        original_generate = client.generate
-        # Wrap using create_span_wrapper
-        wrapped_generate = self.create_span_wrapper(
-            span_name="cohere.generate",
-            extract_attributes=self._extract_generate_attributes,
-        )(original_generate)
-        client.generate = wrapped_generate
-    def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
-        """Extract attributes from Cohere generate call.
-        Args:
-            instance: The client instance.
-            args: Positional arguments.
-            kwargs: Keyword arguments.
-        Returns:
-            Dict[str, Any]: Dictionary of attributes to set on the span.
-        """
-        attrs = {}
-        model = kwargs.get("model", "command")
-        prompt = kwargs.get("prompt", "")
-        attrs["gen_ai.system"] = "cohere"
-        attrs["gen_ai.request.model"] = model
-        attrs["gen_ai.operation.name"] = "generate"
-        attrs["gen_ai.request.message_count"] = 1 if prompt else 0
-        return attrs
-    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
-        """Extract token usage from Cohere response.
-        Cohere responses include meta.tokens with:
-        - input_tokens: Input tokens
-        - output_tokens: Output tokens
-        Args:
-            result: The API response object.
-        Returns:
-            Optional[Dict[str, int]]: Dictionary with token counts or None.
-        """
-        try:
-            # Handle object response
-            if hasattr(result, "meta") and result.meta:
-                meta = result.meta
-                # Check for tokens object
-                if hasattr(meta, "tokens") and meta.tokens:
-                    tokens = meta.tokens
-                    input_tokens = getattr(tokens, "input_tokens", 0)
-                    output_tokens = getattr(tokens, "output_tokens", 0)
-                    if input_tokens or output_tokens:
-                        return {
-                            "prompt_tokens": int(input_tokens) if input_tokens else 0,
-                            "completion_tokens": int(output_tokens) if output_tokens else 0,
-                            "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
-                        }
-                # Fallback to billed_units
-                elif hasattr(meta, "billed_units") and meta.billed_units:
-                    billed = meta.billed_units
-                    input_tokens = getattr(billed, "input_tokens", 0)
-                    output_tokens = getattr(billed, "output_tokens", 0)
-                    if input_tokens or output_tokens:
-                        return {
-                            "prompt_tokens": int(input_tokens) if input_tokens else 0,
-                            "completion_tokens": int(output_tokens) if output_tokens else 0,
-                            "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
-                        }
-            return None
-        except Exception as e:
-            logger.debug("Failed to extract usage from Cohere response: %s", e)
-            return None
+"""OpenTelemetry instrumentor for the Cohere SDK.
+This instrumentor automatically traces calls to Cohere models, capturing
+relevant attributes such as the model name and token usage.
+"""
+import logging
+from typing import Any, Dict, Optional
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+logger = logging.getLogger(__name__)
+class CohereInstrumentor(BaseInstrumentor):
+    """Instrumentor for Cohere"""
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._cohere_available = False
+        self._check_availability()
+    def _check_availability(self):
+        """Check if cohere library is available."""
+        try:
+            import cohere
+            self._cohere_available = True
+            logger.debug("cohere library detected and available for instrumentation")
+        except ImportError:
+            logger.debug("cohere library not installed, instrumentation will be skipped")
+            self._cohere_available = False
+    def instrument(self, config: OTelConfig):
+        """Instrument cohere if available."""
+        if not self._cohere_available:
+            logger.debug("Skipping instrumentation - library not available")
+            return
+        self.config = config
+        try:
+            import cohere
+            original_init = cohere.Client.__init__
+            def wrapped_init(instance, *args, **kwargs):
+                original_init(instance, *args, **kwargs)
+                self._instrument_client(instance)
+            cohere.Client.__init__ = wrapped_init
+            self._instrumented = True
+            logger.info("Cohere instrumentation enabled")
+        except Exception as e:
+            logger.error("Failed to instrument Cohere: %s", e, exc_info=True)
+            if config.fail_on_error:
+                raise
+    def _instrument_client(self, client):
+        """Instrument Cohere client methods."""
+        original_generate = client.generate
+        # Wrap using create_span_wrapper
+        wrapped_generate = self.create_span_wrapper(
+            span_name="cohere.generate",
+            extract_attributes=self._extract_generate_attributes,
+        )(original_generate)
+        client.generate = wrapped_generate
+    def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Cohere generate call.
+        Args:
+            instance: The client instance.
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = kwargs.get("model", "command")
+        prompt = kwargs.get("prompt", "")
+        attrs["gen_ai.system"] = "cohere"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "generate"
+        attrs["gen_ai.request.message_count"] = 1 if prompt else 0
+        return attrs
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract token usage from Cohere response.
+        Cohere responses include meta.tokens with:
+        - input_tokens: Input tokens
+        - output_tokens: Output tokens
+        Args:
+            result: The API response object.
+        Returns:
+            Optional[Dict[str, int]]: Dictionary with token counts or None.
+        """
+        try:
+            # Handle object response
+            if hasattr(result, "meta") and result.meta:
+                meta = result.meta
+                # Check for tokens object
+                if hasattr(meta, "tokens") and meta.tokens:
+                    tokens = meta.tokens
+                    input_tokens = getattr(tokens, "input_tokens", 0)
+                    output_tokens = getattr(tokens, "output_tokens", 0)
+                    if input_tokens or output_tokens:
+                        return {
+                            "prompt_tokens": int(input_tokens) if input_tokens else 0,
+                            "completion_tokens": int(output_tokens) if output_tokens else 0,
+                            "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
+                        }
+                # Fallback to billed_units
+                elif hasattr(meta, "billed_units") and meta.billed_units:
+                    billed = meta.billed_units
+                    input_tokens = getattr(billed, "input_tokens", 0)
+                    output_tokens = getattr(billed, "output_tokens", 0)
+                    if input_tokens or output_tokens:
+                        return {
+                            "prompt_tokens": int(input_tokens) if input_tokens else 0,
+                            "completion_tokens": int(output_tokens) if output_tokens else 0,
+                            "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
+                        }
+            return None
+        except Exception as e:
+            logger.debug("Failed to extract usage from Cohere response: %s", e)
+            return None