PyPI - genai-otel-instrument - Versions diffs - 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl - Mend

genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of genai-otel-instrument might be problematic. Click here for more details.

Files changed (24) hide show

genai_otel/__version__.py +2 -2
genai_otel/auto_instrument.py +18 -1
genai_otel/config.py +22 -1
genai_otel/cost_calculator.py +204 -13
genai_otel/cost_enrichment_processor.py +175 -0
genai_otel/gpu_metrics.py +50 -0
genai_otel/instrumentors/base.py +300 -44
genai_otel/instrumentors/cohere_instrumentor.py +140 -76
genai_otel/instrumentors/huggingface_instrumentor.py +142 -13
genai_otel/instrumentors/langchain_instrumentor.py +75 -75
genai_otel/instrumentors/mistralai_instrumentor.py +234 -38
genai_otel/instrumentors/ollama_instrumentor.py +104 -35
genai_otel/instrumentors/replicate_instrumentor.py +59 -14
genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
genai_otel/llm_pricing.json +869 -589
genai_otel/logging_config.py +45 -45
genai_otel/py.typed +2 -2
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +294 -33
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +24 -23
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
{genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0

genai_otel/instrumentors/base.py CHANGED Viewed

@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
 and handling configuration and cost calculation.
 """
+import json
 import logging
 import threading
 import time
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, List, Optional
 import wrapt
 from opentelemetry import metrics, trace
@@ -82,6 +83,12 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
     _shared_latency_histogram = None
     _shared_cost_counter = None
     _shared_error_counter = None
+    # Granular cost counters (Phase 3.2)
+    _shared_prompt_cost_counter = None
+    _shared_completion_cost_counter = None
+    _shared_reasoning_cost_counter = None
+    _shared_cache_read_cost_counter = None
+    _shared_cache_write_cost_counter = None
     # Streaming metrics (Phase 3.4)
     _shared_ttft_histogram = None
     _shared_tbt_histogram = None
@@ -91,7 +98,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         self.tracer = trace.get_tracer(__name__)
         self.meter = metrics.get_meter(__name__)
         self.config: Optional[OTelConfig] = None
-        self.cost_calculator = CostCalculator()
+        self.cost_calculator = CostCalculator()  # Will be updated when instrument() is called
         self._instrumented = False
         # Use shared metrics to avoid duplicate warnings
@@ -103,6 +110,12 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         self.latency_histogram = self._shared_latency_histogram
         self.cost_counter = self._shared_cost_counter
         self.error_counter = self._shared_error_counter
+        # Granular cost counters (Phase 3.2)
+        self.prompt_cost_counter = self._shared_prompt_cost_counter
+        self.completion_cost_counter = self._shared_completion_cost_counter
+        self.reasoning_cost_counter = self._shared_reasoning_cost_counter
+        self.cache_read_cost_counter = self._shared_cache_read_cost_counter
+        self.cache_write_cost_counter = self._shared_cache_write_cost_counter
         # Streaming metrics
         self.ttft_histogram = self._shared_ttft_histogram
         self.tbt_histogram = self._shared_tbt_histogram
@@ -193,10 +206,25 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                 cls._shared_ttft_histogram = None
                 cls._shared_tbt_histogram = None
+    def _setup_config(self, config: OTelConfig):
+        """Set up configuration and reinitialize cost calculator with custom pricing if provided.
+        Args:
+            config (OTelConfig): The OpenTelemetry configuration object.
+        """
+        self.config = config
+        # Reinitialize cost calculator with custom pricing if provided
+        if config.custom_pricing_json:
+            self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
+            logger.info("Cost calculator reinitialized with custom pricing")
     @abstractmethod
     def instrument(self, config: OTelConfig):
         """Abstract method to implement library-specific instrumentation.
+        Implementers should call self._setup_config(config) at the beginning of this method
+        to ensure custom pricing is loaded.
         Args:
             config (OTelConfig): The OpenTelemetry configuration object.
         """
@@ -236,6 +264,26 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                 span = self.tracer.start_span(span_name, attributes=initial_attributes)
                 start_time = time.time()
+                # Extract session and user context (Phase 4.1)
+                if self.config:
+                    if self.config.session_id_extractor:
+                        try:
+                            session_id = self.config.session_id_extractor(instance, args, kwargs)
+                            if session_id:
+                                span.set_attribute("session.id", session_id)
+                                logger.debug("Set session.id: %s", session_id)
+                        except Exception as e:
+                            logger.debug("Failed to extract session ID: %s", e)
+                    if self.config.user_id_extractor:
+                        try:
+                            user_id = self.config.user_id_extractor(instance, args, kwargs)
+                            if user_id:
+                                span.set_attribute("user.id", user_id)
+                                logger.debug("Set user.id: %s", user_id)
+                        except Exception as e:
+                            logger.debug("Failed to extract user ID: %s", e)
                 try:
                     # Call the original function
                     result = wrapped(*args, **kwargs)
@@ -346,45 +394,54 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                     and "dup" in self.config.semconv_stability_opt_in
                 )
-                if (
-                    self.token_counter
-                    and isinstance(prompt_tokens, (int, float))
-                    and prompt_tokens > 0
-                ):
-                    self.token_counter.add(
-                        prompt_tokens, {"token_type": "prompt", "operation": span.name}
-                    )
-                    # New semantic convention
+                # Record prompt tokens
+                if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                    # Record metric if available
+                    if self.token_counter:
+                        self.token_counter.add(
+                            prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                        )
+                    # Always set span attributes (needed for cost calculation)
                     span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
                     # Old semantic convention (if dual emission enabled)
                     if emit_old_attrs:
                         span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))
-                if (
-                    self.token_counter
-                    and isinstance(completion_tokens, (int, float))
-                    and completion_tokens > 0
-                ):
-                    self.token_counter.add(
-                        completion_tokens, {"token_type": "completion", "operation": span.name}
-                    )
-                    # New semantic convention
+                # Record completion tokens
+                if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                    # Record metric if available
+                    if self.token_counter:
+                        self.token_counter.add(
+                            completion_tokens, {"token_type": "completion", "operation": span.name}
+                        )
+                    # Always set span attributes (needed for cost calculation)
                     span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
                     # Old semantic convention (if dual emission enabled)
                     if emit_old_attrs:
                         span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))
+                # Record total tokens
                 if isinstance(total_tokens, (int, float)) and total_tokens > 0:
                     span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
                 # Calculate and record cost if enabled and applicable
-                if self.config and self.config.enable_cost_tracking and self._shared_cost_counter:
+                logger.debug(
+                    f"Cost tracking check: config={self.config is not None}, "
+                    f"enable_cost_tracking={self.config.enable_cost_tracking if self.config else 'N/A'}"
+                )
+                if self.config and self.config.enable_cost_tracking:
                     try:
                         model = span.attributes.get("gen_ai.request.model", "unknown")
                         # Assuming 'chat' as a default call_type for generic base instrumentor tests.
                         # Specific instrumentors will provide the actual call_type.
                         call_type = span.attributes.get("gen_ai.request.type", "chat")
+                        logger.debug(
+                            f"Calculating cost for model={model}, call_type={call_type}, "
+                            f"prompt_tokens={usage.get('prompt_tokens')}, "
+                            f"completion_tokens={usage.get('completion_tokens')}"
+                        )
                         # Use granular cost calculation for chat requests
                         if call_type == "chat":
                             costs = self.cost_calculator.calculate_granular_cost(
@@ -394,45 +451,59 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                             # Record total cost
                             if total_cost > 0:
-                                self._shared_cost_counter.add(total_cost, {"model": str(model)})
-                                # Set span attributes for granular costs
+                                if self.cost_counter:
+                                    self.cost_counter.add(total_cost, {"model": str(model)})
+                                # Always set span attributes (needed for cost tracking)
                                 span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                logger.debug(
+                                    f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
+                                )
+                            else:
+                                logger.debug(
+                                    f"Cost is zero, not setting attributes. Costs: {costs}"
+                                )
                             # Record and set attributes for granular costs
-                            if costs["prompt"] > 0 and self._shared_prompt_cost_counter:
-                                self._shared_prompt_cost_counter.add(
-                                    costs["prompt"], {"model": str(model)}
-                                )
+                            # Note: Metrics recording is optional, span attributes are always set
+                            if costs["prompt"] > 0:
+                                if self.prompt_cost_counter:
+                                    self.prompt_cost_counter.add(
+                                        costs["prompt"], {"model": str(model)}
+                                    )
                                 span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
-                            if costs["completion"] > 0 and self._shared_completion_cost_counter:
-                                self._shared_completion_cost_counter.add(
-                                    costs["completion"], {"model": str(model)}
-                                )
+                            if costs["completion"] > 0:
+                                if self.completion_cost_counter:
+                                    self.completion_cost_counter.add(
+                                        costs["completion"], {"model": str(model)}
+                                    )
                                 span.set_attribute(
                                     "gen_ai.usage.cost.completion", costs["completion"]
                                 )
-                            if costs["reasoning"] > 0 and self._shared_reasoning_cost_counter:
-                                self._shared_reasoning_cost_counter.add(
-                                    costs["reasoning"], {"model": str(model)}
-                                )
+                            if costs["reasoning"] > 0:
+                                if self.reasoning_cost_counter:
+                                    self.reasoning_cost_counter.add(
+                                        costs["reasoning"], {"model": str(model)}
+                                    )
                                 span.set_attribute(
                                     "gen_ai.usage.cost.reasoning", costs["reasoning"]
                                 )
-                            if costs["cache_read"] > 0 and self._shared_cache_read_cost_counter:
-                                self._shared_cache_read_cost_counter.add(
-                                    costs["cache_read"], {"model": str(model)}
-                                )
+                            if costs["cache_read"] > 0:
+                                if self.cache_read_cost_counter:
+                                    self.cache_read_cost_counter.add(
+                                        costs["cache_read"], {"model": str(model)}
+                                    )
                                 span.set_attribute(
                                     "gen_ai.usage.cost.cache_read", costs["cache_read"]
                                 )
-                            if costs["cache_write"] > 0 and self._shared_cache_write_cost_counter:
-                                self._shared_cache_write_cost_counter.add(
-                                    costs["cache_write"], {"model": str(model)}
-                                )
+                            if costs["cache_write"] > 0:
+                                if self.cache_write_cost_counter:
+                                    self.cache_write_cost_counter.add(
+                                        costs["cache_write"], {"model": str(model)}
+                                    )
                                 span.set_attribute(
                                     "gen_ai.usage.cost.cache_write", costs["cache_write"]
                                 )
@@ -440,7 +511,8 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                             # For non-chat requests, use simple cost calculation
                             cost = self.cost_calculator.calculate_cost(model, usage, call_type)
                             if cost and cost > 0:
-                                self._shared_cost_counter.add(cost, {"model": str(model)})
+                                if self.cost_counter:
+                                    self.cost_counter.add(cost, {"model": str(model)})
                     except Exception as e:
                         logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)
@@ -470,6 +542,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         first_token = True
         last_token_time = start_time
         token_count = 0
+        last_chunk = None  # Store last chunk to extract usage
         try:
             for chunk in stream:
@@ -491,6 +564,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                         self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
                 last_token_time = current_time
+                last_chunk = chunk  # Keep track of last chunk for usage extraction
                 yield chunk
             # Stream completed successfully
@@ -498,6 +572,123 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             if self.latency_histogram:
                 self.latency_histogram.record(duration, {"operation": span.name})
             span.set_attribute("gen_ai.streaming.token_count", token_count)
+            # Extract usage from last chunk and calculate cost
+            # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
+            try:
+                if last_chunk is not None:
+                    usage = self._extract_usage(last_chunk)
+                    if usage and isinstance(usage, dict):
+                        # Record token usage metrics and calculate cost
+                        # This will set span attributes and record cost metrics
+                        prompt_tokens = usage.get("prompt_tokens", 0)
+                        completion_tokens = usage.get("completion_tokens", 0)
+                        total_tokens = usage.get("total_tokens", 0)
+                        # Record token counts
+                        if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                                )
+                            span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
+                        if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    completion_tokens,
+                                    {"token_type": "completion", "operation": span.name},
+                                )
+                            span.set_attribute(
+                                "gen_ai.usage.completion_tokens", int(completion_tokens)
+                            )
+                        if isinstance(total_tokens, (int, float)) and total_tokens > 0:
+                            span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
+                        # Calculate and record cost if enabled
+                        if self.config and self.config.enable_cost_tracking:
+                            try:
+                                # Get call_type from span attributes or default to "chat"
+                                call_type = span.attributes.get("gen_ai.request.type", "chat")
+                                # Use granular cost calculation for chat requests
+                                if call_type == "chat":
+                                    costs = self.cost_calculator.calculate_granular_cost(
+                                        model, usage, call_type
+                                    )
+                                    total_cost = costs["total"]
+                                    # Record total cost
+                                    if total_cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(total_cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                        logger.debug(f"Streaming cost: {total_cost} USD")
+                                    # Record granular costs
+                                    if costs["prompt"] > 0:
+                                        if self.prompt_cost_counter:
+                                            self.prompt_cost_counter.add(
+                                                costs["prompt"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.prompt", costs["prompt"]
+                                        )
+                                    if costs["completion"] > 0:
+                                        if self.completion_cost_counter:
+                                            self.completion_cost_counter.add(
+                                                costs["completion"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.completion", costs["completion"]
+                                        )
+                                    if costs["reasoning"] > 0:
+                                        if self.reasoning_cost_counter:
+                                            self.reasoning_cost_counter.add(
+                                                costs["reasoning"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.reasoning", costs["reasoning"]
+                                        )
+                                    if costs["cache_read"] > 0:
+                                        if self.cache_read_cost_counter:
+                                            self.cache_read_cost_counter.add(
+                                                costs["cache_read"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_read", costs["cache_read"]
+                                        )
+                                    if costs["cache_write"] > 0:
+                                        if self.cache_write_cost_counter:
+                                            self.cache_write_cost_counter.add(
+                                                costs["cache_write"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_write", costs["cache_write"]
+                                        )
+                                else:
+                                    # For non-chat requests, use simple cost calculation
+                                    cost = self.cost_calculator.calculate_cost(
+                                        model, usage, call_type
+                                    )
+                                    if cost and cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", cost)
+                            except Exception as e:
+                                logger.warning(
+                                    "Failed to calculate cost for streaming response: %s", e
+                                )
+                    else:
+                        logger.debug("No usage information found in streaming response")
+            except Exception as e:
+                logger.warning("Failed to extract usage from streaming response: %s", e)
             span.set_status(Status(StatusCode.OK))
             span.end()  # Close the span when streaming completes
             logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
@@ -512,6 +703,71 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             logger.warning(f"Error in streaming wrapper: {e}")
             raise
+    # Phase 4.2: RAG/Embedding Helper Methods
+    def add_embedding_attributes(
+        self, span, model: str, input_text: str, vector: Optional[List[float]] = None
+    ):
+        """Add embedding-specific attributes to a span.
+        Args:
+            span: The OpenTelemetry span
+            model: The embedding model name
+            input_text: The text being embedded (will be truncated to 500 chars)
+            vector: Optional embedding vector (use with caution - can be large!)
+        """
+        span.set_attribute("embedding.model_name", model)
+        span.set_attribute("embedding.text", input_text[:500])  # Truncate to avoid large spans
+        if vector and self.config and hasattr(self.config, "capture_embedding_vectors"):
+            # Only capture vectors if explicitly enabled (they can be very large)
+            span.set_attribute("embedding.vector", json.dumps(vector))
+            span.set_attribute("embedding.vector.dimension", len(vector))
+    def add_retrieval_attributes(
+        self,
+        span,
+        documents: List[Dict[str, Any]],
+        query: Optional[str] = None,
+        max_docs: int = 5,
+    ):
+        """Add retrieval/RAG-specific attributes to a span.
+        Args:
+            span: The OpenTelemetry span
+            documents: List of retrieved documents. Each dict should have:
+                - id: Document identifier
+                - score: Relevance score
+                - content: Document content
+                - metadata: Optional metadata dict
+            query: Optional query string
+            max_docs: Maximum number of documents to include in attributes (default: 5)
+        """
+        if query:
+            span.set_attribute("retrieval.query", query[:500])  # Truncate
+        # Limit to first N documents to avoid attribute explosion
+        for i, doc in enumerate(documents[:max_docs]):
+            prefix = f"retrieval.documents.{i}.document"
+            if "id" in doc:
+                span.set_attribute(f"{prefix}.id", str(doc["id"]))
+            if "score" in doc:
+                span.set_attribute(f"{prefix}.score", float(doc["score"]))
+            if "content" in doc:
+                # Truncate content to avoid large attributes
+                content = str(doc["content"])[:500]
+                span.set_attribute(f"{prefix}.content", content)
+            # Add metadata if present
+            if "metadata" in doc and isinstance(doc["metadata"], dict):
+                for key, value in doc["metadata"].items():
+                    # Flatten metadata, limit key names to avoid explosion
+                    safe_key = str(key)[:50]  # Limit key length
+                    safe_value = str(value)[:200]  # Limit value length
+                    span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
+        span.set_attribute("retrieval.document_count", len(documents))
     @abstractmethod
     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
         """Abstract method to extract token usage information from a function result.

genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl

Potentially problematic release.

genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl