PyPI - genai-otel-instrument - Versions diffs - 0.1.7.dev0__py3-none-any.whl → 0.1.10.dev0__py3-none-any.whl - Mend

genai-otel-instrument 0.1.7.dev0__py3-none-any.whl → 0.1.10.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of genai-otel-instrument might be problematic. Click here for more details.

Files changed (12) hide show

genai_otel/__version__.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.1.7.dev0'
-__version_tuple__ = version_tuple = (0, 1, 7, 'dev0')
+__version__ = version = '0.1.10.dev0'
+__version_tuple__ = version_tuple = (0, 1, 10, 'dev0')
 __commit_id__ = commit_id = None

genai_otel/auto_instrument.py CHANGED Viewed

@@ -19,6 +19,7 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExport
 from .config import OTelConfig
 from .cost_calculator import CostCalculator
 from .cost_enrichment_processor import CostEnrichmentSpanProcessor
+from .cost_enriching_exporter import CostEnrichingSpanExporter
 from .gpu_metrics import GPUMetricsCollector
 from .mcp_instrumentors import MCPInstrumentorManager
 from .metrics import (
@@ -169,14 +170,17 @@ def setup_auto_instrumentation(config: OTelConfig):
     set_global_textmap(TraceContextTextMapPropagator())
-    # Add cost enrichment processor for OpenInference instrumentors
-    # This enriches spans from smolagents, litellm, mcp with cost attributes
+    # Add cost enrichment processor for custom instrumentors (OpenAI, Ollama, etc.)
+    # These instrumentors set cost attributes directly, so processor is mainly for logging
+    # Also attempts to enrich OpenInference spans (smolagents, litellm, mcp), though
+    # the processor can't modify ReadableSpan - the exporter below handles that
+    cost_calculator = None
     if config.enable_cost_tracking:
         try:
             cost_calculator = CostCalculator()
             cost_processor = CostEnrichmentSpanProcessor(cost_calculator)
             tracer_provider.add_span_processor(cost_processor)
-            logger.info("Cost enrichment processor added for OpenInference instrumentors")
+            logger.info("Cost enrichment processor added")
         except Exception as e:
             logger.warning(f"Failed to add cost enrichment processor: {e}", exc_info=True)

genai_otel/cost_enriching_exporter.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""Custom SpanExporter that enriches spans with cost attributes before export.
+This exporter wraps another exporter (like OTLPSpanExporter) and adds cost
+attributes to spans before passing them to the wrapped exporter.
+"""
+import logging
+from typing import Optional, Sequence
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from .cost_calculator import CostCalculator
+logger = logging.getLogger(__name__)
+class CostEnrichingSpanExporter(SpanExporter):
+    """Wraps a SpanExporter and enriches spans with cost attributes before export.
+    This exporter:
+    1. Receives ReadableSpan objects from the SDK
+    2. Extracts model name and token usage from span attributes
+    3. Calculates cost using CostCalculator
+    4. Creates enriched span data with cost attributes
+    5. Exports to the wrapped exporter (e.g., OTLP)
+    """
+    # Attribute keys are probed in the listed order; the first truthy value wins.
+    # GenAI semantic-convention keys come first, then OpenInference keys.
+    _MODEL_KEYS = (
+        "gen_ai.request.model",
+        "llm.model_name",
+        "embedding.model_name",
+    )
+    _PROMPT_TOKEN_KEYS = (
+        "gen_ai.usage.prompt_tokens",
+        "gen_ai.usage.input_tokens",
+        "llm.token_count.prompt",  # OpenInference
+    )
+    _COMPLETION_TOKEN_KEYS = (
+        "gen_ai.usage.completion_tokens",
+        "gen_ai.usage.output_tokens",
+        "llm.token_count.completion",  # OpenInference
+    )
+    # Maps GenAI operation names and lower-cased OpenInference span kinds to the
+    # call types passed to the cost calculator. Unknown names fall back to "chat".
+    _CALL_TYPE_MAPPING = {
+        "chat": "chat",
+        "completion": "chat",
+        "embedding": "embedding",
+        "embeddings": "embedding",
+        "text_generation": "chat",
+        "image_generation": "image",
+        "audio": "audio",
+        "llm": "chat",
+        "chain": "chat",
+        "retriever": "embedding",
+        "reranker": "embedding",
+        "tool": "chat",
+        "agent": "chat",
+    }
+    def __init__(
+        self, wrapped_exporter: SpanExporter, cost_calculator: Optional[CostCalculator] = None
+    ):
+        """Initialize the cost enriching exporter.
+        Args:
+            wrapped_exporter: The underlying exporter to send enriched spans to.
+            cost_calculator: CostCalculator instance to use for cost calculations.
+                           If None, creates a new instance.
+        """
+        self.wrapped_exporter = wrapped_exporter
+        # Compare against None explicitly so a supplied calculator is never
+        # replaced just because it happens to evaluate as falsy.
+        self.cost_calculator = (
+            cost_calculator if cost_calculator is not None else CostCalculator()
+        )
+        logger.info(
+            f"CostEnrichingSpanExporter initialized, wrapping {type(wrapped_exporter).__name__}"
+        )
+    @staticmethod
+    def _first_truthy(attributes, keys, default=None):
+        """Return the first truthy value found under ``keys``, else ``default``."""
+        for key in keys:
+            value = attributes.get(key)
+            if value:
+                return value
+        return default
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        """Export spans after enriching them with cost attributes.
+        Args:
+            spans: Sequence of ReadableSpan objects to export.
+        Returns:
+            SpanExportResult from the wrapped exporter, or
+            SpanExportResult.FAILURE if enrichment or the wrapped export raises.
+        """
+        try:
+            # _enrich_span returns the original span when it cannot enrich it,
+            # so every incoming span is forwarded exactly once.
+            enriched_spans = [self._enrich_span(span) for span in spans]
+            return self.wrapped_exporter.export(enriched_spans)
+        except Exception as e:
+            logger.error(f"Failed to export spans: {e}", exc_info=True)
+            return SpanExportResult.FAILURE
+    def _enrich_span(self, span: ReadableSpan) -> ReadableSpan:
+        """Enrich a span with cost attributes if applicable.
+        Args:
+            span: The original ReadableSpan.
+        Returns:
+            A new ReadableSpan with cost attributes added, or the original span
+            when it has no attributes, no model name, no token usage, already
+            carries a total cost, the calculated cost is not positive, or
+            enrichment fails.
+        """
+        try:
+            if not span.attributes:
+                return span
+            attributes = dict(span.attributes)  # Make a mutable copy
+            model = self._first_truthy(attributes, self._MODEL_KEYS)
+            if not model:
+                return span
+            # Skip if cost attributes are already present
+            if "gen_ai.usage.cost.total" in attributes:
+                logger.debug(f"Span '{span.name}' already has cost attributes, skipping enrichment")
+                return span
+            prompt_tokens = self._first_truthy(attributes, self._PROMPT_TOKEN_KEYS, 0)
+            completion_tokens = self._first_truthy(attributes, self._COMPLETION_TOKEN_KEYS, 0)
+            # Skip if no tokens recorded
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return span
+            # Prefer the GenAI operation name; fall back to the OpenInference span
+            # kind (lower-cased to match the mapping keys), then to "chat".
+            span_kind = attributes.get("openinference.span.kind", "")
+            call_type = (
+                attributes.get("gen_ai.operation.name") or str(span_kind).lower() or "chat"
+            )
+            normalized_call_type = self._CALL_TYPE_MAPPING.get(str(call_type).lower(), "chat")
+            # Calculate cost
+            usage = {
+                "prompt_tokens": int(prompt_tokens),
+                "completion_tokens": int(completion_tokens),
+                "total_tokens": int(prompt_tokens) + int(completion_tokens),
+            }
+            cost_info = self.cost_calculator.calculate_granular_cost(
+                model=str(model),
+                usage=usage,
+                call_type=normalized_call_type,
+            )
+            if cost_info and cost_info.get("total", 0.0) > 0:
+                # Add cost attributes to the mutable copy
+                attributes["gen_ai.usage.cost.total"] = cost_info["total"]
+                if cost_info.get("prompt", 0.0) > 0:
+                    attributes["gen_ai.usage.cost.prompt"] = cost_info["prompt"]
+                if cost_info.get("completion", 0.0) > 0:
+                    attributes["gen_ai.usage.cost.completion"] = cost_info["completion"]
+                logger.info(
+                    f"Enriched span '{span.name}' with cost: {cost_info['total']:.6f} USD "
+                    f"for model {model} ({usage['total_tokens']} tokens)"
+                )
+                # A finished ReadableSpan cannot have attributes set on it, and it is
+                # a regular class rather than a NamedTuple (no _replace()), so build
+                # a new ReadableSpan that carries the enriched attribute dict.
+                # NOTE(review): only the fields listed below are carried over -
+                # confirm against the SDK's ReadableSpan constructor.
+                return ReadableSpan(
+                    name=span.name,
+                    context=span.context,
+                    kind=span.kind,
+                    parent=span.parent,
+                    start_time=span.start_time,
+                    end_time=span.end_time,
+                    status=span.status,
+                    attributes=attributes,  # Use enriched attributes
+                    events=span.events,
+                    links=span.links,
+                    resource=span.resource,
+                    instrumentation_scope=span.instrumentation_scope,
+                )
+        except Exception as e:
+            # Best effort: a failed enrichment must never prevent the export.
+            logger.warning(
+                f"Failed to enrich span '{getattr(span, 'name', 'unknown')}' with cost: {e}",
+                exc_info=True,
+            )
+        return span
+    def shutdown(self) -> None:
+        """Shutdown the wrapped exporter."""
+        logger.info("CostEnrichingSpanExporter shutting down")
+        self.wrapped_exporter.shutdown()
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush the wrapped exporter.
+        Args:
+            timeout_millis: Timeout in milliseconds.
+        Returns:
+            Whatever the wrapped exporter's force_flush returns.
+        """
+        return self.wrapped_exporter.force_flush(timeout_millis)

genai_otel/cost_enrichment_processor.py CHANGED Viewed

@@ -132,9 +132,8 @@ class CostEnrichmentSpanProcessor(SpanProcessor):
             if cost_info and cost_info.get("total", 0.0) > 0:
                 # Add cost attributes to the span
-                # Note: We can't modify ReadableSpan attributes directly,
-                # but we can if span is still a Span instance
-                if isinstance(span, Span):
+                # Use duck typing to check if span supports set_attribute
+                if hasattr(span, "set_attribute") and callable(getattr(span, "set_attribute")):
                     span.set_attribute("gen_ai.usage.cost.total", cost_info["total"])
                     if cost_info.get("prompt", 0.0) > 0:

genai_otel/instrumentors/huggingface_instrumentor.py CHANGED Viewed

@@ -3,9 +3,11 @@
 This instrumentor automatically traces:
 1. HuggingFace Transformers pipelines (local model execution)
 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+3. Direct model usage via AutoModelForCausalLM.generate() and forward()
 Note: Transformers runs models locally (no API costs), but InferenceClient makes
 API calls to HuggingFace endpoints which may have costs based on usage.
+Local model costs are estimated based on parameter count and token usage.
 """
 import logging
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
 class HuggingFaceInstrumentor(BaseInstrumentor):
     """Instrumentor for HuggingFace Transformers and Inference API.
-    Instruments both:
-    - transformers.pipeline (local execution, no API costs)
+    Instruments:
+    - transformers.pipeline (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
     - huggingface_hub.InferenceClient (API calls, may have costs)
     """
@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         super().__init__()
         self._transformers_available = False
         self._inference_client_available = False
+        self._model_classes_instrumented = False
         self._check_availability()
     def _check_availability(self):
@@ -55,13 +60,14 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
             self._inference_client_available = False
     def instrument(self, config: OTelConfig):
-        """Instrument HuggingFace Transformers pipelines and InferenceClient."""
-        self.config = config
+        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+        self._setup_config(config)
         instrumented_count = 0
-        # Instrument transformers.pipeline if available
+        # Instrument transformers components if available
         if self._transformers_available:
+            # Instrument pipeline
             try:
                 self._instrument_transformers()
                 instrumented_count += 1
@@ -70,6 +76,15 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
                 if config.fail_on_error:
                     raise
+            # Instrument model classes (AutoModelForCausalLM, etc.)
+            try:
+                self._instrument_model_classes()
+                instrumented_count += 1
+            except Exception as e:
+                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                if config.fail_on_error:
+                    raise
         # Instrument InferenceClient if available
         if self._inference_client_available:
             try:
@@ -166,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         InferenceClient.text_generation = wrapped_text_generation
         logger.debug("HuggingFace InferenceClient instrumented")
+    def _instrument_model_classes(self):
+        """Instrument ``GenerationMixin.generate()`` for direct model usage.
+        Replaces ``GenerationMixin.generate`` with a ``wrapt`` wrapper that opens a
+        ``huggingface.model.generate`` span per call, records request parameters
+        and token usage as span attributes, updates the request/token/latency/cost
+        metrics when they are configured, and returns the wrapped call's result
+        unchanged.
+        An ImportError (``wrapt`` or ``transformers`` not importable) is logged at
+        debug level and swallowed; any other exception propagates so that
+        instrument() can handle it.
+        """
+        # Guard against wrapping generate() twice if instrument() runs again.
+        if getattr(self, "_model_classes_instrumented", False):
+            return
+        try:
+            import time
+            import wrapt
+            # GenerationMixin is the base class that provides generate();
+            # patching it covers every class that inherits the method.
+            try:
+                from transformers.generation.utils import GenerationMixin
+            except ImportError:
+                # Fallback for older transformers versions
+                from transformers.generation import GenerationMixin
+        except ImportError as e:
+            logger.debug(f"Could not import model classes for instrumentation: {e}")
+            return
+        # Store reference to instrumentor for use in the wrapper closure
+        instrumentor = self
+        # (generate() keyword, span attribute) pairs, applied in order so that
+        # max_new_tokens overrides max_length when both are given.
+        request_params = (
+            ("max_length", "gen_ai.request.max_tokens"),
+            ("max_new_tokens", "gen_ai.request.max_tokens"),
+            ("temperature", "gen_ai.request.temperature"),
+            ("top_p", "gen_ai.request.top_p"),
+        )
+        @wrapt.decorator
+        def generate_wrapper(wrapped, instance, args, kwargs):
+            """Wrapper for model.generate() method."""
+            # Extract model info; the config's _name_or_path wins when present.
+            model_name = getattr(instance, "name_or_path", "unknown")
+            model_config = getattr(instance, "config", None)
+            if hasattr(model_config, "_name_or_path"):
+                model_name = model_config._name_or_path
+            # Get input token count. Compare against None explicitly: using
+            # ``or`` here would evaluate the truthiness of a tensor, which
+            # raises for tensors holding more than one element.
+            input_ids = kwargs.get("input_ids")
+            if input_ids is None and args:
+                input_ids = args[0]
+            prompt_tokens = 0
+            if input_ids is not None:
+                if hasattr(input_ids, "shape"):
+                    prompt_tokens = int(input_ids.shape[-1])
+                elif isinstance(input_ids, (list, tuple)):
+                    prompt_tokens = len(input_ids[0]) if input_ids else 0
+            with instrumentor.tracer.start_as_current_span(
+                "huggingface.model.generate"
+            ) as span:
+                span.set_attribute("gen_ai.system", "huggingface")
+                span.set_attribute("gen_ai.request.model", model_name)
+                span.set_attribute("gen_ai.operation.name", "text_generation")
+                span.set_attribute("gen_ai.request.type", "chat")
+                # Extract generation parameters
+                for param, attribute in request_params:
+                    if param in kwargs:
+                        span.set_attribute(attribute, kwargs[param])
+                # Call original generate; perf_counter is monotonic, so the
+                # measured duration is unaffected by wall-clock adjustments.
+                start_time = time.perf_counter()
+                result = wrapped(*args, **kwargs)
+                duration = time.perf_counter() - start_time
+                # generate() may return a ModelOutput exposing the generated ids
+                # as ``.sequences`` instead of a bare tensor; count those too.
+                sequences = getattr(result, "sequences", result)
+                # The output length is treated as prompt + completion, so the
+                # prompt length is subtracted to get the completion tokens.
+                completion_tokens = 0
+                if hasattr(sequences, "shape"):
+                    completion_tokens = max(0, int(sequences.shape[-1]) - prompt_tokens)
+                elif isinstance(sequences, (list, tuple)):
+                    if sequences and hasattr(sequences[0], "shape"):
+                        completion_tokens = max(
+                            0, int(sequences[0].shape[-1]) - prompt_tokens
+                        )
+                total_tokens = prompt_tokens + completion_tokens
+                # Set token usage attributes
+                if prompt_tokens > 0:
+                    span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                if completion_tokens > 0:
+                    span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                if total_tokens > 0:
+                    span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+                # Record metrics
+                if instrumentor.request_counter:
+                    instrumentor.request_counter.add(
+                        1, {"model": model_name, "provider": "huggingface"}
+                    )
+                if instrumentor.token_counter and total_tokens > 0:
+                    if prompt_tokens > 0:
+                        instrumentor.token_counter.add(
+                            prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                        )
+                    if completion_tokens > 0:
+                        instrumentor.token_counter.add(
+                            completion_tokens,
+                            {"token_type": "completion", "operation": span.name},
+                        )
+                if instrumentor.latency_histogram:
+                    instrumentor.latency_histogram.record(duration, {"operation": span.name})
+                # Calculate and record cost if enabled
+                if (
+                    instrumentor.config
+                    and instrumentor.config.enable_cost_tracking
+                    and total_tokens > 0
+                ):
+                    try:
+                        usage = {
+                            "prompt_tokens": prompt_tokens,
+                            "completion_tokens": completion_tokens,
+                            "total_tokens": total_tokens,
+                        }
+                        costs = instrumentor.cost_calculator.calculate_granular_cost(
+                            model=model_name, usage=usage, call_type="chat"
+                        )
+                        if costs["total"] > 0:
+                            if instrumentor.cost_counter:
+                                instrumentor.cost_counter.add(
+                                    costs["total"], {"model": model_name}
+                                )
+                            span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                            if costs["prompt"] > 0:
+                                span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                            if costs["completion"] > 0:
+                                span.set_attribute(
+                                    "gen_ai.usage.cost.completion", costs["completion"]
+                                )
+                            logger.debug(
+                                f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                f"cost: ${costs['total']:.6f}"
+                            )
+                    except Exception as e:
+                        # Cost tracking is best effort; never fail the user's call.
+                        logger.warning(f"Failed to calculate cost: {e}")
+                return result
+        # Apply wrapper to GenerationMixin.generate (all models inherit this)
+        GenerationMixin.generate = generate_wrapper(GenerationMixin.generate)
+        self._model_classes_instrumented = True
+        logger.debug(
+            "HuggingFace GenerationMixin.generate() instrumented "
+            "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+        )
     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
         """Extract attributes from Inference API call."""
         attrs = {}

genai_otel/llm_pricing.json CHANGED Viewed

@@ -109,6 +109,16 @@
         "512x512": 0.01,
         "1024x1024": 0.012
       }
+    },
+    "grok-image": {
+      "standard": {
+        "per_image": 0.07
+      }
+    },
+    "xai-grok-image": {
+      "standard": {
+        "per_image": 0.07
+      }
     }
   },
   "audio": {
@@ -149,6 +159,22 @@
       "promptPrice": 0.002,
       "completionPrice": 0.008
     },
+    "gpt-5": {
+      "promptPrice": 0.00125,
+      "completionPrice": 0.01
+    },
+    "gpt-5-2025-08-07": {
+      "promptPrice": 0.00125,
+      "completionPrice": 0.01
+    },
+    "gpt-5-mini": {
+      "promptPrice": 0.00025,
+      "completionPrice": 0.002
+    },
+    "gpt-5-nano": {
+      "promptPrice": 0.0001,
+      "completionPrice": 0.0004
+    },
     "gpt-4o": {
       "promptPrice": 0.0005,
       "completionPrice": 0.0015
@@ -249,6 +275,22 @@
       "promptPrice": 0.015,
       "completionPrice": 0.075
     },
+    "claude-4-opus": {
+      "promptPrice": 0.015,
+      "completionPrice": 0.075
+    },
+    "claude-opus-4": {
+      "promptPrice": 0.015,
+      "completionPrice": 0.075
+    },
+    "claude-opus-4-1": {
+      "promptPrice": 0.015,
+      "completionPrice": 0.075
+    },
+    "claude-opus-4.1": {
+      "promptPrice": 0.015,
+      "completionPrice": 0.075
+    },
     "claude-3-sonnet-20240229": {
       "promptPrice": 0.003,
       "completionPrice": 0.015
@@ -257,6 +299,34 @@
       "promptPrice": 0.00025,
       "completionPrice": 0.00125
     },
+    "claude-3-5-sonnet-20240620": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "claude-3-5-sonnet-20241022": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "claude-3-5-haiku-20241022": {
+      "promptPrice": 0.0008,
+      "completionPrice": 0.004
+    },
+    "claude-sonnet-4-5": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "claude-sonnet-4-5-20250929": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "claude-3-7-sonnet": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "claude-haiku-4-5": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.005
+    },
     "command": {
       "promptPrice": 0.001,
       "completionPrice": 0.002
@@ -441,6 +511,14 @@
       "promptPrice": 0.00125,
       "completionPrice": 0.01
     },
+    "gemini-2-5-flash-image": {
+      "promptPrice": 0.0003,
+      "completionPrice": 0.03
+    },
+    "nano-banana": {
+      "promptPrice": 0.0003,
+      "completionPrice": 0.03
+    },
     "text-bison": {
       "promptPrice": 0.001,
       "completionPrice": 0.002
@@ -537,6 +615,38 @@
       "promptPrice": 0.0005,
       "completionPrice": 0.0015
     },
+    "grok-2-1212": {
+      "promptPrice": 0.002,
+      "completionPrice": 0.01
+    },
+    "grok-2-vision-1212": {
+      "promptPrice": 0.002,
+      "completionPrice": 0.01
+    },
+    "grok-3": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "grok-3-mini": {
+      "promptPrice": 0.0003,
+      "completionPrice": 0.0005
+    },
+    "grok-3-fast": {
+      "promptPrice": 0.005,
+      "completionPrice": 0.025
+    },
+    "grok-3-mini-fast": {
+      "promptPrice": 0.0006,
+      "completionPrice": 0.004
+    },
+    "grok-4": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.015
+    },
+    "grok-4-fast": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0005
+    },
     "jamba-1.5-mini": {
       "promptPrice": 0.0002,
       "completionPrice": 0.0004
@@ -565,6 +675,26 @@
       "promptPrice": 0.0012,
       "completionPrice": 0.0012
     },
+    "qwen3-next-80b-a3b-instruct": {
+      "promptPrice": 0.000525,
+      "completionPrice": 0.0021
+    },
+    "qwen3-next-80b-a3b-thinking": {
+      "promptPrice": 0.000525,
+      "completionPrice": 0.0063
+    },
+    "qwen3-coder-480b-a35b-instruct": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.005
+    },
+    "qwen3-max": {
+      "promptPrice": 0.0012,
+      "completionPrice": 0.006
+    },
+    "qwen-qwen3-max": {
+      "promptPrice": 0.0012,
+      "completionPrice": 0.006
+    },
     "codellama/CodeLlama-34b-Instruct-hf": {
       "promptPrice": 0.0008,
       "completionPrice": 0.0008
@@ -613,13 +743,29 @@
       "promptPrice": 0.00059,
       "completionPrice": 0.00079
     },
+    "llama-4-scout": {
+      "promptPrice": 0.00015,
+      "completionPrice": 0.0005
+    },
     "llama-4-scout-17bx16e-128k": {
-      "promptPrice": 0.00011,
-      "completionPrice": 0.00034
+      "promptPrice": 0.00015,
+      "completionPrice": 0.0005
+    },
+    "llama-4-maverick": {
+      "promptPrice": 0.00022,
+      "completionPrice": 0.00085
     },
     "llama-4-maverick-17bx128e-128k": {
-      "promptPrice": 0.0002,
-      "completionPrice": 0.0006
+      "promptPrice": 0.00022,
+      "completionPrice": 0.00085
+    },
+    "meta-llama/Llama-4-Scout": {
+      "promptPrice": 0.00015,
+      "completionPrice": 0.0005
+    },
+    "meta-llama/Llama-4-Maverick": {
+      "promptPrice": 0.00022,
+      "completionPrice": 0.00085
     },
     "llama-guard-4-12b": {
       "promptPrice": 0.0002,
@@ -773,7 +919,7 @@
       "promptPrice": 0.0003,
       "completionPrice": 0.0006
     },
-    "qwen3:3b": {
+    "qwen3:4b": {
       "promptPrice": 0.0003,
       "completionPrice": 0.0006
     },
@@ -857,9 +1003,161 @@
       "promptPrice": 0.0008,
       "completionPrice": 0.0008
     },
+    "ibm-granite-3-1-8b-instruct": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "ibm-granite-3-8b-instruct": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite-3-8b-instruct": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite-embedding-107m-multilingual": {
+      "promptPrice": 0.0001,
+      "completionPrice": 0.0001
+    },
+    "granite-embedding-278m-multilingual": {
+      "promptPrice": 0.0001,
+      "completionPrice": 0.0001
+    },
     "deepseek-v3.1": {
-      "promptPrice": 0.0012,
-      "completionPrice": 0.0012
+      "promptPrice": 0.00056,
+      "completionPrice": 0.00168
+    },
+    "sarvam-m": {
+      "promptPrice": 0,
+      "completionPrice": 0
+    },
+    "sarvamai/sarvam-m": {
+      "promptPrice": 0,
+      "completionPrice": 0
+    },
+    "sarvam-chat": {
+      "promptPrice": 0,
+      "completionPrice": 0
+    },
+    "granite-4-0-h-small": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite-4-0-h-tiny": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite-4-0-h-micro": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite-4-0-micro": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "ibm-granite/granite-4.0-h-small": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "ibm-granite/granite-4.0-h-tiny": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "ibm-granite/granite-4.0-h-micro": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite:3b": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "granite:8b": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0002
+    },
+    "mistral-large-24-11": {
+      "promptPrice": 0.008,
+      "completionPrice": 0.024
+    },
+    "mistral-large-2411": {
+      "promptPrice": 0.008,
+      "completionPrice": 0.024
+    },
+    "mistral-small-3-1": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
+    },
+    "mistral-small-3.1": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
+    },
+    "mistral-medium-3": {
+      "promptPrice": 0.0004,
+      "completionPrice": 0.002
+    },
+    "mistral-medium-2025": {
+      "promptPrice": 0.0004,
+      "completionPrice": 0.002
+    },
+    "magistral-small": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
+    },
+    "magistral-medium": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.009
+    },
+    "codestral-25-01": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
+    },
+    "codestral-2501": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
+    },
+    "lfm-7b": {
+      "promptPrice": 0.0003,
+      "completionPrice": 0.0006
+    },
+    "liquid/lfm-7b": {
+      "promptPrice": 0.0003,
+      "completionPrice": 0.0006
+    },
+    "snowflake-arctic": {
+      "promptPrice": 0.0008,
+      "completionPrice": 0.0024
+    },
+    "snowflake-arctic-instruct": {
+      "promptPrice": 0.0008,
+      "completionPrice": 0.0024
+    },
+    "snowflake/snowflake-arctic-instruct": {
+      "promptPrice": 0.0008,
+      "completionPrice": 0.0024
+    },
+    "snowflake-arctic-embed-l-v2.0": {
+      "promptPrice": 0.00005,
+      "completionPrice": 0.00005
+    },
+    "nvidia-nemotron-4-340b-instruct": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.009
+    },
+    "nvidia/nemotron-4-340b-instruct": {
+      "promptPrice": 0.003,
+      "completionPrice": 0.009
+    },
+    "nvidia-nemotron-mini": {
+      "promptPrice": 0.0002,
+      "completionPrice": 0.0004
+    },
+    "nvidia/llama-3.1-nemotron-70b-instruct": {
+      "promptPrice": 0.0008,
+      "completionPrice": 0.0008
+    },
+    "servicenow-now-assist": {
+      "promptPrice": 0.001,
+      "completionPrice": 0.003
     },
     "llama3.1:405b": {
       "promptPrice": 0.0012,

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: genai-otel-instrument
-Version: 0.1.7.dev0
+Version: 0.1.10.dev0
 Summary: Comprehensive OpenTelemetry auto-instrumentation for LLM/GenAI applications
 Author-email: Kshitij Thakkar <kshitijthakkar@rocketmail.com>
 License: Apache-2.0
@@ -180,6 +180,12 @@ Dynamic: license-file
 # GenAI OpenTelemetry Auto-Instrumentation
+<div align="center">
+  <img src=".github/images/Logo.jpg" alt="GenAI OpenTelemetry Instrumentation Logo" width="400"/>
+</div>
+<br/>
 [![PyPI version](https://badge.fury.io/py/genai-otel-instrument.svg)](https://badge.fury.io/py/genai-otel-instrument)
 [![Python Versions](https://img.shields.io/pypi/pyversions/genai-otel-instrument.svg)](https://pypi.org/project/genai-otel-instrument/)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -200,6 +206,14 @@ Dynamic: license-file
 [![Semantic Conventions](https://img.shields.io/badge/OTel%20Semconv-GenAI%20v1.28-orange)](https://opentelemetry.io/docs/specs/semconv/gen-ai/)
 [![CI/CD](https://img.shields.io/badge/CI%2FCD-GitHub%20Actions-2088FF?logo=github-actions&logoColor=white)](https://github.com/Mandark-droid/genai_otel_instrument/actions)
+---
+<div align="center">
+  <img src=".github/images/Landing_Page.jpg" alt="GenAI OpenTelemetry Instrumentation Overview" width="800"/>
+</div>
+---
 Production-ready OpenTelemetry instrumentation for GenAI/LLM applications with zero-code setup.
 ## Features
@@ -257,7 +271,8 @@ For a more comprehensive demonstration of various LLM providers and MCP tools, r
 ### LLM Providers (Auto-detected)
 - **With Full Cost Tracking**: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure OpenAI, Cohere, Mistral AI, Together AI, Groq, Ollama, Vertex AI
-- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution, free)
+- **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution with estimated costs)
+  - **HuggingFace Support**: `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`, `InferenceClient` API calls
 - **Other Providers**: Anyscale
 ### Frameworks
@@ -288,6 +303,65 @@ The processor supports OpenInference semantic conventions:
 pip install genai-otel-instrument[openinference]
 ```
+## Screenshots
+See the instrumentation in action across different LLM providers and observability backends.
+### OpenAI Instrumentation
+Full trace capture for OpenAI API calls with token usage, costs, and latency metrics.
+<div align="center">
+  <img src=".github/images/Screenshots/Traces_OpenAI.png" alt="OpenAI Traces" width="900"/>
+</div>
+### Ollama (Local LLM) Instrumentation
+Zero-code instrumentation for local models running on Ollama with comprehensive observability.
+<div align="center">
+  <img src=".github/images/Screenshots/Traces_Ollama.png" alt="Ollama Traces" width="900"/>
+</div>
+### HuggingFace Transformers
+Direct instrumentation of HuggingFace Transformers with automatic token counting and cost estimation.
+<div align="center">
+  <img src=".github/images/Screenshots/Trace_HuggingFace_Transformer_Models.png" alt="HuggingFace Transformer Traces" width="900"/>
+</div>
+### SmolAgents Framework
+Complete agent workflow tracing with tool calls, iterations, and cost breakdown.
+<div align="center">
+  <img src=".github/images/Screenshots/Traces_SmolAgent_with_tool_calls.png" alt="SmolAgent Traces with Tool Calls" width="900"/>
+</div>
+### GPU Metrics Collection
+Real-time GPU utilization, memory, temperature, and power consumption metrics.
+<div align="center">
+  <img src=".github/images/Screenshots/GPU_Metrics.png" alt="GPU Metrics Dashboard" width="900"/>
+</div>
+### Additional Screenshots
+- **[Token Cost Breakdown](.github/images/Screenshots/Traces_SmolAgent_Token_Cost_breakdown.png)** - Detailed token usage and cost analysis for SmolAgent workflows
+- **[OpenSearch Dashboard](.github/images/Screenshots/GENAI_OpenSearch_output.png)** - GenAI metrics visualization in OpenSearch/Kibana
+---
+## Demo Video
+Watch a comprehensive walkthrough of GenAI OpenTelemetry Auto-Instrumentation in action, demonstrating setup, configuration, and real-time observability across multiple LLM providers.
+<div align="center">
+  <strong>🎥 <a href="https://youtu.be/YOUR_VIDEO_ID_HERE">Watch Demo Video</a></strong>
+  <em>(Coming Soon)</em>
+</div>
+---
 ## Cost Tracking Coverage
 The library includes comprehensive cost tracking with pricing data for **145+ models** across **11 providers**:
@@ -307,7 +381,10 @@ The library includes comprehensive cost tracking with pricing data for **145+ mo
 ### Special Pricing Models
 - **Replicate**: Hardware-based pricing ($/second of GPU/CPU time) - not token-based
-- **HuggingFace Transformers**: Local execution - no API costs
+- **HuggingFace Transformers**: Local model execution with estimated costs based on parameter count
+  - Supports `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`
+  - Cost estimation uses GPU/compute resource pricing tiers (tiny/small/medium/large)
+  - Automatic token counting from tensor shapes
 ### Pricing Features
 - **Differential Pricing**: Separate rates for prompt tokens vs. completion tokens
@@ -836,38 +913,6 @@ genai_otel.instrument(
 - `gen_ai.eval.bias_categories` - Detected bias types (array)
 - `gen_ai.eval.toxicity_categories` - Toxicity categories (array)
-#### 📊 Enhanced OpenTelemetry Compliance
-Completing remaining items from [OTEL_SEMANTIC_GAP_ANALYSIS_AND_IMPLEMENTATION_PLAN.md](OTEL_SEMANTIC_GAP_ANALYSIS_AND_IMPLEMENTATION_PLAN.md):
-**Phase 4: Optional Enhancements (✅ COMPLETED)**
-All Phase 4 features are now available! See the [Advanced Features](#advanced-features) section for detailed documentation.
-- ✅ **Session & User Tracking** - Track sessions and users across requests with custom extractor functions
-  - Configurable via `session_id_extractor` and `user_id_extractor` in `OTelConfig`
-  - Automatically adds `session.id` and `user.id` span attributes
-  - See [Session and User Tracking](#session-and-user-tracking) for usage examples
-- ✅ **RAG/Embedding Attributes** - Enhanced observability for retrieval-augmented generation
-  - Helper methods: `add_embedding_attributes()` and `add_retrieval_attributes()`
-  - Embedding attributes: `embedding.model_name`, `embedding.text`, `embedding.vector.dimension`
-  - Retrieval attributes: `retrieval.query`, `retrieval.document_count`, `retrieval.documents.{i}.document.*`
-  - See [RAG and Embedding Attributes](#rag-and-embedding-attributes) for usage examples
-  - Complete example: `examples/phase4_session_rag_tracking.py`
-**Note on Agent Workflow Tracking:**
-Agent workflow observability is already provided by the OpenInference Smolagents instrumentor (included when `smolagents` is in `enabled_instrumentors`). This is not a new Phase 4 feature, but an existing capability:
-- `openinference.span.kind: "AGENT"` - Identifies agent spans
-- `agent.name` - Agent identifier (via OpenInference)
-- `agent.iteration` - Current iteration number (via OpenInference)
-- `agent.action` - Action taken (via OpenInference)
-- `agent.observation` - Observation received (via OpenInference)
-Agent tracking requires Python >= 3.10 and the `smolagents` library. See [OpenInference Integration](#openinference-optional---python-310-only) for details.
 #### 🔄 Migration Support
 **Backward Compatibility:**

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,14 @@
 genai_otel/__init__.py,sha256=OWgm1dihRkwBQU8fUPnVhE5XCZeF5f15UyH4w6LqGZU,4469
-genai_otel/__version__.py,sha256=amWIeki4bm5YbDNKNBNcQKLS8IoAVLPuipySazUun7Y,751
-genai_otel/auto_instrument.py,sha256=NF0Bo_sFMynSmXNh5KFxdsJQPKuPE2NI_bel1i-CtxU,16260
+genai_otel/__version__.py,sha256=TXc4a_5Wlqj1sa6bOI4rf0g8ARueANgrhEv_5PaXbcs,753
+genai_otel/auto_instrument.py,sha256=uHJGTlSI4UO-sdFtWgxSmNkhd1_GTWvr3S-rY8MQ4E4,16513
 genai_otel/cli.py,sha256=mbhaTU0WIAkvPKdIing-guIxPDjEKQftChWQUtPFzkY,3170
 genai_otel/config.py,sha256=2CIbZH8WKkVzr73y9AOWmscvEW-kUwMLSAyOy9BFqGI,7871
 genai_otel/cost_calculator.py,sha256=BOW-TC41lJ1GcL4hIGZ4NySyV8aro4_juMOe2IqtJ-A,18115
-genai_otel/cost_enrichment_processor.py,sha256=07T7q2gBbB7b-L0Dt5VDsVuxB3XSfnpbtFjcV3ZTpk0,7099
+genai_otel/cost_enriching_exporter.py,sha256=iED7njK21UBKlxRElGfqSs66gMkzDCr8fm-4ZkJBiLU,7874
+genai_otel/cost_enrichment_processor.py,sha256=fQoVosBUgshD9ZRxWpwqqPWYnyhrvKBTJAW0S2H7t1E,7090
 genai_otel/exceptions.py,sha256=gIRvbI7c4V-M-PG9jS0o4ESRwHUWCm6DVihjfyJI1yg,429
 genai_otel/gpu_metrics.py,sha256=hBawkm-NErviwiLzb7z92INstFHec2pREn945rYgrT4,13408
-genai_otel/llm_pricing.json,sha256=ZQ1uILEdQ_yNzenvlPpKazo9NnYqEZgbL_tzQ6Mw2oc,20825
+genai_otel/llm_pricing.json,sha256=f3WNQwiby6rLOQaPX_jJharftHgclNQM3bzibnDiiEQ,27879
 genai_otel/logging_config.py,sha256=S8apGf93nBjoi_Bhce-LxwTwGTaJUeduPXKiWZ5SIa8,1418
 genai_otel/metrics.py,sha256=Vngwtc1MAMAE7JVpbT_KfiCQ5TdIAKIs_0oztjJdDTg,2671
 genai_otel/py.typed,sha256=WJtVGe64tcQSssSo4RD7zCf_3u7X2BmFCWDCroWOcaQ,88
@@ -20,7 +21,7 @@ genai_otel/instrumentors/base.py,sha256=5N0eMDoPT49PedhoDM0EGu8NE9UvseaiWhqfb9UH
 genai_otel/instrumentors/cohere_instrumentor.py,sha256=fsKvHaWvMRAGRbOtybVJVVz-FS_-wmgTJo3Q_F86BOY,5074
 genai_otel/instrumentors/google_ai_instrumentor.py,sha256=ExNo0_OxfCxaRpuUXYU8UZ-ClQRHRLUvf7-kMC6zdc8,2984
 genai_otel/instrumentors/groq_instrumentor.py,sha256=bCm7IDmDyvg0-XuzcCSO5xf9QvDlQGwb7bdQ_ooS6QI,3398
-genai_otel/instrumentors/huggingface_instrumentor.py,sha256=XlSuHEkxEWu0dAXtw1pAFE3n-M8WFRfKUsgbUSV_Arw,9204
+genai_otel/instrumentors/huggingface_instrumentor.py,sha256=wvolJZnq9YKfJsvNvUnoOpL1tbeGy0DuxVmmmI1_BoA,17815
 genai_otel/instrumentors/langchain_instrumentor.py,sha256=002ZrKP04l7VaYxo7nAAwl-uvMVwpzVehO2oS23ed-o,2685
 genai_otel/instrumentors/llamaindex_instrumentor.py,sha256=zZ1J7W4yQo1Ur6Y5y0UXpDdEx9oDnmsqNIin5Jrv9os,1206
 genai_otel/instrumentors/mistralai_instrumentor.py,sha256=Blo8X4WV-xQe-xF-jhkaGPavkgayANf1F3zCTzuhuL0,12478
@@ -37,9 +38,9 @@ genai_otel/mcp_instrumentors/kafka_instrumentor.py,sha256=QJYJC1rvo_zZAIaw-cp_Ic
 genai_otel/mcp_instrumentors/manager.py,sha256=1Pj5lkEOL8Yq1Oeud4ZExN6k6NLIVtTzKnFLNiFdJvw,5895
 genai_otel/mcp_instrumentors/redis_instrumentor.py,sha256=KUbs0dMyfMzU4T0SS8u43I5fvr09lcBBM92I3KCsYUw,943
 genai_otel/mcp_instrumentors/vector_db_instrumentor.py,sha256=2vhnk4PGpfYKr-XlRbnCIOap4BPKHOn--fh-ai2YXlM,9994
-genai_otel_instrument-0.1.7.dev0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-genai_otel_instrument-0.1.7.dev0.dist-info/METADATA,sha256=ogiTaS3MknTz1tAq51JbpZzpxozuLnkD-f2Z4iu57vk,39184
-genai_otel_instrument-0.1.7.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-genai_otel_instrument-0.1.7.dev0.dist-info/entry_points.txt,sha256=E9UqoHA_fq69yNGAY3SRYf5HH94sZT5DiDueiU1v0KM,57
-genai_otel_instrument-0.1.7.dev0.dist-info/top_level.txt,sha256=cvCm8PUwvYUSQKruk-x6S-_YuDyhOBk8gD910XICcbg,11
-genai_otel_instrument-0.1.7.dev0.dist-info/RECORD,,
+genai_otel_instrument-0.1.10.dev0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+genai_otel_instrument-0.1.10.dev0.dist-info/METADATA,sha256=kg9GntpKyP9CarQ53N3RJuzDIcdk2Gtp2Bbb2NC1LVQ,40024
+genai_otel_instrument-0.1.10.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+genai_otel_instrument-0.1.10.dev0.dist-info/entry_points.txt,sha256=E9UqoHA_fq69yNGAY3SRYf5HH94sZT5DiDueiU1v0KM,57
+genai_otel_instrument-0.1.10.dev0.dist-info/top_level.txt,sha256=cvCm8PUwvYUSQKruk-x6S-_YuDyhOBk8gD910XICcbg,11
+genai_otel_instrument-0.1.10.dev0.dist-info/RECORD,,

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/WHEEL RENAMED Viewed

File without changes

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{genai_otel_instrument-0.1.7.dev0.dist-info → genai_otel_instrument-0.1.10.dev0.dist-info}/top_level.txt RENAMED Viewed

File without changes

genai-otel-instrument 0.1.7.dev0__py3-none-any.whl → 0.1.10.dev0__py3-none-any.whl

Potentially problematic release.

genai-otel-instrument 0.1.7.dev0py3-none-any.whl → 0.1.10.dev0py3-none-any.whl