PyPI - netra-sdk - Versions diffs - 0.1.0__py3-none-any.whl - Mend

netra-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of netra-sdk might be problematic. Click here for more details.

Files changed (42) hide show

netra/__init__.py +148 -0
netra/anonymizer/__init__.py +7 -0
netra/anonymizer/anonymizer.py +79 -0
netra/anonymizer/base.py +159 -0
netra/anonymizer/fp_anonymizer.py +182 -0
netra/config.py +111 -0
netra/decorators.py +167 -0
netra/exceptions/__init__.py +6 -0
netra/exceptions/injection.py +33 -0
netra/exceptions/pii.py +46 -0
netra/input_scanner.py +142 -0
netra/instrumentation/__init__.py +257 -0
netra/instrumentation/aiohttp/__init__.py +378 -0
netra/instrumentation/aiohttp/version.py +1 -0
netra/instrumentation/cohere/__init__.py +446 -0
netra/instrumentation/cohere/version.py +1 -0
netra/instrumentation/google_genai/__init__.py +506 -0
netra/instrumentation/google_genai/config.py +5 -0
netra/instrumentation/google_genai/utils.py +31 -0
netra/instrumentation/google_genai/version.py +1 -0
netra/instrumentation/httpx/__init__.py +545 -0
netra/instrumentation/httpx/version.py +1 -0
netra/instrumentation/instruments.py +78 -0
netra/instrumentation/mistralai/__init__.py +545 -0
netra/instrumentation/mistralai/config.py +5 -0
netra/instrumentation/mistralai/utils.py +30 -0
netra/instrumentation/mistralai/version.py +1 -0
netra/instrumentation/weaviate/__init__.py +121 -0
netra/instrumentation/weaviate/version.py +1 -0
netra/pii.py +757 -0
netra/processors/__init__.py +4 -0
netra/processors/session_span_processor.py +55 -0
netra/processors/span_aggregation_processor.py +365 -0
netra/scanner.py +104 -0
netra/session.py +185 -0
netra/session_manager.py +96 -0
netra/tracer.py +99 -0
netra/version.py +1 -0
netra_sdk-0.1.0.dist-info/LICENCE +201 -0
netra_sdk-0.1.0.dist-info/METADATA +573 -0
netra_sdk-0.1.0.dist-info/RECORD +42 -0
netra_sdk-0.1.0.dist-info/WHEEL +4 -0

netra/processors/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from netra.processors.session_span_processor import SessionSpanProcessor
+from netra.processors.span_aggregation_processor import SpanAggregationProcessor
+__all__ = ["SpanAggregationProcessor", "SessionSpanProcessor"]

netra/processors/session_span_processor.py ADDED Viewed

@@ -0,0 +1,55 @@
+import logging
+from typing import Optional
+from opentelemetry import baggage
+from opentelemetry import context as otel_context
+from opentelemetry import trace
+from opentelemetry.sdk.trace import SpanProcessor
+from netra.config import Config
+from netra.session_manager import SessionManager
+logger = logging.getLogger(__name__)
+class SessionSpanProcessor(SpanProcessor):  # type: ignore[misc]
+    """OpenTelemetry span processor that automatically adds session attributes to spans."""
+    def on_start(self, span: trace.Span, parent_context: Optional[otel_context.Context] = None) -> None:
+        """Add session attributes to span when it starts and store current span."""
+        try:
+            # Store the current span in SessionManager
+            SessionManager.set_current_span(span)
+            ctx = otel_context.get_current()
+            session_id = baggage.get_baggage("session_id", ctx)
+            user_id = baggage.get_baggage("user_id", ctx)
+            tenant_id = baggage.get_baggage("tenant_id", ctx)
+            custom_keys = baggage.get_baggage("custom_keys", ctx)
+            span.set_attribute("library.name", Config.LIBRARY_NAME)
+            span.set_attribute("library.version", Config.LIBRARY_VERSION)
+            span.set_attribute("sdk.name", Config.SDK_NAME)
+            if session_id:
+                span.set_attribute(f"{Config.LIBRARY_NAME}.session_id", session_id)
+            if user_id:
+                span.set_attribute(f"{Config.LIBRARY_NAME}.user_id", user_id)
+            if tenant_id:
+                span.set_attribute(f"{Config.LIBRARY_NAME}.tenant_id", tenant_id)
+            if custom_keys:
+                for key in custom_keys.split(","):
+                    value = baggage.get_baggage(f"custom.{key}", ctx)
+                    if value:
+                        span.set_attribute(f"{Config.LIBRARY_NAME}.custom.{key}", value)
+        except Exception as e:
+            logger.exception(f"Error setting span attributes: {e}")
+    def on_end(self, span: trace.Span) -> None:
+        pass
+    def force_flush(self, timeout_millis: int = 30000) -> None:
+        pass
+    def shutdown(self) -> None:
+        pass

netra/processors/span_aggregation_processor.py ADDED Viewed

@@ -0,0 +1,365 @@
+"""
+Span aggregation utilities for Combat SDK.
+Handles aggregation of child span data into parent spans.
+"""
+import json
+import logging
+from collections import defaultdict
+from typing import Any, Dict, Optional, Set
+import httpx
+from opentelemetry import trace
+from opentelemetry.sdk.trace import SpanProcessor
+from opentelemetry.trace import Context, Span
+from netra import Netra
+from netra.config import Config
+logger = logging.getLogger(__name__)
+class SpanAggregationData:
+    """Holds aggregated data for a span."""
+    def __init__(self) -> None:
+        self.tokens: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
+        self.models: Set[str] = set()
+        self.has_pii: bool = False
+        self.pii_entities: Set[str] = set()
+        self.pii_actions: Dict[str, Set[str]] = defaultdict(set)
+        self.has_violation: bool = False
+        self.violations: Set[str] = set()
+        self.violation_actions: Dict[str, Set[str]] = defaultdict(set)
+        self.has_error: bool = False
+        self.status_codes: Set[int] = set()
+    def merge_from_other(self, other: "SpanAggregationData") -> None:
+        """Merge data from another SpanAggregationData instance."""
+        # Merge error data
+        if other.has_error:
+            self.has_error = True
+            self.status_codes.update(other.status_codes)
+        # Merge tokens - take the maximum values for each model
+        for model, token_data in other.tokens.items():
+            if model not in self.tokens:
+                self.tokens[model] = {}
+            for token_type, count in token_data.items():
+                self.tokens[model][token_type] = max(self.tokens[model].get(token_type, 0), count)
+        # Merge models
+        self.models.update(other.models)
+        # Merge PII data
+        if other.has_pii:
+            self.has_pii = True
+        self.pii_entities.update(other.pii_entities)
+        for action, entities in other.pii_actions.items():
+            self.pii_actions[action].update(entities)
+        # Merge violation data
+        if other.has_violation:
+            self.has_violation = True
+        self.violations.update(other.violations)
+        for action, violations in other.violation_actions.items():
+            self.violation_actions[action].update(violations)
+    def to_attributes(self) -> Dict[str, str]:
+        """Convert aggregated data to span attributes."""
+        attributes = {}
+        # Error Data
+        attributes["has_error"] = str(self.has_error).lower()
+        if self.has_error:
+            attributes["status_codes"] = json.dumps(list(self.status_codes))
+        # Token usage by model
+        if self.tokens:
+            tokens_dict = {}
+            for model, usage in self.tokens.items():
+                tokens_dict[model] = dict(usage)
+            attributes["tokens"] = json.dumps(tokens_dict)
+        # Models used
+        if self.models:
+            attributes["models"] = json.dumps(sorted(list(self.models)))
+        # PII information
+        attributes["has_pii"] = str(self.has_pii).lower()
+        if self.pii_entities:
+            attributes["pii_entities"] = json.dumps(sorted(list(self.pii_entities)))
+        if self.pii_actions:
+            pii_actions_dict = {}
+            for action, entities in self.pii_actions.items():
+                pii_actions_dict[action] = sorted(list(entities))
+            attributes["pii_actions"] = json.dumps(pii_actions_dict)
+        # Violation information
+        attributes["has_violation"] = str(self.has_violation).lower()
+        if self.violations:
+            attributes["violations"] = json.dumps(sorted(list(self.violations)))
+        if self.violation_actions:
+            violation_actions_dict = {}
+            for action, violations in self.violation_actions.items():
+                violation_actions_dict[action] = sorted(list(violations))
+            attributes["violation_actions"] = json.dumps(violation_actions_dict)
+        return attributes
+class SpanAggregationProcessor(SpanProcessor):  # type: ignore[misc]
+    """
+    OpenTelemetry span processor that aggregates data from child spans into parent spans.
+    """
+    def __init__(self) -> None:
+        self._span_data: Dict[str, SpanAggregationData] = {}
+        self._span_hierarchy: Dict[str, Optional[str]] = {}  # child_id -> parent_id
+        self._root_spans: Set[str] = set()
+        self._captured_data: Dict[str, Dict[str, Any]] = {}  # span_id -> {attributes, events}
+        self._active_spans: Dict[str, Span] = {}  # span_id -> original span reference
+    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
+        """Called when a span starts."""
+        span_id = self._get_span_id(span)
+        if not span_id:
+            return
+        # Store the original span for later use
+        self._active_spans[span_id] = span
+        # Initialize aggregation data
+        self._span_data[span_id] = SpanAggregationData()
+        self._captured_data[span_id] = {"attributes": {}, "events": []}
+        # Check if this is a root span (no parent)
+        if span.parent is None:
+            self._root_spans.add(span_id)
+        else:
+            # Track parent-child relationship - span.parent is a SpanContext, not a Span
+            try:
+                parent_span_context = span.parent
+                if parent_span_context and parent_span_context.span_id:
+                    parent_span_id = f"{parent_span_context.trace_id:032x}-{parent_span_context.span_id:016x}"
+                    self._span_hierarchy[span_id] = parent_span_id
+                else:
+                    logger.warning(f"DEBUG: Parent span context is invalid for child {span_id}")
+            except Exception as e:
+                logger.warning(f"DEBUG: Could not get parent span ID for child {span_id}: {e}")
+        # Wrap span methods to capture data
+        self._wrap_span_methods(span, span_id)
+    def on_end(self, span: Span) -> None:
+        """Called when a span ends."""
+        span_id = self._get_span_id(span)
+        if not span_id or span_id not in self._span_data:
+            return
+        try:
+            # Process this span's captured data
+            captured = self._captured_data.get(span_id, {})
+            self._process_attributes(self._span_data[span_id], captured.get("attributes", {}))
+            # Set aggregated attributes on this span
+            original_span = self._active_spans.get(span_id)
+            if original_span and original_span.is_recording():
+                self._set_span_attributes(original_span, self._span_data[span_id])
+            # Handle parent-child aggregation for any remaining data
+            self._aggregate_to_all_parents(span_id)
+        except Exception as e:
+            logger.error(f"Error during span aggregation for span {span_id}: {e}")
+            # Even if there's an error, try to do basic aggregation
+            try:
+                original_span = self._active_spans.get(span_id)
+                if original_span and original_span.is_recording():
+                    self._set_span_attributes(original_span, self._span_data[span_id])
+            except Exception as inner_e:
+                logger.error(f"Failed to set basic aggregation attributes: {inner_e}")
+        # Clean up
+        self._span_data.pop(span_id, None)
+        self._captured_data.pop(span_id, None)
+        self._active_spans.pop(span_id, None)
+        self._root_spans.discard(span_id)
+        self._span_hierarchy.pop(span_id, None)
+    def _wrap_span_methods(self, span: Span, span_id: str) -> Any:
+        """Wrap span methods to capture attributes and events."""
+        # Wrap set_attribute
+        original_set_attribute = span.set_attribute
+        def wrapped_set_attribute(key: str, value: Any) -> Any:
+            # Status code processing
+            if key == "http.status_code":
+                self._status_code_processing(value)
+            # Capture the all the attribute data
+            self._captured_data[span_id]["attributes"][key] = value
+            return original_set_attribute(key, value)
+        span.set_attribute = wrapped_set_attribute
+        # Wrap add_event
+        original_add_event = span.add_event
+        def wrapped_add_event(name: str, attributes: Dict[str, Any] = {}, timestamp: int = 0) -> Any:
+            # Only process PII and violation events
+            if name == "pii_detected" and attributes:
+                self._process_pii_event(self._span_data[span_id], attributes)
+                if span.is_recording():
+                    self._set_span_attributes(span, self._span_data[span_id])
+                    # Immediately aggregate to parent spans
+                    self._aggregate_to_all_parents(span_id)
+            elif name == "violation_detected" and attributes:
+                self._process_violation_event(self._span_data[span_id], attributes)
+                if span.is_recording():
+                    self._set_span_attributes(span, self._span_data[span_id])
+                    # Immediately aggregate to parent spans
+                    self._aggregate_to_all_parents(span_id)
+            # Check if span is still recording before adding event
+            if not span.is_recording():
+                logger.debug(f"Attempted to add event to ended span {span_id}")
+                return None
+            return original_add_event(name, attributes, timestamp)
+        span.add_event = wrapped_add_event
+    def _process_attributes(self, data: SpanAggregationData, attributes: Dict[str, Any]) -> None:
+        """Process span attributes for aggregation."""
+        # Extract status code for error identification
+        status_code = attributes.get("http.status_code", 200)
+        if httpx.codes.is_error(status_code):
+            data.has_error = True
+            data.status_codes.update([status_code])
+        # Extract model information
+        model = attributes.get("gen_ai.request.model") or attributes.get("gen_ai.response.model")
+        if model:
+            data.models.add(model)
+            # Extract token usage
+            token_fields = {
+                "prompt_tokens": attributes.get("gen_ai.usage.prompt_tokens", 0),
+                "completion_tokens": attributes.get("gen_ai.usage.completion_tokens", 0),
+                "total_tokens": attributes.get("llm.usage.total_tokens", 0),
+                "cache_read_input_tokens": attributes.get("gen_ai.usage.cache_read_input_tokens", 0),
+            }
+            # Initialize token fields if they don't exist
+            if model not in data.tokens:
+                data.tokens[model] = {}
+            # Add token values
+            for field, value in token_fields.items():
+                if isinstance(value, (int, str)):
+                    current_value = data.tokens[model].get(field, 0)
+                    data.tokens[model][field] = current_value + int(value)
+    def _process_pii_event(self, data: SpanAggregationData, attrs: Dict[str, Any]) -> None:
+        """Process pii_detected event."""
+        if attrs.get("has_pii"):
+            data.has_pii = True
+            # Extract entities from pii_entities field
+            entity_counts_str = attrs.get("pii_entities")
+            if entity_counts_str:
+                try:
+                    entity_counts = (
+                        json.loads(entity_counts_str) if isinstance(entity_counts_str, str) else entity_counts_str
+                    )
+                    if isinstance(entity_counts, dict):
+                        entities = set(entity_counts.keys())
+                        data.pii_entities.update(entities)
+                        # Determine action
+                        if attrs.get("is_blocked"):
+                            data.pii_actions["BLOCK"].update(entities)
+                        elif attrs.get("is_masked"):
+                            data.pii_actions["MASK"].update(entities)
+                        else:
+                            data.pii_actions["FLAG"].update(entities)
+                except (json.JSONDecodeError, TypeError):
+                    logger.error(f"Failed to parse pii_entities: {entity_counts_str}")
+    def _process_violation_event(self, data: SpanAggregationData, attrs: Dict[str, Any]) -> None:
+        """Process violation_detected event."""
+        if attrs.get("has_violation"):
+            data.has_violation = True
+            violations = attrs.get("violations", [])
+            if violations:
+                data.violations.update(violations)
+                # Set action based on is_blocked flag
+                action = "BLOCK" if attrs.get("is_blocked") else "FLAG"
+                data.violation_actions[action].update(violations)
+    def _aggregate_to_all_parents(self, child_span_id: str) -> None:
+        """Aggregate data from child span to all its parent spans in the hierarchy."""
+        if child_span_id not in self._span_data:
+            return
+        child_data = self._span_data[child_span_id]
+        current_span_id = child_span_id
+        # Traverse up the parent hierarchy
+        while True:
+            parent_id = self._span_hierarchy.get(current_span_id)
+            if not parent_id or parent_id not in self._span_data:
+                break
+            # Merge child data into parent
+            self._span_data[parent_id].merge_from_other(child_data)
+            # Update parent span attributes if it's still active and recording
+            parent_span = self._active_spans.get(parent_id)
+            if parent_span and parent_span.is_recording():
+                self._set_span_attributes(parent_span, self._span_data[parent_id])
+            # Move up to the next parent
+            current_span_id = parent_id
+    def _set_span_attributes(self, span: Span, data: SpanAggregationData) -> None:
+        """Set aggregated attributes on the given span."""
+        try:
+            aggregated_attrs = data.to_attributes()
+            # Set all aggregated attributes under a single 'aggregator' key as a JSON object
+            span.set_attribute(f"{Config.LIBRARY_NAME}.aggregated_attributes", json.dumps(aggregated_attrs))
+        except Exception as e:
+            logger.error(f"Failed to set aggregated attributes: {e}")
+    def _get_span_id(self, span: Span) -> Optional[str]:
+        """Get a unique identifier for the span."""
+        try:
+            span_context = span.get_span_context()
+            return f"{span_context.trace_id:032x}-{span_context.span_id:016x}"
+        except Exception:
+            return None
+    def _get_span_id_from_context(self, context: Context) -> Optional[str]:
+        """Extract span ID from context."""
+        if context:
+            span_context = trace.get_current_span(context).get_span_context()
+            if span_context and span_context.span_id:
+                return f"{span_context.trace_id:032x}-{span_context.span_id:016x}"
+        return None
+    def _status_code_processing(self, status_code: int) -> None:
+        if httpx.codes.is_error(status_code):
+            event_attributes = {"has_error": True, "status_code": status_code}
+            Netra.set_custom_event(event_name="error_detected", attributes=event_attributes)
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush any pending data."""
+        return True
+    def shutdown(self) -> bool:
+        """Shutdown the processor."""
+        self._span_data.clear()
+        self._span_hierarchy.clear()
+        self._root_spans.clear()
+        self._captured_data.clear()
+        self._active_spans.clear()
+        return True

netra/scanner.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""
+Scanner module for Netra SDK to implement various scanning capabilities.
+"""
+import logging
+from abc import ABC, abstractmethod
+from typing import Optional, Tuple
+from netra.exceptions import InjectionException
+logger = logging.getLogger(__name__)
+class Scanner(ABC):
+    """
+    Abstract base class for scanner implementations.
+    Scanners can analyze and process input prompts for various purposes
+    such as security checks, content moderation, etc.
+    """
+    @abstractmethod
+    def scan(self, prompt: str) -> Tuple[str, bool, float]:
+        """
+        Scan the input prompt and return the sanitized prompt, validity flag, and risk score.
+        Args:
+            prompt: The input prompt to scan
+        Returns:
+            Tuple containing:
+                - sanitized_prompt: The potentially modified prompt after scanning
+                - is_valid: Boolean indicating if the prompt passed the scan
+                - risk_score: A score between 0.0 and 1.0 indicating the risk level
+        """
+class PromptInjection(Scanner):
+    """
+    A scanner implementation that detects and handles prompt injection attempts.
+    This scanner uses llm_guard's PromptInjection scanner under the hood.
+    """
+    def __init__(self, threshold: float = 0.5, match_type: Optional[str] = None):
+        """
+        Initialize the PromptInjection scanner.
+        Args:
+            threshold: The threshold value (between 0.0 and 1.0) above which a prompt is considered risky
+            match_type: The type of matching to use
+                (from llm_guard.input_scanners.prompt_injection.MatchType)
+        """
+        self.threshold = threshold
+        self.scanner = None
+        self.llm_guard_available = False
+        try:
+            from llm_guard.input_scanners import PromptInjection as LLMGuardPromptInjection
+            from llm_guard.input_scanners.prompt_injection import MatchType
+            if match_type is None:
+                match_type = MatchType.FULL
+            self.scanner = LLMGuardPromptInjection(threshold=threshold, match_type=match_type)
+            self.llm_guard_available = True
+        except ImportError:
+            logger.warning(
+                "llm-guard package is not installed. Prompt injection scanning will be limited. "
+                "To enable full functionality, install with: pip install 'netra-sdk[llm_guard]'"
+            )
+    def scan(self, prompt: str) -> Tuple[str, bool, float]:
+        """
+        Scan the input prompt for potential prompt injection attempts.
+        Args:
+            prompt: The input prompt to scan
+        Returns:
+            Tuple containing:
+                - sanitized_prompt: The potentially modified prompt after scanning
+                - is_valid: Boolean indicating if the prompt passed the scan
+                - risk_score: A score between 0.0 and 1.0 indicating the risk level
+        """
+        if not self.llm_guard_available or self.scanner is None:
+            # Simple fallback when llm-guard is not available
+            # Always pass validation but log a warning
+            logger.warning(
+                "Using fallback prompt injection detection (llm-guard not available). "
+                "Install the llm_guard optional dependency for full protection."
+            )
+            return prompt, True, 0.0
+        # Use llm_guard's scanner to check for prompt injection
+        assert self.scanner is not None  # This helps mypy understand self.scanner is not None here
+        sanitized_prompt, is_valid, risk_score = self.scanner.scan(prompt)
+        if not is_valid:
+            raise InjectionException(
+                message="Input blocked: detected prompt injection",
+                has_violation=True,
+                violations=["prompt_injection"],
+            )
+        return sanitized_prompt, is_valid, risk_score