prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/core/replay.py ADDED
@@ -0,0 +1,403 @@
+ """Replay capture for deterministic re-execution of AI agent workflows."""
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from datetime import datetime, timezone
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+ # Storage size warning threshold (100 KB)
+ REPLAY_SIZE_WARNING_THRESHOLD = 100 * 1024
+
+
+ class ReplaySnapshot:
+     """Complete replay-enabling data for a span.
+
+     This class holds all information needed to deterministically replay
+     an agent execution, including full request/response data, tool I/O,
+     retrieval results, and agent state.
+
+     Different span types populate different fields:
+     - LLM spans: llm_request, llm_response, llm_streaming_chunks, model_info
+     - Tool spans: tool_name, tool_input, tool_output, has_side_effects
+     - Retrieval spans: retrieval_query, retrieved_documents, retrieval_metadata
+     - Agent spans: system_prompt, available_tools, agent_memory, agent_config
+
+     Memory efficiency: Uses __slots__ to minimize per-instance overhead.
+     """
+
+     __slots__ = (
+         # LLM fields
+         "llm_request",
+         "llm_response",
+         "llm_streaming_chunks",
+         "model_info",
+         "request_timestamp",
+         # Tool fields
+         "tool_name",
+         "tool_description",
+         "tool_input",
+         "tool_output",
+         "has_side_effects",
+         # Retrieval fields
+         "retrieval_query",
+         "retrieved_documents",
+         "retrieval_scores",
+         "retrieval_metadata",
+         # Agent fields
+         "system_prompt",
+         "available_tools",
+         "agent_memory",
+         "agent_config",
+     )
+
+     def __init__(
+         self,
+         # LLM fields
+         llm_request: dict[str, Any] | None = None,
+         llm_response: dict[str, Any] | None = None,
+         llm_streaming_chunks: list[dict[str, Any]] | None = None,
+         model_info: dict[str, Any] | None = None,
+         request_timestamp: datetime | None = None,
+         # Tool fields
+         tool_name: str | None = None,
+         tool_description: str | None = None,
+         tool_input: dict[str, Any] | str | None = None,
+         tool_output: Any = None,
+         has_side_effects: bool = True,
+         # Retrieval fields
+         retrieval_query: str | None = None,
+         retrieved_documents: list[dict[str, Any]] | None = None,
+         retrieval_scores: list[float] | None = None,
+         retrieval_metadata: dict[str, Any] | None = None,
+         # Agent fields
+         system_prompt: str | None = None,
+         available_tools: list[dict[str, Any]] | None = None,
+         agent_memory: dict[str, Any] | None = None,
+         agent_config: dict[str, Any] | None = None,
+     ) -> None:
+         """Initialize replay snapshot with optional fields."""
+         self.llm_request = llm_request
+         self.llm_response = llm_response
+         self.llm_streaming_chunks = llm_streaming_chunks
+         self.model_info = model_info
+         self.request_timestamp = request_timestamp
+         self.tool_name = tool_name
+         self.tool_description = tool_description
+         self.tool_input = tool_input
+         self.tool_output = tool_output
+         self.has_side_effects = has_side_effects
+         self.retrieval_query = retrieval_query
+         self.retrieved_documents = retrieved_documents
+         self.retrieval_scores = retrieval_scores
+         self.retrieval_metadata = retrieval_metadata
+         self.system_prompt = system_prompt
+         self.available_tools = available_tools
+         self.agent_memory = agent_memory
+         self.agent_config = agent_config
+
+     def to_dict(self) -> dict[str, Any]:
+         """Serialize to JSON-compatible dict.
+
+         Returns:
+             Dictionary containing all non-None fields
+         """
+         result = {}
+
+         for field_name in self.__slots__:
+             value = getattr(self, field_name)
+             if value is not None:
+                 # Handle datetime serialization
+                 if isinstance(value, datetime):
+                     result[field_name] = value.isoformat()
+                 else:
+                     result[field_name] = value
+
+         return result
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> ReplaySnapshot:
+         """Deserialize from dict.
+
+         Args:
+             data: Dictionary from to_dict()
+
+         Returns:
+             ReplaySnapshot instance
+         """
+         # Convert ISO timestamp back to datetime
+         if "request_timestamp" in data and isinstance(data["request_timestamp"], str):
+             data["request_timestamp"] = datetime.fromisoformat(data["request_timestamp"])
+
+         return cls(**data)
+
+     def estimate_size_bytes(self) -> int:
+         """Estimate storage size in bytes.
+
+         This is an approximation based on JSON serialization size.
+         Useful for monitoring storage costs.
+
+         Logs a warning if size exceeds 100 KB threshold.
+
+         Returns:
+             Estimated size in bytes
+         """
+         serialized = json.dumps(self.to_dict())
+         size_bytes = len(serialized.encode("utf-8"))
+
+         # Warn if exceeds threshold
+         if size_bytes > REPLAY_SIZE_WARNING_THRESHOLD:
+             logger.warning(
+                 f"Replay snapshot size ({size_bytes / 1024:.1f} KB) exceeds "
+                 f"recommended threshold ({REPLAY_SIZE_WARNING_THRESHOLD / 1024:.0f} KB). "
+                 f"Consider reducing captured data or increasing storage budget."
+             )
+
+         return size_bytes
+
+
+ class ReplayCapture:
+     """Helper for building ReplaySnapshot during span execution.
+
+     This class provides a builder-style API for incrementally capturing
+     replay data as a span executes.
+
+     Example:
+         ```python
+         capture = ReplayCapture()
+         capture.set_llm_request(model="gpt-4", messages=[...])
+         capture.set_llm_response(text="...", tokens=100)
+         snapshot = capture.build()
+         ```
+     """
+
+     def __init__(self) -> None:
+         """Initialize empty capture."""
+         self._snapshot = ReplaySnapshot()
+
+     # LLM capture methods
+     def set_llm_request(
+         self,
+         model: str,
+         messages: list[dict[str, Any]] | None = None,
+         prompt: str | None = None,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         **kwargs: Any,
+     ) -> None:
+         """Capture LLM request details.
+
+         Args:
+             model: Model identifier (e.g., "gpt-4", "claude-sonnet-4")
+             messages: Chat messages (OpenAI/Anthropic format)
+             prompt: Single prompt string (legacy completions)
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional provider-specific parameters
+         """
+         request: dict[str, Any] = {"model": model}
+
+         if messages is not None:
+             request["messages"] = messages
+         if prompt is not None:
+             request["prompt"] = prompt
+         if temperature is not None:
+             request["temperature"] = temperature
+         if max_tokens is not None:
+             request["max_tokens"] = max_tokens
+
+         # Capture all other kwargs (top_p, frequency_penalty, etc.)
+         request.update(kwargs)
+
+         self._snapshot.llm_request = request
+         self._snapshot.request_timestamp = datetime.now(timezone.utc)
+
+     def set_llm_response(
+         self,
+         text: str,
+         finish_reason: str | None = None,
+         model: str | None = None,
+         prompt_tokens: int | None = None,
+         completion_tokens: int | None = None,
+         **kwargs: Any,
+     ) -> None:
+         """Capture LLM response details.
+
+         Args:
+             text: Complete response text
+             finish_reason: Why generation stopped (stop, length, tool_calls)
+             model: Actual model used (may differ from requested)
+             prompt_tokens: Tokens in prompt
+             completion_tokens: Tokens in completion
+             **kwargs: Additional response metadata
+         """
+         response: dict[str, Any] = {"text": text}
+
+         if finish_reason is not None:
+             response["finish_reason"] = finish_reason
+         if model is not None:
+             response["model"] = model
+         if prompt_tokens is not None:
+             response["prompt_tokens"] = prompt_tokens
+         if completion_tokens is not None:
+             response["completion_tokens"] = completion_tokens
+
+         response.update(kwargs)
+         self._snapshot.llm_response = response
+
+     def add_streaming_chunk(
+         self,
+         chunk: dict[str, Any],
+     ) -> None:
+         """Add a streaming chunk to the replay data.
+
+         For streaming LLM responses, each delta/chunk is captured separately
+         to enable exact replay of streaming behavior.
+
+         Args:
+             chunk: Chunk data (provider-specific format)
+         """
+         if self._snapshot.llm_streaming_chunks is None:
+             self._snapshot.llm_streaming_chunks = []
+
+         self._snapshot.llm_streaming_chunks.append(chunk)
+
+     def set_model_info(self, **info: Any) -> None:
+         """Capture model version/endpoint info.
+
+         Args:
+             **info: Model metadata (version, endpoint, created timestamp, etc.)
+         """
+         self._snapshot.model_info = info
+
+     # Tool capture methods
+     def set_tool_call(
+         self,
+         name: str,
+         description: str | None = None,
+         input_args: dict[str, Any] | str | None = None,
+         output: Any = None,
+         has_side_effects: bool = True,  # SAFE DEFAULT
+     ) -> None:
+         """Capture tool call details.
+
+         Args:
+             name: Tool name
+             description: Tool description
+             input_args: Input arguments (dict or JSON string)
+             output: Tool output/return value
+             has_side_effects: Whether tool modifies external state (default: True)
+         """
+         self._snapshot.tool_name = name
+         self._snapshot.tool_description = description
+         self._snapshot.tool_input = input_args
+         self._snapshot.tool_output = output
+         self._snapshot.has_side_effects = has_side_effects
+
+     # Retrieval capture methods
+     def set_retrieval(
+         self,
+         query: str,
+         documents: list[dict[str, Any]],
+         scores: list[float] | None = None,
+         metadata: dict[str, Any] | None = None,
+     ) -> None:
+         """Capture retrieval operation details.
+
+         Args:
+             query: Query text
+             documents: Retrieved documents (full content)
+             scores: Similarity scores for each document
+             metadata: Retrieval metadata (index name, collection, etc.)
+         """
+         self._snapshot.retrieval_query = query
+         self._snapshot.retrieved_documents = documents
+         self._snapshot.retrieval_scores = scores
+         self._snapshot.retrieval_metadata = metadata
+
+     # Agent capture methods
+     def set_agent_context(
+         self,
+         system_prompt: str | None = None,
+         available_tools: list[dict[str, Any]] | None = None,
+         memory: dict[str, Any] | None = None,
+         config: dict[str, Any] | None = None,
+     ) -> None:
+         """Capture agent context and configuration.
+
+         Args:
+             system_prompt: System/instruction prompt
+             available_tools: List of tools with schemas
+             memory: Agent memory/context state
+             config: Agent configuration
+         """
+         if system_prompt is not None:
+             self._snapshot.system_prompt = system_prompt
+         if available_tools is not None:
+             self._snapshot.available_tools = available_tools
+         if memory is not None:
+             self._snapshot.agent_memory = memory
+         if config is not None:
+             self._snapshot.agent_config = config
+
+     def build(self) -> ReplaySnapshot:
+         """Return the completed snapshot.
+
+         Returns:
+             ReplaySnapshot with all captured data
+         """
+         return self._snapshot
+
+
+ def estimate_replay_storage(
+     span: Any,  # Span type (avoid circular import)
+     replay_snapshot: ReplaySnapshot | None = None,
+ ) -> int:
+     """Estimate total storage size for span with replay data.
+
+     Args:
+         span: The span to estimate (must have .to_dict() method)
+         replay_snapshot: Optional replay snapshot (if not attached to span)
+
+     Returns:
+         Estimated size in bytes
+     """
+     # Base span size
+     span_dict = span.to_dict()
+     base_size = len(json.dumps(span_dict).encode("utf-8"))
+
+     # Replay data size
+     replay_size = 0
+     if replay_snapshot is not None:
+         replay_size = replay_snapshot.estimate_size_bytes()
+     elif hasattr(span, "replay_snapshot") and span.replay_snapshot is not None:
+         replay_size = span.replay_snapshot.estimate_size_bytes()
+
+     return base_size + replay_size
+
+
+ def serialize_replay_data(value: Any) -> Any:
+     """Serialize arbitrary Python values for replay storage.
+
+     Handles common types that may appear in tool I/O or agent state.
+
+     Args:
+         value: Value to serialize
+
+     Returns:
+         JSON-compatible value
+     """
+     if isinstance(value, (str, int, float, bool, type(None))):
+         return value
+     elif isinstance(value, datetime):
+         return value.isoformat()
+     elif isinstance(value, (list, tuple)):
+         return [serialize_replay_data(item) for item in value]
+     elif isinstance(value, dict):
+         return {k: serialize_replay_data(v) for k, v in value.items()}
+     else:
+         # Fallback: convert to string representation
+         return str(value)
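Taken together, `ReplayCapture` accumulates data during a span and `build()` returns a `ReplaySnapshot` that round-trips through `to_dict()`/`from_dict()` for storage. The following is a minimal usage sketch assuming only the APIs visible in this diff and that the module imports as `prela.core.replay`; it has not been run against the released wheel, and the tool name and arguments are hypothetical:

```python
import json

from prela.core.replay import ReplayCapture, ReplaySnapshot

# Build up replay data incrementally during span execution.
capture = ReplayCapture()
capture.set_llm_request(
    model="gpt-4",
    messages=[{"role": "user", "content": "Summarize the report."}],
    temperature=0.0,
)
capture.set_llm_response(
    text="The report covers...",
    finish_reason="stop",
    prompt_tokens=42,
    completion_tokens=17,
)
capture.set_tool_call(
    name="fetch_report",                 # hypothetical tool
    input_args={"report_id": "q3"},      # hypothetical arguments
    output={"status": "ok"},
    has_side_effects=False,              # explicitly marked safe to re-execute
)
snapshot = capture.build()

# Persist and restore: request_timestamp survives as an ISO-8601 string.
payload = json.dumps(snapshot.to_dict())
restored = ReplaySnapshot.from_dict(json.loads(payload))

# Emits a logger warning once the snapshot crosses the 100 KB threshold.
print(snapshot.estimate_size_bytes())
```

Note the conservative default: `has_side_effects` is `True` unless a tool is explicitly marked safe, which presumably lets a replay engine decline to re-execute anything that might mutate external state.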
prela/core/sampler.py ADDED
@@ -0,0 +1,178 @@
+ """Sampling strategies for trace collection.
+
+ This module provides different sampling strategies to control which traces
+ are collected and exported. Sampling helps reduce overhead and costs while
+ still providing useful observability data.
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import time
+ from abc import ABC, abstractmethod
+ from threading import Lock
+
+
+ class BaseSampler(ABC):
+     """Abstract base class for trace samplers.
+
+     Samplers determine whether a trace should be collected based on
+     the trace ID and potentially other factors.
+     """
+
+     @abstractmethod
+     def should_sample(self, trace_id: str) -> bool:
+         """Determine if a trace should be sampled.
+
+         Args:
+             trace_id: The trace ID to make a sampling decision for
+
+         Returns:
+             True if the trace should be sampled, False otherwise
+         """
+         pass
+
+
+ class AlwaysOnSampler(BaseSampler):
+     """Sampler that always samples every trace.
+
+     Use this in development or when you need complete trace coverage.
+     Be aware this may generate high data volumes in production.
+     """
+
+     def should_sample(self, trace_id: str) -> bool:
+         """Always return True.
+
+         Args:
+             trace_id: The trace ID (unused)
+
+         Returns:
+             Always True
+         """
+         return True
+
+
+ class AlwaysOffSampler(BaseSampler):
+     """Sampler that never samples any traces.
+
+     Use this to completely disable tracing, for example during
+     maintenance windows or in testing environments.
+     """
+
+     def should_sample(self, trace_id: str) -> bool:
+         """Always return False.
+
+         Args:
+             trace_id: The trace ID (unused)
+
+         Returns:
+             Always False
+         """
+         return False
+
+
+ class ProbabilitySampler(BaseSampler):
+     """Sampler that samples traces with a fixed probability.
+
+     This sampler uses a deterministic hash-based approach to ensure
+     consistent sampling decisions for the same trace ID across
+     different services and processes.
+     """
+
+     def __init__(self, rate: float) -> None:
+         """Initialize the probability sampler.
+
+         Args:
+             rate: Sampling rate between 0.0 and 1.0 (inclusive)
+
+         Raises:
+             ValueError: If rate is not between 0.0 and 1.0
+         """
+         if not 0.0 <= rate <= 1.0:
+             raise ValueError(f"Sampling rate must be between 0.0 and 1.0, got {rate}")
+         self.rate = rate
+
+     def should_sample(self, trace_id: str) -> bool:
+         """Sample based on trace ID hash.
+
+         Uses MD5 hash of trace_id to make a deterministic sampling decision.
+         This ensures the same trace_id always gets the same sampling decision
+         across different processes and services.
+
+         Args:
+             trace_id: The trace ID to make a sampling decision for
+
+         Returns:
+             True if the trace should be sampled, False otherwise
+         """
+         if self.rate == 0.0:
+             return False
+         if self.rate == 1.0:
+             return True
+
+         # Use MD5 hash to get a deterministic value between 0 and 1
+         hash_bytes = hashlib.md5(trace_id.encode()).digest()
+         # Take first 8 bytes and convert to int, then normalize to [0, 1]
+         hash_value = int.from_bytes(hash_bytes[:8], byteorder="big")
+         probability = hash_value / (2**64 - 1)
+
+         return probability < self.rate
+
+
+ class RateLimitingSampler(BaseSampler):
+     """Sampler that limits the number of traces sampled per second.
+
+     This sampler uses a token bucket algorithm to enforce a maximum
+     rate of sampled traces per second. Useful for controlling costs
+     and backend load.
+     """
+
+     def __init__(self, traces_per_second: float) -> None:
+         """Initialize the rate limiting sampler.
+
+         Args:
+             traces_per_second: Maximum number of traces to sample per second
+
+         Raises:
+             ValueError: If traces_per_second is negative
+         """
+         if traces_per_second < 0:
+             raise ValueError(f"traces_per_second must be non-negative, got {traces_per_second}")
+
+         self.traces_per_second = traces_per_second
+         self._tokens = traces_per_second
+         self._last_update = time.perf_counter()
+         self._lock = Lock()
+
+     def should_sample(self, trace_id: str) -> bool:
+         """Sample if tokens are available.
+
+         Uses a token bucket algorithm: tokens regenerate at the configured
+         rate, and each sampling decision consumes one token.
+
+         Args:
+             trace_id: The trace ID (unused)
+
+         Returns:
+             True if a token is available, False otherwise
+         """
+         if self.traces_per_second == 0:
+             return False
+
+         with self._lock:
+             now = time.perf_counter()
+             elapsed = now - self._last_update
+
+             # Refill tokens based on elapsed time
+             self._tokens = min(
+                 self.traces_per_second,
+                 self._tokens + (elapsed * self.traces_per_second),
+             )
+             self._last_update = now
+
+             # Try to consume a token
+             if self._tokens >= 1.0:
+                 self._tokens -= 1.0
+                 return True
+
+             return False
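`ProbabilitySampler` maps the first 8 bytes of the trace ID's MD5 digest onto [0, 1] and compares the result against the rate, so every process reaches the same verdict for a given trace, while `RateLimitingSampler` consumes from a token bucket that refills at `traces_per_second`. A short behavioral sketch, assuming the module imports as `prela.core.sampler` (illustrative only, not verified against the released wheel):

```python
from prela.core.sampler import ProbabilitySampler, RateLimitingSampler

prob = ProbabilitySampler(rate=0.1)

# Deterministic: repeated calls with the same trace_id always agree,
# so distributed services stay consistent without coordination.
assert prob.should_sample("trace-abc") == prob.should_sample("trace-abc")

# Token bucket: the bucket starts full (2 tokens), so in a tight burst
# the first two traces pass and the third is dropped until tokens refill.
rate = RateLimitingSampler(traces_per_second=2)
print([rate.should_sample(f"trace-{i}") for i in range(3)])
# expected when called back-to-back: [True, True, False]
```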