prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/reporters/__init__.py
@@ -0,0 +1,32 @@
+"""Reporter implementations for evaluation results.
+
+This module provides reporters for outputting evaluation results in various formats:
+- ConsoleReporter: Pretty-printed terminal output with colors
+- JSONReporter: JSON file output for programmatic access
+- JUnitReporter: JUnit XML for CI/CD integration
+
+Example:
+    >>> from prela.evals import EvalRunner
+    >>> from prela.evals.reporters import ConsoleReporter, JSONReporter
+    >>>
+    >>> runner = EvalRunner(suite, agent)
+    >>> result = runner.run()
+    >>>
+    >>> # Print to console
+    >>> console = ConsoleReporter()
+    >>> console.report(result)
+    >>>
+    >>> # Save to JSON
+    >>> json_reporter = JSONReporter("results.json")
+    >>> json_reporter.report(result)
+"""
+
+from prela.evals.reporters.console import ConsoleReporter
+from prela.evals.reporters.json import JSONReporter
+from prela.evals.reporters.junit import JUnitReporter
+
+__all__ = [
+    "ConsoleReporter",
+    "JSONReporter",
+    "JUnitReporter",
+]
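The hunk above wires the three reporter classes into the package namespace. As a rough usage sketch (not part of the diff), all three exports can be fanned out over a single run result. This assumes JUnitReporter accepts an output path the way JSONReporter does, which the junit.py source (not shown in this section) would need to confirm, and that suite and agent are constructed as in the docstring example.

# Sketch only: fan one EvalRunResult out to every bundled reporter.
# JUnitReporter's constructor signature is assumed, not confirmed by this hunk.
from prela.evals import EvalRunner
from prela.evals.reporters import ConsoleReporter, JSONReporter, JUnitReporter

runner = EvalRunner(suite, agent)  # suite and agent defined as in the docstring example
result = runner.run()

for reporter in (
    ConsoleReporter(verbose=True),
    JSONReporter("results/run.json"),    # example path
    JUnitReporter("results/junit.xml"),  # assumed to take an output path
):
    reporter.report(result)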
prela/evals/reporters/console.py
@@ -0,0 +1,251 @@
+"""Console reporter for evaluation results with rich terminal output.
+
+This module provides a reporter that prints evaluation results to the console
+with beautiful formatting, colors, and tree structures for easy debugging.
+"""
+
+from __future__ import annotations
+
+from prela.evals.runner import EvalRunResult
+
+# Try to import rich for colored output
+try:
+    from rich.console import Console
+    from rich.table import Table
+    from rich.panel import Panel
+    from rich.text import Text
+
+    HAS_RICH = True
+except ImportError:
+    HAS_RICH = False
+
+
+class ConsoleReporter:
+    """Reporter that pretty-prints evaluation results to the console.
+
+    Uses the rich library for colored output if available and falls back to
+    plain text formatting otherwise. Provides:
+    - Summary statistics (pass rate, duration)
+    - List of all test cases with pass/fail status
+    - Detailed failure information for failed cases
+    - Color coding (green=pass, red=fail, yellow=warning)
+
+    Example:
+        >>> from prela.evals import EvalRunner
+        >>> from prela.evals.reporters import ConsoleReporter
+        >>>
+        >>> runner = EvalRunner(suite, agent)
+        >>> result = runner.run()
+        >>>
+        >>> reporter = ConsoleReporter(verbose=True, use_colors=True)
+        >>> reporter.report(result)
+        ✓ Geography QA Suite
+        ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+        Total: 10 | Passed: 9 (90.0%) | Failed: 1
+        Duration: 2.5s
+        ...
+    """
+
+    def __init__(self, verbose: bool = True, use_colors: bool = True):
+        """Initialize the console reporter.
+
+        Args:
+            verbose: If True, show detailed failure information. If False,
+                only show summary statistics and failed case names.
+            use_colors: If True and rich is available, use colored output.
+                If False or rich unavailable, use plain text.
+        """
+        self.verbose = verbose
+        self.use_colors = use_colors and HAS_RICH
+        if self.use_colors:
+            self.console = Console()
+
+    def report(self, result: EvalRunResult) -> None:
+        """Print the evaluation results to the console.
+
+        Args:
+            result: The evaluation run result to report.
+        """
+        if self.use_colors:
+            self._report_rich(result)
+        else:
+            self._report_plain(result)
+
+    def _report_rich(self, result: EvalRunResult) -> None:
+        """Print results using rich library (colored output)."""
+        # Create title with status symbol
+        title = Text()
+        if result.pass_rate == 1.0:
+            title.append("✓ ", style="bold green")
+        elif result.pass_rate == 0.0:
+            title.append("✗ ", style="bold red")
+        else:
+            title.append("⚠ ", style="bold yellow")
+        title.append(result.suite_name, style="bold")
+
+        # Create summary statistics
+        duration = (result.completed_at - result.started_at).total_seconds()
+        summary = (
+            f"Total: {result.total_cases} | "
+            f"[green]Passed: {result.passed_cases}[/green] "
+            f"([cyan]{result.pass_rate * 100:.1f}%[/cyan]) | "
+            f"[red]Failed: {result.failed_cases}[/red]\n"
+            f"Duration: {duration:.2f}s"
+        )
+
+        # Print panel with summary
+        panel = Panel(
+            summary,
+            title=title,
+            border_style="blue" if result.pass_rate == 1.0 else "yellow",
+        )
+        self.console.print(panel)
+        self.console.print()
+
+        # Create table of test cases
+        table = Table(show_header=True, header_style="bold cyan")
+        table.add_column("Status", width=8)
+        table.add_column("Test Case")
+        table.add_column("Duration", justify="right", width=10)
+        table.add_column("Assertions", justify="center", width=12)
+
+        for case_result in result.case_results:
+            # Status column with color
+            if case_result.passed:
+                status = Text("✓ PASS", style="bold green")
+            else:
+                status = Text("✗ FAIL", style="bold red")
+
+            # Duration formatting
+            duration_str = f"{case_result.duration_ms:.1f}ms"
+
+            # Assertion counts
+            total_assertions = len(case_result.assertion_results)
+            passed_assertions = sum(
+                1 for a in case_result.assertion_results if a.passed
+            )
+            assertion_str = f"{passed_assertions}/{total_assertions}"
+
+            table.add_row(
+                status,
+                case_result.case_name,
+                duration_str,
+                assertion_str,
+            )
+
+        self.console.print(table)
+
+        # Show detailed failure information if verbose
+        if self.verbose and result.failed_cases > 0:
+            self.console.print()
+            self.console.print("[bold red]Failed Test Details:[/bold red]")
+            self.console.print()
+
+            for case_result in result.case_results:
+                if not case_result.passed:
+                    self.console.print(
+                        f"[bold red]✗ {case_result.case_name}[/bold red]"
+                    )
+
+                    # Show error if present
+                    if case_result.error:
+                        self.console.print(
+                            f"  [red]Error:[/red] {case_result.error}"
+                        )
+
+                    # Show failed assertions
+                    for assertion in case_result.assertion_results:
+                        if not assertion.passed:
+                            self.console.print(
+                                f"  [red]✗[/red] {assertion.message}"
+                            )
+                            if assertion.expected is not None:
+                                self.console.print(
+                                    f"    [dim]Expected:[/dim] {self._truncate(assertion.expected)}"
+                                )
+                            if assertion.actual is not None:
+                                self.console.print(
+                                    f"    [dim]Actual:[/dim] {self._truncate(assertion.actual)}"
+                                )
+
+                    self.console.print()
+
+    def _report_plain(self, result: EvalRunResult) -> None:
+        """Print results using plain text (no colors)."""
+        # Print header
+        if result.pass_rate == 1.0:
+            status_symbol = "✓"
+        elif result.pass_rate == 0.0:
+            status_symbol = "✗"
+        else:
+            status_symbol = "⚠"
+
+        print(f"{status_symbol} {result.suite_name}")
+        print("=" * 60)
+
+        # Print summary
+        duration = (result.completed_at - result.started_at).total_seconds()
+        print(f"Total: {result.total_cases} | ", end="")
+        print(f"Passed: {result.passed_cases} ({result.pass_rate * 100:.1f}%) | ", end="")
+        print(f"Failed: {result.failed_cases}")
+        print(f"Duration: {duration:.2f}s")
+        print()
+
+        # Print test cases
+        print("Test Cases:")
+        print("-" * 60)
+        for case_result in result.case_results:
+            status = "✓ PASS" if case_result.passed else "✗ FAIL"
+            duration_str = f"{case_result.duration_ms:.1f}ms"
+            total_assertions = len(case_result.assertion_results)
+            passed_assertions = sum(
+                1 for a in case_result.assertion_results if a.passed
+            )
+            assertion_str = f"{passed_assertions}/{total_assertions}"
+
+            print(
+                f"{status:8} {case_result.case_name:35} "
+                f"{duration_str:>10} {assertion_str:>12}"
+            )
+
+        # Show detailed failure information if verbose
+        if self.verbose and result.failed_cases > 0:
+            print()
+            print("Failed Test Details:")
+            print("=" * 60)
+
+            for case_result in result.case_results:
+                if not case_result.passed:
+                    print(f"\n✗ {case_result.case_name}")
+
+                    # Show error if present
+                    if case_result.error:
+                        print(f"  Error: {case_result.error}")
+
+                    # Show failed assertions
+                    for assertion in case_result.assertion_results:
+                        if not assertion.passed:
+                            print(f"  ✗ {assertion.message}")
+                            if assertion.expected is not None:
+                                print(
+                                    f"    Expected: {self._truncate(assertion.expected)}"
+                                )
+                            if assertion.actual is not None:
+                                print(
+                                    f"    Actual: {self._truncate(assertion.actual)}"
+                                )
+
+    def _truncate(self, value: object, max_length: int = 100) -> str:
+        """Truncate long strings for display.
+
+        Args:
+            value: The value to truncate (will be converted to string).
+            max_length: Maximum length before truncation.
+
+        Returns:
+            Truncated string with "..." suffix if needed.
+        """
+        value_str = str(value)
+        if len(value_str) > max_length:
+            return value_str[: max_length - 3] + "..."
+        return value_str
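ConsoleReporter only prints; it does not set the process exit status. A minimal sketch (not part of the package) of gating a CI step on the same EvalRunResult fields the reporter reads, assuming result comes from EvalRunner.run() as in the docstring example:

# Sketch: print a plain-text report in CI, then fail the job on any failed case.
import sys

from prela.evals.reporters import ConsoleReporter

reporter = ConsoleReporter(verbose=True, use_colors=False)  # plain text suits CI logs
reporter.report(result)  # result: EvalRunResult from EvalRunner.run()
if result.failed_cases > 0:
    sys.exit(1)  # non-zero exit marks the pipeline step as failed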
prela/evals/reporters/json.py
@@ -0,0 +1,176 @@
+"""JSON reporter for evaluation results.
+
+This module provides a reporter that writes evaluation results to a JSON file,
+suitable for programmatic access, data analysis, or integration with other tools.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from prela.evals.runner import EvalRunResult
+
+
+class JSONReporter:
+    """Reporter that writes evaluation results to a JSON file.
+
+    Outputs a structured JSON file containing all evaluation data:
+    - Suite metadata (name, timestamps, duration)
+    - Summary statistics (total, passed, failed, pass rate)
+    - Individual case results with assertion details
+    - Full error messages and stack traces
+
+    The JSON format is designed for:
+    - Programmatic analysis of test results
+    - Integration with data processing pipelines
+    - Historical comparison of evaluation runs
+    - CI/CD artifact storage
+
+    Example:
+        >>> from prela.evals import EvalRunner
+        >>> from prela.evals.reporters import JSONReporter
+        >>>
+        >>> runner = EvalRunner(suite, agent)
+        >>> result = runner.run()
+        >>>
+        >>> reporter = JSONReporter("results/eval_run_123.json")
+        >>> reporter.report(result)
+        # Creates results/eval_run_123.json with full results
+    """
+
+    def __init__(self, output_path: str | Path, indent: int | None = 2):
+        """Initialize the JSON reporter.
+
+        Args:
+            output_path: Path where the JSON file will be written.
+                Parent directories will be created if they don't exist.
+            indent: Number of spaces for JSON indentation (default: 2).
+                Set to None for compact output.
+        """
+        self.output_path = Path(output_path)
+        self.indent = indent
+
+    def report(self, result: EvalRunResult) -> None:
+        """Write the evaluation results to a JSON file.
+
+        Creates parent directories if they don't exist. Overwrites
+        any existing file at the output path.
+
+        Args:
+            result: The evaluation run result to write.
+
+        Raises:
+            OSError: If unable to write to the output path.
+        """
+        # Create parent directory if needed
+        self.output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Convert result to dict
+        data = self._result_to_dict(result)
+
+        # Write JSON file
+        with open(self.output_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=self.indent, ensure_ascii=False)
+
+    def _result_to_dict(self, result: EvalRunResult) -> dict:
+        """Convert EvalRunResult to a JSON-serializable dictionary.
+
+        Args:
+            result: The evaluation run result.
+
+        Returns:
+            Dictionary with all result data in JSON-compatible format.
+        """
+        duration_seconds = (
+            result.completed_at - result.started_at
+        ).total_seconds()
+
+        return {
+            "suite_name": result.suite_name,
+            "started_at": result.started_at.isoformat(),
+            "completed_at": result.completed_at.isoformat(),
+            "duration_seconds": duration_seconds,
+            "summary": {
+                "total_cases": result.total_cases,
+                "passed_cases": result.passed_cases,
+                "failed_cases": result.failed_cases,
+                "pass_rate": result.pass_rate,
+            },
+            "case_results": [
+                self._case_result_to_dict(case_result)
+                for case_result in result.case_results
+            ],
+        }
+
+    def _case_result_to_dict(self, case_result) -> dict:
+        """Convert CaseResult to a JSON-serializable dictionary.
+
+        Args:
+            case_result: A CaseResult instance.
+
+        Returns:
+            Dictionary with all case result data.
+        """
+        return {
+            "case_id": case_result.case_id,
+            "case_name": case_result.case_name,
+            "passed": case_result.passed,
+            "duration_ms": case_result.duration_ms,
+            "trace_id": case_result.trace_id,
+            "output": self._serialize_output(case_result.output),
+            "error": case_result.error,
+            "assertions": [
+                self._assertion_result_to_dict(assertion)
+                for assertion in case_result.assertion_results
+            ],
+        }
+
+    def _assertion_result_to_dict(self, assertion_result) -> dict:
+        """Convert AssertionResult to a JSON-serializable dictionary.
+
+        Args:
+            assertion_result: An AssertionResult instance.
+
+        Returns:
+            Dictionary with all assertion result data.
+        """
+        return {
+            "assertion_type": assertion_result.assertion_type,
+            "passed": assertion_result.passed,
+            "message": assertion_result.message,
+            "score": assertion_result.score,
+            "expected": self._serialize_output(assertion_result.expected),
+            "actual": self._serialize_output(assertion_result.actual),
+            "details": assertion_result.details,
+        }
+
+    def _serialize_output(self, output) -> object:
+        """Serialize output values for JSON.
+
+        Handles common non-JSON-serializable types by converting them
+        to strings. For complex objects, returns their string representation.
+
+        Args:
+            output: The output value to serialize.
+
+        Returns:
+            JSON-serializable version of the output.
+        """
+        if output is None:
+            return None
+
+        # Basic JSON-serializable types
+        if isinstance(output, (bool, int, float, str, list, dict)):
+            # For lists and dicts, recursively serialize contents
+            if isinstance(output, list):
+                return [self._serialize_output(item) for item in output]
+            elif isinstance(output, dict):
+                return {
+                    str(key): self._serialize_output(value)
+                    for key, value in output.items()
+                }
+            return output
+
+        # Convert other types to string
+        return str(output)
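The docstring lists historical comparison of evaluation runs as a target use case. A minimal sketch of that, relying only on the key layout produced by _result_to_dict and _case_result_to_dict above (the file paths are hypothetical):

# Sketch: diff two JSONReporter outputs and list cases that regressed.
import json


def load_passed(path: str) -> dict[str, bool]:
    """Map case_id -> passed for one report file written by JSONReporter."""
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return {case["case_id"]: case["passed"] for case in data["case_results"]}


baseline = load_passed("results/run_baseline.json")  # hypothetical paths
current = load_passed("results/run_current.json")

regressions = [
    case_id
    for case_id, passed in baseline.items()
    if passed and not current.get(case_id, False)
]
print(f"{len(regressions)} regressed case(s): {regressions}")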