prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/reporters/__init__.py
@@ -0,0 +1,32 @@
+"""Reporter implementations for evaluation results.
+
+This module provides reporters for outputting evaluation results in various formats:
+- ConsoleReporter: Pretty-printed terminal output with colors
+- JSONReporter: JSON file output for programmatic access
+- JUnitReporter: JUnit XML for CI/CD integration
+
+Example:
+    >>> from prela.evals import EvalRunner
+    >>> from prela.evals.reporters import ConsoleReporter, JSONReporter
+    >>>
+    >>> runner = EvalRunner(suite, agent)
+    >>> result = runner.run()
+    >>>
+    >>> # Print to console
+    >>> console = ConsoleReporter()
+    >>> console.report(result)
+    >>>
+    >>> # Save to JSON
+    >>> json_reporter = JSONReporter("results.json")
+    >>> json_reporter.report(result)
+"""
+
+from prela.evals.reporters.console import ConsoleReporter
+from prela.evals.reporters.json import JSONReporter
+from prela.evals.reporters.junit import JUnitReporter
+
+__all__ = [
+    "ConsoleReporter",
+    "JSONReporter",
+    "JUnitReporter",
+]
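The hunk above wires the three reporter classes into the package namespace. As a rough usage sketch (not part of the diff), all three exports can be fanned out over a single run result. This assumes JUnitReporter accepts an output path the way JSONReporter does, which the junit.py source (not shown in this section) would need to confirm, and that suite and agent are constructed as in the docstring example.

# Sketch only: fan one EvalRunResult out to every bundled reporter.
# JUnitReporter's constructor signature is assumed, not confirmed by this hunk.
from prela.evals import EvalRunner
from prela.evals.reporters import ConsoleReporter, JSONReporter, JUnitReporter

runner = EvalRunner(suite, agent)  # suite and agent defined as in the docstring example
result = runner.run()

for reporter in (
    ConsoleReporter(verbose=True),
    JSONReporter("results/run.json"),    # example path
    JUnitReporter("results/junit.xml"),  # assumed to take an output path
):
    reporter.report(result)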
prela/evals/reporters/console.py
@@ -0,0 +1,251 @@
+"""Console reporter for evaluation results with rich terminal output.
+
+This module provides a reporter that prints evaluation results to the console
+with beautiful formatting, colors, and tree structures for easy debugging.
+"""
+
+from __future__ import annotations
+
+from prela.evals.runner import EvalRunResult
+
+# Try to import rich for colored output
+try:
+    from rich.console import Console
+    from rich.table import Table
+    from rich.panel import Panel
+    from rich.text import Text
+
+    HAS_RICH = True
+except ImportError:
+    HAS_RICH = False
+
+
+class ConsoleReporter:
+    """Reporter that pretty-prints evaluation results to the console.
+
+    Uses the rich library for colored output if available and falls back to
+    plain text formatting otherwise. Provides:
+    - Summary statistics (pass rate, duration)
+    - List of all test cases with pass/fail status
+    - Detailed failure information for failed cases
+    - Color coding (green=pass, red=fail, yellow=warning)
+
+    Example:
+        >>> from prela.evals import EvalRunner
+        >>> from prela.evals.reporters import ConsoleReporter
+        >>>
+        >>> runner = EvalRunner(suite, agent)
+        >>> result = runner.run()
+        >>>
+        >>> reporter = ConsoleReporter(verbose=True, use_colors=True)
+        >>> reporter.report(result)
+        ✓ Geography QA Suite
+        ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+        Total: 10 | Passed: 9 (90.0%) | Failed: 1
+        Duration: 2.5s
+        ...
+    """
+
+    def __init__(self, verbose: bool = True, use_colors: bool = True):
+        """Initialize the console reporter.
+
+        Args:
+            verbose: If True, show detailed failure information. If False,
+                only show summary statistics and failed case names.
+            use_colors: If True and rich is available, use colored output.
+                If False or rich unavailable, use plain text.
+        """
+        self.verbose = verbose
+        self.use_colors = use_colors and HAS_RICH
+        if self.use_colors:
+            self.console = Console()
+
+    def report(self, result: EvalRunResult) -> None:
+        """Print the evaluation results to the console.
+
+        Args:
+            result: The evaluation run result to report.
+        """
+        if self.use_colors:
+            self._report_rich(result)
+        else:
+            self._report_plain(result)
+
+    def _report_rich(self, result: EvalRunResult) -> None:
+        """Print results using rich library (colored output)."""
+        # Create title with status symbol
+        title = Text()
+        if result.pass_rate == 1.0:
+            title.append("✓ ", style="bold green")
+        elif result.pass_rate == 0.0:
+            title.append("✗ ", style="bold red")
+        else:
+            title.append("⚠ ", style="bold yellow")
+        title.append(result.suite_name, style="bold")
+
+        # Create summary statistics
+        duration = (result.completed_at - result.started_at).total_seconds()
+        summary = (
+            f"Total: {result.total_cases} | "
+            f"[green]Passed: {result.passed_cases}[/green] "
+            f"([cyan]{result.pass_rate * 100:.1f}%[/cyan]) | "
+            f"[red]Failed: {result.failed_cases}[/red]\n"
+            f"Duration: {duration:.2f}s"
+        )
+
+        # Print panel with summary
+        panel = Panel(
+            summary,
+            title=title,
+            border_style="blue" if result.pass_rate == 1.0 else "yellow",
+        )
+        self.console.print(panel)
+        self.console.print()
+
+        # Create table of test cases
+        table = Table(show_header=True, header_style="bold cyan")
+        table.add_column("Status", width=8)
+        table.add_column("Test Case")
+        table.add_column("Duration", justify="right", width=10)
+        table.add_column("Assertions", justify="center", width=12)
+
+        for case_result in result.case_results:
+            # Status column with color
+            if case_result.passed:
+                status = Text("✓ PASS", style="bold green")
+            else:
+                status = Text("✗ FAIL", style="bold red")
+
+            # Duration formatting
+            duration_str = f"{case_result.duration_ms:.1f}ms"
+
+            # Assertion counts
+            total_assertions = len(case_result.assertion_results)
+            passed_assertions = sum(
+                1 for a in case_result.assertion_results if a.passed
+            )
+            assertion_str = f"{passed_assertions}/{total_assertions}"
+
+            table.add_row(
+                status,
+                case_result.case_name,
+                duration_str,
+                assertion_str,
+            )
+
+        self.console.print(table)
+
+        # Show detailed failure information if verbose
+        if self.verbose and result.failed_cases > 0:
+            self.console.print()
+            self.console.print("[bold red]Failed Test Details:[/bold red]")
+            self.console.print()
+
+            for case_result in result.case_results:
+                if not case_result.passed:
+                    self.console.print(
+                        f"[bold red]✗ {case_result.case_name}[/bold red]"
+                    )
+
+                    # Show error if present
+                    if case_result.error:
+                        self.console.print(
+                            f"  [red]Error:[/red] {case_result.error}"
+                        )
+
+                    # Show failed assertions
+                    for assertion in case_result.assertion_results:
+                        if not assertion.passed:
+                            self.console.print(
+                                f"  [red]✗[/red] {assertion.message}"
+                            )
+                            if assertion.expected is not None:
+                                self.console.print(
+                                    f"    [dim]Expected:[/dim] {self._truncate(assertion.expected)}"
+                                )
+                            if assertion.actual is not None:
+                                self.console.print(
+                                    f"    [dim]Actual:[/dim] {self._truncate(assertion.actual)}"
+                                )
+
+                    self.console.print()
+
+    def _report_plain(self, result: EvalRunResult) -> None:
+        """Print results using plain text (no colors)."""
+        # Print header
+        if result.pass_rate == 1.0:
+            status_symbol = "✓"
+        elif result.pass_rate == 0.0:
+            status_symbol = "✗"
+        else:
+            status_symbol = "⚠"
+
+        print(f"{status_symbol} {result.suite_name}")
+        print("=" * 60)
+
+        # Print summary
+        duration = (result.completed_at - result.started_at).total_seconds()
+        print(f"Total: {result.total_cases} | ", end="")
+        print(f"Passed: {result.passed_cases} ({result.pass_rate * 100:.1f}%) | ", end="")
+        print(f"Failed: {result.failed_cases}")
+        print(f"Duration: {duration:.2f}s")
+        print()
+
+        # Print test cases
+        print("Test Cases:")
+        print("-" * 60)
+        for case_result in result.case_results:
+            status = "✓ PASS" if case_result.passed else "✗ FAIL"
+            duration_str = f"{case_result.duration_ms:.1f}ms"
+            total_assertions = len(case_result.assertion_results)
+            passed_assertions = sum(
+                1 for a in case_result.assertion_results if a.passed
+            )
+            assertion_str = f"{passed_assertions}/{total_assertions}"
+
+            print(
+                f"{status:8} {case_result.case_name:35} "
+                f"{duration_str:>10} {assertion_str:>12}"
+            )
+
+        # Show detailed failure information if verbose
+        if self.verbose and result.failed_cases > 0:
+            print()
+            print("Failed Test Details:")
+            print("=" * 60)
+
+            for case_result in result.case_results:
+                if not case_result.passed:
+                    print(f"\n✗ {case_result.case_name}")
+
+                    # Show error if present
+                    if case_result.error:
+                        print(f"  Error: {case_result.error}")
+
+                    # Show failed assertions
+                    for assertion in case_result.assertion_results:
+                        if not assertion.passed:
+                            print(f"  ✗ {assertion.message}")
+                            if assertion.expected is not None:
+                                print(
+                                    f"    Expected: {self._truncate(assertion.expected)}"
+                                )
+                            if assertion.actual is not None:
+                                print(
+                                    f"    Actual: {self._truncate(assertion.actual)}"
+                                )
+
+    def _truncate(self, value: object, max_length: int = 100) -> str:
+        """Truncate long strings for display.
+
+        Args:
+            value: The value to truncate (will be converted to string).
+            max_length: Maximum length before truncation.
+
+        Returns:
+            Truncated string with "..." suffix if needed.
+        """
+        value_str = str(value)
+        if len(value_str) > max_length:
+            return value_str[: max_length - 3] + "..."
+        return value_str
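ConsoleReporter only prints; it does not set the process exit status. A minimal sketch (not part of the package) of gating a CI step on the same EvalRunResult fields the reporter reads, assuming result comes from EvalRunner.run() as in the docstring example:

# Sketch: print a plain-text report in CI, then fail the job on any failed case.
import sys

from prela.evals.reporters import ConsoleReporter

reporter = ConsoleReporter(verbose=True, use_colors=False)  # plain text suits CI logs
reporter.report(result)  # result: EvalRunResult from EvalRunner.run()
if result.failed_cases > 0:
    sys.exit(1)  # non-zero exit marks the pipeline step as failed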
prela/evals/reporters/json.py
@@ -0,0 +1,176 @@
+"""JSON reporter for evaluation results.
+
+This module provides a reporter that writes evaluation results to a JSON file,
+suitable for programmatic access, data analysis, or integration with other tools.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from prela.evals.runner import EvalRunResult
+
+
+class JSONReporter:
+    """Reporter that writes evaluation results to a JSON file.
+
+    Outputs a structured JSON file containing all evaluation data:
+    - Suite metadata (name, timestamps, duration)
+    - Summary statistics (total, passed, failed, pass rate)
+    - Individual case results with assertion details
+    - Full error messages and stack traces
+
+    The JSON format is designed for:
+    - Programmatic analysis of test results
+    - Integration with data processing pipelines
+    - Historical comparison of evaluation runs
+    - CI/CD artifact storage
+
+    Example:
+        >>> from prela.evals import EvalRunner
+        >>> from prela.evals.reporters import JSONReporter
+        >>>
+        >>> runner = EvalRunner(suite, agent)
+        >>> result = runner.run()
+        >>>
+        >>> reporter = JSONReporter("results/eval_run_123.json")
+        >>> reporter.report(result)
+        # Creates results/eval_run_123.json with full results
+    """
+
+    def __init__(self, output_path: str | Path, indent: int | None = 2):
+        """Initialize the JSON reporter.
+
+        Args:
+            output_path: Path where the JSON file will be written.
+                Parent directories will be created if they don't exist.
+            indent: Number of spaces for JSON indentation (default: 2).
+                Set to None for compact output.
+        """
+        self.output_path = Path(output_path)
+        self.indent = indent
+
+    def report(self, result: EvalRunResult) -> None:
+        """Write the evaluation results to a JSON file.
+
+        Creates parent directories if they don't exist. Overwrites
+        any existing file at the output path.
+
+        Args:
+            result: The evaluation run result to write.
+
+        Raises:
+            OSError: If unable to write to the output path.
+        """
+        # Create parent directory if needed
+        self.output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Convert result to dict
+        data = self._result_to_dict(result)
+
+        # Write JSON file
+        with open(self.output_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=self.indent, ensure_ascii=False)
+
+    def _result_to_dict(self, result: EvalRunResult) -> dict:
+        """Convert EvalRunResult to a JSON-serializable dictionary.
+
+        Args:
+            result: The evaluation run result.
+
+        Returns:
+            Dictionary with all result data in JSON-compatible format.
+        """
+        duration_seconds = (
+            result.completed_at - result.started_at
+        ).total_seconds()
+
+        return {
+            "suite_name": result.suite_name,
+            "started_at": result.started_at.isoformat(),
+            "completed_at": result.completed_at.isoformat(),
+            "duration_seconds": duration_seconds,
+            "summary": {
+                "total_cases": result.total_cases,
+                "passed_cases": result.passed_cases,
+                "failed_cases": result.failed_cases,
+                "pass_rate": result.pass_rate,
+            },
+            "case_results": [
+                self._case_result_to_dict(case_result)
+                for case_result in result.case_results
+            ],
+        }
+
+    def _case_result_to_dict(self, case_result) -> dict:
+        """Convert CaseResult to a JSON-serializable dictionary.
+
+        Args:
+            case_result: A CaseResult instance.
+
+        Returns:
+            Dictionary with all case result data.
+        """
+        return {
+            "case_id": case_result.case_id,
+            "case_name": case_result.case_name,
+            "passed": case_result.passed,
+            "duration_ms": case_result.duration_ms,
+            "trace_id": case_result.trace_id,
+            "output": self._serialize_output(case_result.output),
+            "error": case_result.error,
+            "assertions": [
+                self._assertion_result_to_dict(assertion)
+                for assertion in case_result.assertion_results
+            ],
+        }
+
+    def _assertion_result_to_dict(self, assertion_result) -> dict:
+        """Convert AssertionResult to a JSON-serializable dictionary.
+
+        Args:
+            assertion_result: An AssertionResult instance.
+
+        Returns:
+            Dictionary with all assertion result data.
+        """
+        return {
+            "assertion_type": assertion_result.assertion_type,
+            "passed": assertion_result.passed,
+            "message": assertion_result.message,
+            "score": assertion_result.score,
+            "expected": self._serialize_output(assertion_result.expected),
+            "actual": self._serialize_output(assertion_result.actual),
+            "details": assertion_result.details,
+        }
+
+    def _serialize_output(self, output) -> object:
+        """Serialize output values for JSON.
+
+        Handles common non-JSON-serializable types by converting them
+        to strings. For complex objects, returns their string representation.
+
+        Args:
+            output: The output value to serialize.
+
+        Returns:
+            JSON-serializable version of the output.
+        """
+        if output is None:
+            return None
+
+        # Basic JSON-serializable types
+        if isinstance(output, (bool, int, float, str, list, dict)):
+            # For lists and dicts, recursively serialize contents
+            if isinstance(output, list):
+                return [self._serialize_output(item) for item in output]
+            elif isinstance(output, dict):
+                return {
+                    str(key): self._serialize_output(value)
+                    for key, value in output.items()
+                }
+            return output
+
+        # Convert other types to string
+        return str(output)
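The docstring lists historical comparison of evaluation runs as a target use case. A minimal sketch of that, relying only on the key layout produced by _result_to_dict and _case_result_to_dict above (the file paths are hypothetical):

# Sketch: diff two JSONReporter outputs and list cases that regressed.
import json


def load_passed(path: str) -> dict[str, bool]:
    """Map case_id -> passed for one report file written by JSONReporter."""
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return {case["case_id"]: case["passed"] for case in data["case_results"]}


baseline = load_passed("results/run_baseline.json")  # hypothetical paths
current = load_passed("results/run_current.json")

regressions = [
    case_id
    for case_id, passed in baseline.items()
    if passed and not current.get(case_id, False)
]
print(f"{len(regressions)} regressed case(s): {regressions}")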