prela 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,278 @@
1
+ """JUnit XML reporter for evaluation results.
2
+
3
+ This module provides a reporter that generates JUnit-compatible XML files,
4
+ enabling integration with CI/CD systems like Jenkins, GitLab CI, GitHub Actions,
5
+ and other tools that parse JUnit test reports.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import xml.etree.ElementTree as ET
11
+ from pathlib import Path
12
+
13
+ from prela.evals.runner import EvalRunResult
14
+
15
+
16
class JUnitReporter:
    """Reporter that generates JUnit XML format for CI/CD integration.

    Creates a JUnit XML file that can be consumed by continuous integration
    systems for test result visualization, trend analysis, and failure reporting.

    The XML format follows the JUnit schema with:
    - <testsuite> root element with summary statistics
    - <testcase> elements for each test case
    - <failure> elements for failed assertions
    - <error> elements for execution errors
    - <system-out> for additional output/trace information

    Supported CI/CD platforms:
    - Jenkins (JUnit plugin)
    - GitLab CI/CD (junit report artifacts)
    - GitHub Actions (test reporters)
    - Azure DevOps (publish test results)
    - CircleCI (store_test_results)

    Example:
        >>> from prela.evals import EvalRunner
        >>> from prela.evals.reporters import JUnitReporter
        >>>
        >>> runner = EvalRunner(suite, agent)
        >>> result = runner.run()
        >>>
        >>> reporter = JUnitReporter("test-results/junit.xml")
        >>> reporter.report(result)
        # Creates JUnit XML at test-results/junit.xml
    """

    def __init__(self, output_path: str | Path):
        """Initialize the JUnit XML reporter.

        Args:
            output_path: Path where the JUnit XML file will be written.
                Parent directories will be created if they don't exist.
        """
        self.output_path = Path(output_path)

    def report(self, result: EvalRunResult) -> None:
        """Generate and write JUnit XML for the evaluation results.

        Creates parent directories if they don't exist. Overwrites
        any existing file at the output path.

        Args:
            result: The evaluation run result to convert to JUnit XML.

        Raises:
            OSError: If unable to write to the output path.
        """
        # Create parent directory if needed
        self.output_path.parent.mkdir(parents=True, exist_ok=True)

        # Build XML structure
        xml_root = self._build_xml(result)

        # Write to file with pretty formatting
        self._write_xml(xml_root)

    def _build_xml(self, result: EvalRunResult) -> ET.Element:
        """Build JUnit XML element tree from evaluation result.

        Args:
            result: The evaluation run result.

        Returns:
            XML root element (<testsuite>).
        """
        # Calculate duration in seconds
        duration_seconds = (
            result.completed_at - result.started_at
        ).total_seconds()

        # Split non-passing cases into execution errors vs assertion failures
        # so the <testsuite> summary attributes agree with the <error>/<failure>
        # elements emitted below. Previously errors was hardcoded to "0" while
        # <error> children were still emitted, which contradicts the JUnit
        # schema and confuses CI dashboards that trust the attributes.
        # NOTE(review): assumes result.failed_cases counts error cases too
        # (each case below emits either <error> or <failure>, never both).
        error_count = sum(1 for c in result.case_results if c.error)
        failure_count = max(result.failed_cases - error_count, 0)

        # Create root testsuite element
        testsuite = ET.Element(
            "testsuite",
            attrib={
                "name": result.suite_name,
                "tests": str(result.total_cases),
                "failures": str(failure_count),
                "errors": str(error_count),
                "skipped": "0",
                "time": f"{duration_seconds:.3f}",
                "timestamp": result.started_at.isoformat(),
            },
        )

        # Add testcase elements
        for case_result in result.case_results:
            testcase = ET.SubElement(
                testsuite,
                "testcase",
                attrib={
                    "name": case_result.case_name,
                    "classname": result.suite_name,
                    # duration_ms -> seconds, as required by the JUnit schema
                    "time": f"{case_result.duration_ms / 1000:.3f}",
                },
            )

            # If case failed due to execution error, add <error> element
            if case_result.error:
                error = ET.SubElement(
                    testcase,
                    "error",
                    attrib={
                        "type": "ExecutionError",
                        "message": self._truncate_message(case_result.error),
                    },
                )
                # Full (untruncated) error text goes in the element body
                error.text = case_result.error

            # If case failed assertions, add <failure> elements
            elif not case_result.passed:
                # Collect all failed assertions
                failed_assertions = [
                    a for a in case_result.assertion_results if not a.passed
                ]

                if failed_assertions:
                    # Create a single failure element with all failed assertions
                    failure_message = self._format_failure_message(
                        failed_assertions
                    )
                    failure = ET.SubElement(
                        testcase,
                        "failure",
                        attrib={
                            "type": "AssertionFailure",
                            "message": self._truncate_message(failure_message),
                        },
                    )
                    failure.text = self._format_failure_details(
                        failed_assertions
                    )

            # Add system-out with trace_id and output if available
            system_out_parts = []
            if case_result.trace_id:
                system_out_parts.append(f"Trace ID: {case_result.trace_id}")
            if case_result.output is not None:
                output_str = str(case_result.output)
                # Keep the XML readable; cap very large agent outputs
                if len(output_str) > 1000:
                    output_str = output_str[:1000] + "... (truncated)"
                system_out_parts.append(f"Output: {output_str}")

            if system_out_parts:
                system_out = ET.SubElement(testcase, "system-out")
                system_out.text = "\n".join(system_out_parts)

        return testsuite

    def _write_xml(self, root: ET.Element) -> None:
        """Write XML element tree to file with pretty formatting.

        Args:
            root: The root XML element to write.
        """
        # Pretty-print the XML
        self._indent(root)

        # Create ElementTree and write to file
        tree = ET.ElementTree(root)
        tree.write(
            self.output_path,
            encoding="utf-8",
            xml_declaration=True,
            method="xml",
        )

    def _indent(self, elem: ET.Element, level: int = 0) -> None:
        """Add indentation to XML elements for pretty printing.

        Modifies the element tree in-place to add newlines and indentation
        (two spaces per nesting level).

        Args:
            elem: The XML element to indent.
            level: Current indentation depth (number of two-space units).
        """
        indent = "\n" + "  " * level
        if len(elem):
            # Element has children: indent its text and each child's tail
            if not elem.text or not elem.text.strip():
                elem.text = indent + "  "
            if not elem.tail or not elem.tail.strip():
                elem.tail = indent
            for child in elem:
                self._indent(child, level + 1)
                if not child.tail or not child.tail.strip():
                    child.tail = indent
            # Re-align the last child's tail with this element's closing tag
            if not child.tail or not child.tail.strip():
                child.tail = indent
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = indent

    def _format_failure_message(self, failed_assertions: list) -> str:
        """Format a summary message for failed assertions.

        Args:
            failed_assertions: List of AssertionResult objects that failed.

        Returns:
            Summary string listing all failed assertion types.
        """
        if not failed_assertions:
            return "Test case failed"

        if len(failed_assertions) == 1:
            return failed_assertions[0].message

        # Multiple failures
        assertion_types = [a.assertion_type for a in failed_assertions]
        return f"{len(failed_assertions)} assertions failed: {', '.join(assertion_types)}"

    def _format_failure_details(self, failed_assertions: list) -> str:
        """Format detailed failure information for all failed assertions.

        Args:
            failed_assertions: List of AssertionResult objects that failed.

        Returns:
            Detailed multi-line string with all failure information.
        """
        lines = []
        for i, assertion in enumerate(failed_assertions, 1):
            lines.append(f"Assertion {i}: {assertion.assertion_type}")
            lines.append(f"  Message: {assertion.message}")

            if assertion.expected is not None:
                expected_str = str(assertion.expected)
                if len(expected_str) > 200:
                    expected_str = expected_str[:200] + "... (truncated)"
                lines.append(f"  Expected: {expected_str}")

            if assertion.actual is not None:
                actual_str = str(assertion.actual)
                if len(actual_str) > 200:
                    actual_str = actual_str[:200] + "... (truncated)"
                lines.append(f"  Actual: {actual_str}")

            if assertion.score is not None:
                lines.append(f"  Score: {assertion.score:.3f}")

            if assertion.details:
                lines.append(f"  Details: {assertion.details}")

            lines.append("")  # Blank line between assertions

        return "\n".join(lines)

    def _truncate_message(self, message: str, max_length: int = 200) -> str:
        """Truncate long error messages for the message attribute.

        Args:
            message: The message to truncate.
            max_length: Maximum length before truncation.

        Returns:
            Truncated string with "..." suffix if needed.
        """
        if len(message) > max_length:
            return message[: max_length - 3] + "..."
        return message