prela-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/reporters/junit.py
@@ -0,0 +1,278 @@
+"""JUnit XML reporter for evaluation results.
+
+This module provides a reporter that generates JUnit-compatible XML files,
+enabling integration with CI/CD systems like Jenkins, GitLab CI, GitHub Actions,
+and other tools that parse JUnit test reports.
+"""
+
+from __future__ import annotations
+
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+from prela.evals.runner import EvalRunResult
+
+
+class JUnitReporter:
+    """Reporter that generates JUnit XML format for CI/CD integration.
+
+    Creates a JUnit XML file that can be consumed by continuous integration
+    systems for test result visualization, trend analysis, and failure reporting.
+
+    The XML format follows the JUnit schema with:
+    - <testsuite> root element with summary statistics
+    - <testcase> elements for each test case
+    - <failure> elements for failed assertions
+    - <error> elements for execution errors
+    - <system-out> for additional output/trace information
+
+    Supported CI/CD platforms:
+    - Jenkins (JUnit plugin)
+    - GitLab CI/CD (junit report artifacts)
+    - GitHub Actions (test reporters)
+    - Azure DevOps (publish test results)
+    - CircleCI (store_test_results)
+
+    Example:
+        >>> from prela.evals import EvalRunner
+        >>> from prela.evals.reporters import JUnitReporter
+        >>>
+        >>> runner = EvalRunner(suite, agent)
+        >>> result = runner.run()
+        >>>
+        >>> reporter = JUnitReporter("test-results/junit.xml")
+        >>> reporter.report(result)
+        # Creates JUnit XML at test-results/junit.xml
+    """
+
+    def __init__(self, output_path: str | Path):
+        """Initialize the JUnit XML reporter.
+
+        Args:
+            output_path: Path where the JUnit XML file will be written.
+                Parent directories will be created if they don't exist.
+        """
+        self.output_path = Path(output_path)
+
+    def report(self, result: EvalRunResult) -> None:
+        """Generate and write JUnit XML for the evaluation results.
+
+        Creates parent directories if they don't exist. Overwrites
+        any existing file at the output path.
+
+        Args:
+            result: The evaluation run result to convert to JUnit XML.
+
+        Raises:
+            OSError: If unable to write to the output path.
+        """
+        # Create parent directory if needed
+        self.output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Build XML structure
+        xml_root = self._build_xml(result)
+
+        # Write to file with pretty formatting
+        self._write_xml(xml_root)
+
+    def _build_xml(self, result: EvalRunResult) -> ET.Element:
+        """Build JUnit XML element tree from evaluation result.
+
+        Args:
+            result: The evaluation run result.
+
+        Returns:
+            XML root element (<testsuite>).
+        """
+        # Calculate duration in seconds
+        duration_seconds = (
+            result.completed_at - result.started_at
+        ).total_seconds()
+
+        # Create root testsuite element
+        testsuite = ET.Element(
+            "testsuite",
+            attrib={
+                "name": result.suite_name,
+                "tests": str(result.total_cases),
+                "failures": str(result.failed_cases),
+                "errors": "0",  # We track errors as failures
+                "skipped": "0",
+                "time": f"{duration_seconds:.3f}",
+                "timestamp": result.started_at.isoformat(),
+            },
+        )
+
+        # Add testcase elements
+        for case_result in result.case_results:
+            testcase = ET.SubElement(
+                testsuite,
+                "testcase",
+                attrib={
+                    "name": case_result.case_name,
+                    "classname": result.suite_name,
+                    "time": f"{case_result.duration_ms / 1000:.3f}",
+                },
+            )
+
+            # If case failed due to execution error, add <error> element
+            if case_result.error:
+                error = ET.SubElement(
+                    testcase,
+                    "error",
+                    attrib={
+                        "type": "ExecutionError",
+                        "message": self._truncate_message(case_result.error),
+                    },
+                )
+                error.text = case_result.error
+
+            # If case failed assertions, add <failure> elements
+            elif not case_result.passed:
+                # Collect all failed assertions
+                failed_assertions = [
+                    a for a in case_result.assertion_results if not a.passed
+                ]
+
+                if failed_assertions:
+                    # Create a single failure element with all failed assertions
+                    failure_message = self._format_failure_message(
+                        failed_assertions
+                    )
+                    failure = ET.SubElement(
+                        testcase,
+                        "failure",
+                        attrib={
+                            "type": "AssertionFailure",
+                            "message": self._truncate_message(failure_message),
+                        },
+                    )
+                    failure.text = self._format_failure_details(
+                        failed_assertions
+                    )
+
+            # Add system-out with trace_id and output if available
+            system_out_parts = []
+            if case_result.trace_id:
+                system_out_parts.append(f"Trace ID: {case_result.trace_id}")
+            if case_result.output is not None:
+                output_str = str(case_result.output)
+                if len(output_str) > 1000:
+                    output_str = output_str[:1000] + "... (truncated)"
+                system_out_parts.append(f"Output: {output_str}")
+
+            if system_out_parts:
+                system_out = ET.SubElement(testcase, "system-out")
+                system_out.text = "\n".join(system_out_parts)
+
+        return testsuite
+
+    def _write_xml(self, root: ET.Element) -> None:
+        """Write XML element tree to file with pretty formatting.
+
+        Args:
+            root: The root XML element to write.
+        """
+        # Pretty-print the XML
+        self._indent(root)
+
+        # Create ElementTree and write to file
+        tree = ET.ElementTree(root)
+        tree.write(
+            self.output_path,
+            encoding="utf-8",
+            xml_declaration=True,
+            method="xml",
+        )
+
+    def _indent(self, elem: ET.Element, level: int = 0) -> None:
+        """Add indentation to XML elements for pretty printing.
+
+        Modifies the element tree in-place to add newlines and indentation.
+
+        Args:
+            elem: The XML element to indent.
+            level: Current indentation level (number of tabs).
+        """
+        indent = "\n" + " " * level
+        if len(elem):
+            if not elem.text or not elem.text.strip():
+                elem.text = indent + " "
+            if not elem.tail or not elem.tail.strip():
+                elem.tail = indent
+            for child in elem:
+                self._indent(child, level + 1)
+            if not child.tail or not child.tail.strip():
+                child.tail = indent
+        else:
+            if level and (not elem.tail or not elem.tail.strip()):
+                elem.tail = indent
+
+    def _format_failure_message(self, failed_assertions: list) -> str:
+        """Format a summary message for failed assertions.
+
+        Args:
+            failed_assertions: List of AssertionResult objects that failed.
+
+        Returns:
+            Summary string listing all failed assertion types.
+        """
+        if not failed_assertions:
+            return "Test case failed"
+
+        if len(failed_assertions) == 1:
+            return failed_assertions[0].message
+
+        # Multiple failures
+        assertion_types = [a.assertion_type for a in failed_assertions]
+        return f"{len(failed_assertions)} assertions failed: {', '.join(assertion_types)}"
+
+    def _format_failure_details(self, failed_assertions: list) -> str:
+        """Format detailed failure information for all failed assertions.
+
+        Args:
+            failed_assertions: List of AssertionResult objects that failed.
+
+        Returns:
+            Detailed multi-line string with all failure information.
+        """
+        lines = []
+        for i, assertion in enumerate(failed_assertions, 1):
+            lines.append(f"Assertion {i}: {assertion.assertion_type}")
+            lines.append(f" Message: {assertion.message}")
+
+            if assertion.expected is not None:
+                expected_str = str(assertion.expected)
+                if len(expected_str) > 200:
+                    expected_str = expected_str[:200] + "... (truncated)"
+                lines.append(f" Expected: {expected_str}")
+
+            if assertion.actual is not None:
+                actual_str = str(assertion.actual)
+                if len(actual_str) > 200:
+                    actual_str = actual_str[:200] + "... (truncated)"
+                lines.append(f" Actual: {actual_str}")
+
+            if assertion.score is not None:
+                lines.append(f" Score: {assertion.score:.3f}")
+
+            if assertion.details:
+                lines.append(f" Details: {assertion.details}")
+
+            lines.append("")  # Blank line between assertions
+
+        return "\n".join(lines)
+
+    def _truncate_message(self, message: str, max_length: int = 200) -> str:
+        """Truncate long error messages for the message attribute.
+
+        Args:
+            message: The message to truncate.
+            max_length: Maximum length before truncation.
+
+        Returns:
+            Truncated string with "..." suffix if needed.
+        """
+        if len(message) > max_length:
+            return message[: max_length - 3] + "..."
+        return message
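
For a quick sanity check of this reporter in isolation, the sketch below feeds it stand-in objects whose attribute names (suite_name, case_results, duration_ms, and so on) are inferred from the code above. The Fake* dataclasses are hypothetical test doubles, not part of the package; the sketch assumes prela 0.1.0 is installed so the import resolves, and uses the import path shown in the module's own docstring example.

from dataclasses import dataclass, field
from datetime import datetime, timezone

from prela.evals.reporters import JUnitReporter


@dataclass
class FakeAssertion:
    # Attribute names mirror what _format_failure_details() reads.
    passed: bool = False
    assertion_type: str = "contains"
    message: str = "expected substring not found"
    expected: object = "hello"
    actual: object = "goodbye"
    score: object = None
    details: object = None


@dataclass
class FakeCase:
    # Attribute names mirror what _build_xml() reads per test case.
    case_name: str = "greeting_case"
    duration_ms: float = 125.0
    error: object = None
    passed: bool = False
    assertion_results: list = field(default_factory=lambda: [FakeAssertion()])
    trace_id: object = "trace-123"
    output: object = "goodbye"


@dataclass
class FakeRun:
    # Attribute names mirror what _build_xml() reads at the suite level.
    suite_name: str = "smoke-suite"
    total_cases: int = 1
    failed_cases: int = 1
    started_at: datetime = field(
        default_factory=lambda: datetime(2024, 1, 1, tzinfo=timezone.utc)
    )
    completed_at: datetime = field(
        default_factory=lambda: datetime(2024, 1, 1, 0, 0, 2, tzinfo=timezone.utc)
    )
    case_results: list = field(default_factory=lambda: [FakeCase()])


# report() only duck-types its argument, so the stand-in run works here.
JUnitReporter("test-results/junit.xml").report(FakeRun())
# Expected shape of test-results/junit.xml (whitespace varies after _indent):
# <?xml version='1.0' encoding='utf-8'?>
# <testsuite name="smoke-suite" tests="1" failures="1" errors="0" skipped="0"
#            time="2.000" timestamp="2024-01-01T00:00:00+00:00">
#   <testcase name="greeting_case" classname="smoke-suite" time="0.125">
#     <failure type="AssertionFailure" message="expected substring not found">
#       Assertion 1: contains ...
#     </failure>
#     <system-out>Trace ID: trace-123
# Output: goodbye</system-out>
#   </testcase>
# </testsuite>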