prela 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,380 @@
1
+ """
2
+ Tool-related assertions for verifying agent tool usage.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ from prela.core.span import Span, SpanType
10
+ from prela.evals.assertions.base import AssertionResult, BaseAssertion
11
+
12
+
13
class ToolCalledAssertion(BaseAssertion):
    """Assert that a specific tool was called during execution.

    The trace is scanned for spans whose ``span_type`` is ``SpanType.TOOL``
    and whose ``name`` equals ``tool_name``. The assertion passes when at
    least one such span exists.

    Example:
        >>> assertion = ToolCalledAssertion(tool_name="web_search")
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, tool_name: str):
        """Initialize tool called assertion.

        Args:
            tool_name: Name of the tool that should have been called
        """
        self.tool_name = tool_name

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if the specified tool was called in the trace.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect. ``None`` or an empty list fails the
                assertion with ``actual="no trace"``.

        Returns:
            An ``AssertionResult`` with ``assertion_type="tool_called"``.
            On success ``details`` holds ``call_count`` and ``span_ids``;
            on failure it holds ``call_count`` (0) and ``available_tools``,
            the names of the tool spans that were present.
        """
        expected_description = f"tool '{self.tool_name}' called"

        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="tool_called",
                message=f"No trace available to check for tool '{self.tool_name}'",
                expected=expected_description,
                actual="no trace",
                details={},
            )

        # Look for tool spans with matching name
        tool_spans = [
            span
            for span in trace
            if span.span_type == SpanType.TOOL and span.name == self.tool_name
        ]
        passed = bool(tool_spans)

        if passed:
            message = f"Tool '{self.tool_name}' was called {len(tool_spans)} time(s)"
            details = {
                "call_count": len(tool_spans),
                "span_ids": [span.span_id for span in tool_spans],
            }
        else:
            # List the tools that were called to help debugging. The names are
            # de-duplicated through a set and then sorted so the report is
            # identical from run to run (set iteration order is not stable
            # across processes). key=str keeps the sort safe should a span
            # carry a non-string name.
            available_tools = {
                span.name for span in trace if span.span_type == SpanType.TOOL
            }
            message = f"Tool '{self.tool_name}' was not called"
            details = {
                "call_count": 0,
                "available_tools": sorted(available_tools, key=str),
            }

        return AssertionResult(
            passed=passed,
            assertion_type="tool_called",
            message=message,
            expected=expected_description,
            actual=f"{len(tool_spans)} calls" if passed else "not called",
            details=details,
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> ToolCalledAssertion:
        """Create from configuration.

        Config format:
            {
                "tool_name": "web_search"
            }

        Raises:
            ValueError: If ``tool_name`` is missing from ``config``.
        """
        if "tool_name" not in config:
            raise ValueError("ToolCalledAssertion requires 'tool_name' in config")

        return cls(tool_name=config["tool_name"])

    def __repr__(self) -> str:
        return f"ToolCalledAssertion(tool_name={self.tool_name!r})"
101
+
102
+
103
class ToolArgsAssertion(BaseAssertion):
    """Assert that a tool was called with expected arguments.

    This assertion verifies both that the tool was called and that at least
    one of its calls carried the expected argument values.

    Arguments are read from each matching span's ``attributes``:

    * keys starting with ``"tool.input."`` or ``"input."`` contribute one
      argument each, named after the last dot-separated component of the key;
    * if an ``"input"`` attribute holds a dict, its entries are merged in and
      take precedence over the prefixed keys.

    Example:
        >>> assertion = ToolArgsAssertion(
        ...     tool_name="web_search",
        ...     expected_args={"query": "Python tutorial"}
        ... )
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(
        self,
        tool_name: str,
        expected_args: dict[str, Any],
        partial_match: bool = True,
    ):
        """Initialize tool args assertion.

        Args:
            tool_name: Name of the tool to check
            expected_args: Expected argument key-value pairs
            partial_match: If True, only check that expected_args are present
                (allow additional args). If False, require exact match.
        """
        self.tool_name = tool_name
        self.expected_args = expected_args
        self.partial_match = partial_match

    @staticmethod
    def _extract_args(span: Span) -> dict[str, Any]:
        """Collect the tool-call arguments recorded in a span's attributes."""
        attributes = span.attributes
        extracted: dict[str, Any] = {}

        # Look for tool input in attributes (common patterns)
        for key, value in attributes.items():
            if key.startswith(("tool.input.", "input.")):
                extracted[key.split(".")[-1]] = value

        # Also check for generic "input" attribute
        if "input" in attributes and isinstance(attributes["input"], dict):
            extracted.update(attributes["input"])

        return extracted

    def _args_match(self, actual_args: dict[str, Any]) -> bool:
        """Return True if ``actual_args`` satisfies the expected arguments."""
        if not self.partial_match:
            # Require exact match
            return actual_args == self.expected_args

        # Every expected key must actually be present. A plain
        # ``actual_args.get(key) == value`` would wrongly accept a missing
        # argument whenever the expected value is None.
        return all(
            key in actual_args and actual_args[key] == value
            for key, value in self.expected_args.items()
        )

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if tool was called with expected arguments.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect. ``None`` or an empty list fails the
                assertion with ``actual="no trace"``.

        Returns:
            An ``AssertionResult`` with ``assertion_type="tool_args"``. It
            passes when at least one call of the tool matches. On success
            ``actual`` is the argument dict of the first matching call; when
            the tool was called but no call matched, ``actual`` is the
            argument dict of the first call.
        """
        expected_description = (
            f"tool '{self.tool_name}' with args {self.expected_args}"
        )

        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="tool_args",
                message=f"No trace available to check tool '{self.tool_name}' arguments",
                expected=expected_description,
                actual="no trace",
                details={},
            )

        # Find tool spans with matching name
        tool_spans = [
            span
            for span in trace
            if span.span_type == SpanType.TOOL and span.name == self.tool_name
        ]

        if not tool_spans:
            return AssertionResult(
                passed=False,
                assertion_type="tool_args",
                message=f"Tool '{self.tool_name}' was not called",
                expected=expected_description,
                actual="tool not called",
                details={},
            )

        # Extract the arguments of every call once, so the values used for
        # matching and the values reported for debugging cannot diverge.
        calls = [(span, self._extract_args(span)) for span in tool_spans]
        matches = [(span, args) for span, args in calls if self._args_match(args)]
        passed = bool(matches)

        if passed:
            message = f"Tool '{self.tool_name}' was called with expected arguments ({len(matches)} time(s))"
            actual = matches[0][1]
            details = {
                "match_count": len(matches),
                "span_ids": [span.span_id for span, _ in matches],
                "matched_args": [args for _, args in matches],
            }
        else:
            # Show actual args from first call for debugging
            actual = calls[0][1]
            message = f"Tool '{self.tool_name}' was called but not with expected arguments"
            details = {
                "match_count": 0,
                "call_count": len(tool_spans),
                "first_call_args": actual,
            }

        return AssertionResult(
            passed=passed,
            assertion_type="tool_args",
            message=message,
            expected=self.expected_args,
            actual=actual,
            details=details,
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> ToolArgsAssertion:
        """Create from configuration.

        Config format:
            {
                "tool_name": "web_search",
                "expected_args": {"query": "Python"},
                "partial_match": true  # optional, default: true
            }

        Raises:
            ValueError: If ``tool_name`` or ``expected_args`` is missing
                from ``config``.
        """
        if "tool_name" not in config:
            raise ValueError("ToolArgsAssertion requires 'tool_name' in config")
        if "expected_args" not in config:
            raise ValueError("ToolArgsAssertion requires 'expected_args' in config")

        return cls(
            tool_name=config["tool_name"],
            expected_args=config["expected_args"],
            partial_match=config.get("partial_match", True),
        )

    def __repr__(self) -> str:
        return (
            f"ToolArgsAssertion(tool_name={self.tool_name!r}, "
            f"expected_args={self.expected_args}, "
            f"partial_match={self.partial_match})"
        )
259
+
260
+
261
class ToolSequenceAssertion(BaseAssertion):
    """Assert that tools were called in a specific order.

    Tool spans are ordered by ``started_at`` and their names are compared
    with the expected sequence. In the default (non-strict) mode the expected
    names must appear in order, though other tools may appear between them.
    In strict mode the ordered list of tool calls must equal the expected
    sequence exactly.

    Example:
        >>> assertion = ToolSequenceAssertion(
        ...     sequence=["web_search", "calculator", "summarize"]
        ... )
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, sequence: list[str], strict: bool = False):
        """Initialize tool sequence assertion.

        Args:
            sequence: Expected sequence of tool names
            strict: If True, the tool calls in the trace must match
                ``sequence`` exactly: same names, same order, and no
                additional tool calls anywhere. If False, other tools are
                allowed between (and around) the expected ones.

        Raises:
            ValueError: If ``sequence`` is empty.
        """
        # Keep a private list copy. Strict mode compares against a list of
        # span names, and a list never equals a tuple, so a tuple argument
        # would otherwise make strict mode fail unconditionally. The copy
        # also shields the assertion from later mutation by the caller.
        normalized = list(sequence) if sequence else []
        if not normalized:
            raise ValueError("sequence cannot be empty")

        self.sequence = normalized
        self.strict = strict

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if tools were called in the expected sequence.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect. ``None`` or an empty list fails the
                assertion with ``actual="no trace"``.

        Returns:
            An ``AssertionResult`` with ``assertion_type="tool_sequence"``.
            When tool spans exist, ``actual`` is the list of tool names in
            ``started_at`` order and ``details`` records the mode and the
            expected/actual lengths.
        """
        expected_description = f"sequence: {self.sequence}"

        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="tool_sequence",
                message="No trace available to check tool sequence",
                expected=expected_description,
                actual="no trace",
                details={},
            )

        # Extract tool call sequence from trace (ordered by started_at)
        tool_spans = [
            span
            for span in sorted(trace, key=lambda s: s.started_at)
            if span.span_type == SpanType.TOOL
        ]

        if not tool_spans:
            return AssertionResult(
                passed=False,
                assertion_type="tool_sequence",
                message="No tool calls found in trace",
                expected=expected_description,
                actual="no tools called",
                details={},
            )

        actual_sequence = [span.name for span in tool_spans]

        if self.strict:
            # Strict mode: must match exactly
            passed = actual_sequence == self.sequence
            if passed:
                message = f"Tool sequence matches exactly: {self.sequence}"
            else:
                message = f"Tool sequence does not match. Expected {self.sequence}, got {actual_sequence}"
        else:
            # Non-strict: check that the expected names form a subsequence
            # of the actual calls. `matched` counts how many expected names
            # have been found so far, in order.
            wanted = len(self.sequence)
            matched = 0
            for tool_name in actual_sequence:
                if matched == wanted:
                    break  # everything found; no need to scan further
                if tool_name == self.sequence[matched]:
                    matched += 1

            passed = matched == wanted

            if passed:
                message = f"Tool sequence {self.sequence} found in correct order"
            else:
                found = self.sequence[:matched]
                missing = self.sequence[matched:]
                message = f"Tool sequence incomplete. Found {found}, missing {missing}"

        return AssertionResult(
            passed=passed,
            assertion_type="tool_sequence",
            message=message,
            expected=self.sequence,
            actual=actual_sequence,
            details={
                "strict": self.strict,
                "expected_length": len(self.sequence),
                "actual_length": len(actual_sequence),
            },
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> ToolSequenceAssertion:
        """Create from configuration.

        Config format:
            {
                "sequence": ["tool1", "tool2", "tool3"],
                "strict": false  # optional, default: false
            }

        Raises:
            ValueError: If ``sequence`` is missing from ``config`` or empty.
        """
        if "sequence" not in config:
            raise ValueError("ToolSequenceAssertion requires 'sequence' in config")

        return cls(
            sequence=config["sequence"],
            strict=config.get("strict", False),
        )

    def __repr__(self) -> str:
        return f"ToolSequenceAssertion(sequence={self.sequence}, strict={self.strict})"