prela 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Multi-agent assertions for evaluating agent collaboration and coordination.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
# Check tier before allowing multi-agent assertions
|
|
10
|
+
from prela.license import check_tier
|
|
11
|
+
|
|
12
|
+
# Import-time gate: when check_tier() reports that the "lunch-money" tier is
# not available, importing this module fails with ImportError.
if not check_tier("Multi-agent assertions", "lunch-money", silent=False):
    raise ImportError(
        "Multi-agent assertions require 'lunch-money' subscription or higher. "
        "Upgrade at https://prela.dev/pricing"
    )
|
|
17
|
+
|
|
18
|
+
from prela.core.span import Span
|
|
19
|
+
from prela.evals.assertions.base import AssertionResult, BaseAssertion
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AgentUsedAssertion(BaseAssertion):
    """Check that a named agent appears in the trace often enough.

    A span counts as one invocation when its ``agent.name`` attribute equals
    the configured agent name.

    Example:
        >>> assertion = AgentUsedAssertion(agent_name="researcher", min_invocations=2)
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, agent_name: str, min_invocations: int = 1):
        """Store the agent to look for and the invocation threshold.

        Args:
            agent_name: Value of the ``agent.name`` span attribute to match.
            min_invocations: Smallest number of matching spans that passes.
        """
        self.agent_name = agent_name
        self.min_invocations = min_invocations

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Count spans attributed to the agent and compare with the minimum.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            An ``agent_used`` result whose ``actual`` is the number of spans
            carrying the configured agent name.
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="agent_used",
                message="No trace data available to check agent usage",
                details={"agent_name": self.agent_name},
            )

        invocations = sum(
            1 for span in trace if span.attributes.get("agent.name") == self.agent_name
        )
        summary = (
            f"Agent '{self.agent_name}' invoked {invocations} times "
            f"(min: {self.min_invocations})"
        )
        return AssertionResult(
            passed=invocations >= self.min_invocations,
            assertion_type="agent_used",
            message=summary,
            expected=self.min_invocations,
            actual=invocations,
            details={"agent_name": self.agent_name, "invocations": invocations},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> AgentUsedAssertion:
        """Build the assertion from a configuration mapping.

        Config format:
            {
                "agent_name": "researcher",
                "min_invocations": 2  # optional, default: 1
            }

        Raises:
            ValueError: If ``agent_name`` is absent from ``config``.
        """
        if "agent_name" in config:
            return cls(
                agent_name=config["agent_name"],
                min_invocations=config.get("min_invocations", 1),
            )
        raise ValueError("AgentUsedAssertion requires 'agent_name' in config")

    def __repr__(self) -> str:
        return (
            "AgentUsedAssertion("
            f"agent_name={self.agent_name!r}, "
            f"min_invocations={self.min_invocations})"
        )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class TaskCompletedAssertion(BaseAssertion):
    """Assert that a task was completed successfully.

    A span is treated as a task span when its name contains ``"task."`` and
    its ``task.description`` attribute contains the configured text
    (case-insensitive). The assertion passes when at least one such span has
    a ``task.status`` attribute equal to the expected status.

    Example:
        >>> assertion = TaskCompletedAssertion(task_description_contains="research")
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, task_description_contains: str, expected_status: str = "completed"):
        """Initialize task completed assertion.

        Args:
            task_description_contains: Text that must be in task description
            expected_status: Expected task status (default: "completed")
        """
        self.task_description_contains = task_description_contains
        self.expected_status = expected_status

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if a matching task reached the expected status.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            A ``task_completed`` result. ``actual`` is the status of the span
            that satisfied the assertion, or of the first matching span when
            none did (``"unknown"`` if that span has no ``task.status``).
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="task_completed",
                message="No trace data available to check task completion",
                details={},
            )

        # Lower-case the search text once instead of once per span.
        needle = self.task_description_contains.lower()
        task_spans = []
        for span in trace:
            if "task." not in span.name:
                continue
            description = span.attributes.get("task.description", "")
            # A None or non-string description cannot contain the text;
            # skip it rather than crash on ``.lower()``.
            if isinstance(description, str) and needle in description.lower():
                task_spans.append(span)

        if not task_spans:
            return AssertionResult(
                passed=False,
                assertion_type="task_completed",
                message=f"No task found containing '{self.task_description_contains}'",
                expected=self.expected_status,
                actual=None,
                details={"task_description_contains": self.task_description_contains},
            )

        completed = [
            s for s in task_spans if s.attributes.get("task.status") == self.expected_status
        ]
        # Report the span that decided the outcome, so a passing result never
        # shows the status of an unrelated earlier span.
        reported = completed[0] if completed else task_spans[0]
        actual_status = reported.attributes.get("task.status", "unknown")

        return AssertionResult(
            passed=len(completed) > 0,
            assertion_type="task_completed",
            message=f"Task '{self.task_description_contains}' status: {actual_status}",
            expected=self.expected_status,
            actual=actual_status,
            details={"found": len(task_spans), "completed": len(completed)},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> TaskCompletedAssertion:
        """Create from configuration.

        Config format:
            {
                "task_description_contains": "research",
                "expected_status": "completed"  # optional, default: "completed"
            }

        Raises:
            ValueError: If ``task_description_contains`` is absent from ``config``.
        """
        if "task_description_contains" not in config:
            raise ValueError(
                "TaskCompletedAssertion requires 'task_description_contains' in config"
            )

        return cls(
            task_description_contains=config["task_description_contains"],
            expected_status=config.get("expected_status", "completed"),
        )

    def __repr__(self) -> str:
        return f"TaskCompletedAssertion(task_description_contains={self.task_description_contains!r}, expected_status={self.expected_status!r})"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class DelegationOccurredAssertion(BaseAssertion):
    """Check that at least one delegation event matches the given agents.

    Delegations are read from span events named ``agent.delegation``; the
    event attributes ``delegation.from`` and ``delegation.to`` are compared
    with the configured agent names when those are set.

    Example:
        >>> assertion = DelegationOccurredAssertion(from_agent="manager", to_agent="researcher")
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, from_agent: str | None = None, to_agent: str | None = None):
        """Store the optional endpoints of the delegation to look for.

        Args:
            from_agent: Delegating agent; any agent matches when falsy.
            to_agent: Receiving agent; any agent matches when falsy.
        """
        self.from_agent = from_agent
        self.to_agent = to_agent

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Scan span events for a delegation that satisfies the filters.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            A ``delegation_occurred`` result; ``details["delegations"]`` is
            the number of matching events.
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="delegation_occurred",
                message="No trace data available to check delegation",
                details={},
            )

        matching = [
            event.attributes
            for span in trace
            for event in span.events
            if event.name == "agent.delegation"
            and (
                not self.from_agent
                or event.attributes.get("delegation.from") == self.from_agent
            )
            and (
                not self.to_agent
                or event.attributes.get("delegation.to") == self.to_agent
            )
        ]
        found = len(matching) > 0

        # Describe only the endpoints that were actually constrained.
        pieces = []
        if self.from_agent:
            pieces.append(f" from {self.from_agent}")
        if self.to_agent:
            pieces.append(f" to {self.to_agent}")
        direction = "".join(pieces)

        outcome = "occurred" if matching else "did not occur"
        return AssertionResult(
            passed=found,
            assertion_type="delegation_occurred",
            message=f"Delegation{direction} {outcome}",
            expected=True,
            actual=found,
            details={"delegations": len(matching)},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> DelegationOccurredAssertion:
        """Build the assertion from a configuration mapping.

        Config format:
            {
                "from_agent": "manager",   # optional
                "to_agent": "researcher"   # optional
            }
        """
        source_agent = config.get("from_agent")
        target_agent = config.get("to_agent")
        return cls(from_agent=source_agent, to_agent=target_agent)

    def __repr__(self) -> str:
        return (
            "DelegationOccurredAssertion("
            f"from_agent={self.from_agent!r}, "
            f"to_agent={self.to_agent!r})"
        )
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class HandoffOccurredAssertion(BaseAssertion):
    """Check that an agent handoff event is present (typically Swarm pattern).

    Handoffs are read from span events named ``agent.handoff``; when a target
    agent is configured, the event attribute ``handoff.to_agent`` must equal it.

    Example:
        >>> assertion = HandoffOccurredAssertion(to_agent="specialist")
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, to_agent: str | None = None):
        """Store the optional handoff target.

        Args:
            to_agent: Agent receiving the handoff; any agent matches when falsy.
        """
        self.to_agent = to_agent

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Scan span events for a handoff to the configured agent.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            A ``handoff_occurred`` result; ``details["handoffs"]`` is the
            number of matching events.
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="handoff_occurred",
                message="No trace data available to check handoff",
                details={},
            )

        matching = [
            event.attributes
            for span in trace
            for event in span.events
            if event.name == "agent.handoff"
            and (
                not self.to_agent
                or event.attributes.get("handoff.to_agent") == self.to_agent
            )
        ]
        found = len(matching) > 0
        target = self.to_agent if self.to_agent else "any agent"
        outcome = "found" if matching else "not found"

        return AssertionResult(
            passed=found,
            assertion_type="handoff_occurred",
            message=f"Handoff to {target}: {outcome}",
            expected=True,
            actual=found,
            details={"handoffs": len(matching)},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> HandoffOccurredAssertion:
        """Build the assertion from a configuration mapping.

        Config format:
            {
                "to_agent": "specialist"  # optional
            }
        """
        target_agent = config.get("to_agent")
        return cls(to_agent=target_agent)

    def __repr__(self) -> str:
        return "HandoffOccurredAssertion(" f"to_agent={self.to_agent!r})"
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class AgentCollaborationAssertion(BaseAssertion):
    """Assert that multiple agents collaborated during execution.

    Agents are identified by the ``agent.name`` attribute of each span. The
    assertion passes when enough distinct agents were seen and, if a list of
    required agents was given, every one of them was seen.

    Example:
        >>> assertion = AgentCollaborationAssertion(min_agents=3, required_agents=["manager", "researcher"])
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(
        self, min_agents: int = 2, required_agents: list[str] | None = None
    ):
        """Initialize collaboration assertion.

        Args:
            min_agents: Minimum number of distinct agents required
            required_agents: List of specific agents that must participate (optional)
        """
        self.min_agents = min_agents
        self.required_agents = required_agents

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if agents collaborated.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            An ``agent_collaboration`` result. ``details["missing"]`` lists
            the required agents that were not seen, in the order they were
            configured.
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="agent_collaboration",
                message="No trace data available to check collaboration",
                details={},
            )

        agents_seen = set()
        for span in trace:
            agent_name = span.attributes.get("agent.name")
            if agent_name:
                agents_seen.add(agent_name)

        passed = len(agents_seen) >= self.min_agents
        missing = []

        if self.required_agents:
            # Keep the configured order (duplicates removed) so the message
            # and details are identical from run to run; a plain set
            # difference has no stable order under string hash randomization.
            missing = [
                name
                for name in dict.fromkeys(self.required_agents)
                if name not in agents_seen
            ]
            passed = passed and not missing

        message = f"Found {len(agents_seen)} agents: {sorted(agents_seen)}"
        if missing:
            message += f" (missing required: {missing})"

        return AssertionResult(
            passed=passed,
            assertion_type="agent_collaboration",
            message=message,
            expected=self.min_agents,
            actual=len(agents_seen),
            details={
                "agents": sorted(agents_seen),
                "min": self.min_agents,
                "missing": missing,
            },
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> AgentCollaborationAssertion:
        """Create from configuration.

        Config format:
            {
                "min_agents": 3,  # optional, default: 2
                "required_agents": ["manager", "researcher"]  # optional
            }
        """
        return cls(
            min_agents=config.get("min_agents", 2),
            required_agents=config.get("required_agents"),
        )

    def __repr__(self) -> str:
        return f"AgentCollaborationAssertion(min_agents={self.min_agents}, required_agents={self.required_agents!r})"
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
class ConversationTurnsAssertion(BaseAssertion):
    """Assert on the number of conversation turns.

    The turn count is the largest ``conversation.total_turns`` attribute found
    on spans whose name contains ``"conversation"``; it is 0 when no such
    value is recorded.

    Example:
        >>> assertion = ConversationTurnsAssertion(min_turns=3, max_turns=10)
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self, min_turns: int | None = None, max_turns: int | None = None):
        """Initialize conversation turns assertion.

        Args:
            min_turns: Minimum number of conversation turns (optional)
            max_turns: Maximum number of conversation turns (optional)
        """
        self.min_turns = min_turns
        self.max_turns = max_turns

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check conversation turn count against the configured bounds.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace fails.

        Returns:
            A ``conversation_turns`` result. ``expected`` is rendered as
            ``"<min>-<max>"``, with ``0`` for an unset minimum and ``∞`` for
            an unset maximum.
        """
        if not trace:
            return AssertionResult(
                passed=False,
                assertion_type="conversation_turns",
                message="No trace data available to check conversation turns",
                details={},
            )

        turn_count = 0
        for span in trace:
            if "conversation" in span.name:
                recorded = span.attributes.get("conversation.total_turns")
                if recorded:
                    turn_count = max(turn_count, recorded)

        passed = True
        constraints = []

        if self.min_turns is not None:
            if turn_count < self.min_turns:
                passed = False
            constraints.append(f"min: {self.min_turns}")

        if self.max_turns is not None:
            if turn_count > self.max_turns:
                passed = False
            constraints.append(f"max: {self.max_turns}")

        constraint_str = f" ({', '.join(constraints)})" if constraints else ""

        # Only an unset maximum is unbounded; an explicit max_turns=0 must be
        # shown as 0, so test for None rather than truthiness.
        upper_bound = "∞" if self.max_turns is None else self.max_turns

        return AssertionResult(
            passed=passed,
            assertion_type="conversation_turns",
            message=f"Conversation had {turn_count} turns{constraint_str}",
            expected=f"{self.min_turns or 0}-{upper_bound}",
            actual=turn_count,
            details={
                "turns": turn_count,
                "min": self.min_turns,
                "max": self.max_turns,
            },
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> ConversationTurnsAssertion:
        """Create from configuration.

        Config format:
            {
                "min_turns": 3,  # optional
                "max_turns": 10  # optional
            }
        """
        return cls(
            min_turns=config.get("min_turns"),
            max_turns=config.get("max_turns"),
        )

    def __repr__(self) -> str:
        return f"ConversationTurnsAssertion(min_turns={self.min_turns}, max_turns={self.max_turns})"
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
class NoCircularDelegationAssertion(BaseAssertion):
    """Assert that no circular delegation patterns exist.

    Detects cycles where agent A delegates to B, B to C, and C back to A.
    Edges are taken from span events named ``agent.delegation`` or
    ``agent.handoff``.

    Example:
        >>> assertion = NoCircularDelegationAssertion()
        >>> result = assertion.evaluate(output=None, expected=None, trace=spans)
        >>> assert result.passed
    """

    def __init__(self):
        """Initialize no circular delegation assertion (takes no parameters)."""

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check for circular delegation patterns.

        Args:
            output: Not used by this assertion.
            expected: Not used by this assertion.
            trace: Spans to inspect; a missing or empty trace passes.

        Returns:
            A ``no_circular_delegation`` result. ``actual`` is the number of
            cycles found and ``details["cycles"]`` renders each one as
            ``"A → B → A"``.
        """
        if not trace:
            return AssertionResult(
                passed=True,  # No trace means no cycles
                assertion_type="no_circular_delegation",
                message="No trace data to check for circular delegation",
                details={},
            )

        # Collect all delegations/handoffs as (from, to) edges. Events missing
        # either endpoint are ignored.
        edges = []
        for span in trace:
            for event in span.events:
                if event.name in ["agent.delegation", "agent.handoff"]:
                    attrs = event.attributes
                    from_a = attrs.get("delegation.from") or attrs.get(
                        "handoff.from_agent"
                    )
                    to_a = attrs.get("delegation.to") or attrs.get("handoff.to_agent")
                    if from_a and to_a:
                        edges.append((from_a, to_a))

        # Detect cycles using DFS
        cycles = self._detect_cycles(edges)
        message = "No cycles found" if not cycles else f"Found {len(cycles)} cycle(s)"

        return AssertionResult(
            passed=len(cycles) == 0,
            assertion_type="no_circular_delegation",
            message=message,
            expected=0,
            actual=len(cycles),
            details={"cycles": [" → ".join(cycle) for cycle in cycles]},
        )

    def _detect_cycles(self, edges: list[tuple[str, str]]) -> list[list[str]]:
        """Detect cycles in delegation graph using DFS.

        Repeated edges are collapsed before the search, so the same
        delegation happening several times does not make one cycle appear
        more than once in the result.

        Args:
            edges: List of (from_agent, to_agent) tuples

        Returns:
            List of cycles, where each cycle is a list of agent names that
            starts and ends with the same agent.
        """
        # Build adjacency list from distinct edges, keeping first-seen order.
        graph: dict[str, list[str]] = {}
        for from_a, to_a in dict.fromkeys((src, dst) for src, dst in edges):
            graph.setdefault(from_a, []).append(to_a)

        cycles: list[list[str]] = []
        visited = set()
        rec_stack = set()
        # Nodes on the current DFS branch, in visiting order.
        path: list[str] = []

        def dfs(node: str) -> None:
            visited.add(node)
            rec_stack.add(node)
            path.append(node)

            for neighbor in graph.get(node, []):
                if neighbor not in visited:
                    dfs(neighbor)
                elif neighbor in rec_stack:
                    # Back edge to a node on the current branch: the cycle is
                    # the branch from that node onward, closed by the neighbor.
                    cycle_start = path.index(neighbor)
                    cycles.append(path[cycle_start:] + [neighbor])

            path.pop()
            rec_stack.remove(node)

        for node in graph:
            if node not in visited:
                dfs(node)

        return cycles

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> NoCircularDelegationAssertion:
        """Create from configuration.

        Config format:
            {}  # No parameters required
        """
        return cls()

    def __repr__(self) -> str:
        return "NoCircularDelegationAssertion()"
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
# Names exported by ``from ... import *``: the assertion classes defined above.
__all__ = [
    "AgentUsedAssertion",
    "TaskCompletedAssertion",
    "DelegationOccurredAssertion",
    "HandoffOccurredAssertion",
    "AgentCollaborationAssertion",
    "ConversationTurnsAssertion",
    "NoCircularDelegationAssertion",
]
|