langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/METADATA +95 -34
- langwatch_scenario-0.3.0.dist-info/RECORD +16 -0
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/WHEEL +1 -1
- scenario/__init__.py +13 -3
- scenario/config.py +18 -7
- scenario/error_messages.py +81 -23
- scenario/pytest_plugin.py +8 -8
- scenario/scenario.py +144 -26
- scenario/scenario_agent_adapter.py +16 -0
- scenario/scenario_executor.py +405 -143
- scenario/testing_agent.py +123 -109
- scenario/types.py +96 -0
- scenario/utils.py +148 -5
- langwatch_scenario-0.1.3.dist-info/RECORD +0 -15
- scenario/result.py +0 -81
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/top_level.txt +0 -0
scenario/result.py
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Result module: defines the class for scenario test results.
|
3
|
-
"""
|
4
|
-
|
5
|
-
from dataclasses import dataclass, field
|
6
|
-
from typing import List, Dict, Optional
|
7
|
-
|
8
|
-
|
9
|
-
@dataclass
class ScenarioResult:
    """
    Outcome of a single scenario test run.

    Attributes:
        success: True when the scenario passed
        conversation: The conversation history, as a list of string-to-string dicts
        reasoning: Explanation of the outcome; mandatory when the scenario failed
        met_criteria: Success criteria that were satisfied
        unmet_criteria: Success criteria that were not satisfied
        triggered_failures: Failure criteria that were triggered
        total_time: Optional timing value supplied by the caller
            (presumably the duration of the whole run; units not shown here)
        agent_time: Optional timing value supplied by the caller
            (presumably the time spent in the agent under test; units not shown here)
    """

    success: bool
    conversation: List[Dict[str, str]]
    reasoning: Optional[str] = None
    met_criteria: List[str] = field(default_factory=list)
    unmet_criteria: List[str] = field(default_factory=list)
    triggered_failures: List[str] = field(default_factory=list)
    total_time: Optional[float] = None
    agent_time: Optional[float] = None

    def __post_init__(self) -> None:
        """Reject a failed result that carries no (or empty) reasoning."""
        # A passing result, or any result that explains itself, is acceptable.
        if self.success or self.reasoning:
            return
        raise ValueError("Failed scenarios must have a reasoning")

    @classmethod
    def success_result(
        cls,
        conversation: List[Dict[str, str]],
        reasoning: Optional[str],
        met_criteria: List[str],
        total_time: Optional[float] = None,
        agent_time: Optional[float] = None,
    ) -> "ScenarioResult":
        """Build a passing result; unmet and triggered lists are always empty."""
        passing_fields = {
            "success": True,
            "conversation": conversation,
            "reasoning": reasoning,
            "met_criteria": met_criteria,
            "unmet_criteria": [],
            "triggered_failures": [],
            "total_time": total_time,
            "agent_time": agent_time,
        }
        return cls(**passing_fields)

    @classmethod
    def failure_result(
        cls,
        conversation: List[Dict[str, str]],
        reasoning: str,
        met_criteria: Optional[List[str]] = None,
        unmet_criteria: Optional[List[str]] = None,
        triggered_failures: Optional[List[str]] = None,
        total_time: Optional[float] = None,
        agent_time: Optional[float] = None,
    ) -> "ScenarioResult":
        """Build a failing result, turning any omitted criteria list into an empty one."""
        # Only None is replaced; an explicitly passed list (even empty) is kept as is.
        if met_criteria is None:
            met_criteria = []
        if unmet_criteria is None:
            unmet_criteria = []
        if triggered_failures is None:
            triggered_failures = []

        return cls(
            success=False,
            conversation=conversation,
            reasoning=reasoning,
            met_criteria=met_criteria,
            unmet_criteria=unmet_criteria,
            triggered_failures=triggered_failures,
            total_time=total_time,
            agent_time=agent_time,
        )
|
File without changes
|
File without changes
|