ragbits-evaluate 0.0.30rc1__py3-none-any.whl → 1.4.0.dev202602030301__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragbits/evaluate/agent_simulation/__init__.py +4 -49
- ragbits/evaluate/agent_simulation/conversation.py +278 -663
- ragbits/evaluate/agent_simulation/logger.py +1 -1
- ragbits/evaluate/agent_simulation/metrics/__init__.py +0 -10
- ragbits/evaluate/agent_simulation/metrics/builtin.py +49 -59
- ragbits/evaluate/agent_simulation/metrics/collectors.py +17 -37
- ragbits/evaluate/agent_simulation/models.py +18 -198
- ragbits/evaluate/agent_simulation/results.py +49 -125
- ragbits/evaluate/agent_simulation/scenarios.py +19 -95
- ragbits/evaluate/agent_simulation/simulation.py +166 -72
- ragbits/evaluate/metrics/question_answer.py +25 -8
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/METADATA +2 -6
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/RECORD +14 -25
- ragbits/evaluate/agent_simulation/checkers.py +0 -591
- ragbits/evaluate/agent_simulation/display.py +0 -118
- ragbits/evaluate/agent_simulation/metrics/deepeval.py +0 -295
- ragbits/evaluate/agent_simulation/tracing.py +0 -233
- ragbits/evaluate/api.py +0 -603
- ragbits/evaluate/api_types.py +0 -343
- ragbits/evaluate/execution_manager.py +0 -451
- ragbits/evaluate/stores/__init__.py +0 -36
- ragbits/evaluate/stores/base.py +0 -98
- ragbits/evaluate/stores/file.py +0 -466
- ragbits/evaluate/stores/kv.py +0 -535
- {ragbits_evaluate-0.0.30rc1.dist-info → ragbits_evaluate-1.4.0.dev202602030301.dist-info}/WHEEL +0 -0
|
@@ -5,8 +5,6 @@ from datetime import datetime
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
-
from ragbits.core.llms.base import Usage
|
|
9
|
-
|
|
10
8
|
|
|
11
9
|
class SimulationStatus(str, Enum):
|
|
12
10
|
"""Status of a simulation run."""
|
|
@@ -17,60 +15,6 @@ class SimulationStatus(str, Enum):
|
|
|
17
15
|
TIMEOUT = "timeout"
|
|
18
16
|
|
|
19
17
|
|
|
20
|
-
@dataclass
|
|
21
|
-
class CheckerResultItem:
|
|
22
|
-
"""Result of a single checker evaluation."""
|
|
23
|
-
|
|
24
|
-
type: str
|
|
25
|
-
completed: bool
|
|
26
|
-
reason: str
|
|
27
|
-
|
|
28
|
-
def to_dict(self) -> dict[str, Any]:
|
|
29
|
-
"""Convert to dictionary."""
|
|
30
|
-
return {"type": self.type, "completed": self.completed, "reason": self.reason}
|
|
31
|
-
|
|
32
|
-
@classmethod
|
|
33
|
-
def from_dict(cls, data: dict[str, Any]) -> "CheckerResultItem":
|
|
34
|
-
"""Create from dictionary."""
|
|
35
|
-
return cls(
|
|
36
|
-
type=data.get("type", data.get("checker_type", "unknown")),
|
|
37
|
-
completed=data.get("completed", False),
|
|
38
|
-
reason=data.get("reason", ""),
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@dataclass
|
|
43
|
-
class ResponseChunk:
|
|
44
|
-
"""A response chunk from the ChatInterface stream."""
|
|
45
|
-
|
|
46
|
-
turn_index: int
|
|
47
|
-
task_index: int
|
|
48
|
-
chunk_index: int
|
|
49
|
-
chunk_type: str
|
|
50
|
-
chunk_data: dict[str, Any]
|
|
51
|
-
|
|
52
|
-
def to_dict(self) -> dict[str, Any]:
|
|
53
|
-
"""Convert to dictionary."""
|
|
54
|
-
return {
|
|
55
|
-
"turn_index": self.turn_index,
|
|
56
|
-
"task_index": self.task_index,
|
|
57
|
-
"chunk_index": self.chunk_index,
|
|
58
|
-
"chunk_type": self.chunk_type,
|
|
59
|
-
"chunk_data": self.chunk_data,
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
@classmethod
|
|
63
|
-
def from_dict(cls, data: dict[str, Any]) -> "ResponseChunk":
|
|
64
|
-
"""Create from dictionary."""
|
|
65
|
-
return cls(
|
|
66
|
-
turn_index=data.get("turn_index", 0),
|
|
67
|
-
task_index=data.get("task_index", 0),
|
|
68
|
-
chunk_index=data.get("chunk_index", 0),
|
|
69
|
-
chunk_type=data.get("chunk_type", "unknown"),
|
|
70
|
-
chunk_data=data.get("chunk_data", {}),
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
|
|
74
18
|
@dataclass
|
|
75
19
|
class TurnResult:
|
|
76
20
|
"""Result of a single conversation turn."""
|
|
@@ -82,10 +26,8 @@ class TurnResult:
|
|
|
82
26
|
tool_calls: list[dict[str, Any]] = field(default_factory=list)
|
|
83
27
|
task_completed: bool = False
|
|
84
28
|
task_completed_reason: str = ""
|
|
85
|
-
token_usage:
|
|
29
|
+
token_usage: dict[str, int] | None = None
|
|
86
30
|
latency_ms: float | None = None
|
|
87
|
-
checkers: list[CheckerResultItem] = field(default_factory=list)
|
|
88
|
-
checker_mode: str = "all"
|
|
89
31
|
|
|
90
32
|
|
|
91
33
|
@dataclass
|
|
@@ -94,51 +36,32 @@ class TaskResult:
|
|
|
94
36
|
|
|
95
37
|
task_index: int
|
|
96
38
|
description: str
|
|
39
|
+
expected_result: str | None
|
|
97
40
|
completed: bool
|
|
98
41
|
turns_taken: int
|
|
99
42
|
final_reason: str
|
|
100
|
-
checkers: list[dict[str, Any]] = field(default_factory=list)
|
|
101
|
-
checker_mode: str = "all"
|
|
102
43
|
|
|
103
44
|
|
|
104
45
|
@dataclass
|
|
105
46
|
class ConversationMetrics:
|
|
106
|
-
"""Aggregate metrics for the conversation.
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
Additional metrics from custom collectors are merged into this dict.
|
|
119
|
-
"""
|
|
120
|
-
|
|
121
|
-
metrics: dict[str, Any] = field(default_factory=dict)
|
|
122
|
-
|
|
123
|
-
@property
|
|
124
|
-
def total_turns(self) -> int:
|
|
125
|
-
"""Number of conversation turns."""
|
|
126
|
-
return self.metrics.get("total_turns", 0)
|
|
127
|
-
|
|
128
|
-
@property
|
|
129
|
-
def total_tasks(self) -> int:
|
|
130
|
-
"""Number of tasks in scenario."""
|
|
131
|
-
return self.metrics.get("total_tasks", 0)
|
|
132
|
-
|
|
133
|
-
@property
|
|
134
|
-
def tasks_completed(self) -> int:
|
|
135
|
-
"""Number of completed tasks."""
|
|
136
|
-
return self.metrics.get("tasks_completed", 0)
|
|
47
|
+
"""Aggregate metrics for the conversation."""
|
|
48
|
+
|
|
49
|
+
total_turns: int
|
|
50
|
+
total_tasks: int
|
|
51
|
+
tasks_completed: int
|
|
52
|
+
total_tokens: int = 0
|
|
53
|
+
prompt_tokens: int = 0
|
|
54
|
+
completion_tokens: int = 0
|
|
55
|
+
total_cost_usd: float = 0.0
|
|
56
|
+
deepeval_scores: dict[str, float] = field(default_factory=dict)
|
|
57
|
+
custom: dict[str, Any] = field(default_factory=dict)
|
|
137
58
|
|
|
138
59
|
@property
|
|
139
60
|
def success_rate(self) -> float:
|
|
140
61
|
"""Calculate task success rate."""
|
|
141
|
-
|
|
62
|
+
if self.total_tasks == 0:
|
|
63
|
+
return 0.0
|
|
64
|
+
return self.tasks_completed / self.total_tasks
|
|
142
65
|
|
|
143
66
|
|
|
144
67
|
@dataclass
|
|
@@ -154,23 +77,15 @@ class SimulationResult:
|
|
|
154
77
|
turns: list[TurnResult] = field(default_factory=list)
|
|
155
78
|
tasks: list[TaskResult] = field(default_factory=list)
|
|
156
79
|
metrics: ConversationMetrics | None = None
|
|
157
|
-
response_chunks: list[ResponseChunk] = field(default_factory=list)
|
|
158
80
|
|
|
159
81
|
# Optional metadata
|
|
160
82
|
end_time: datetime | None = None
|
|
161
83
|
agent_model: str | None = None
|
|
162
84
|
simulated_user_model: str | None = None
|
|
163
85
|
checker_model: str | None = None
|
|
164
|
-
|
|
86
|
+
personality: str | None = None
|
|
165
87
|
error: str | None = None
|
|
166
88
|
|
|
167
|
-
# Conversation context
|
|
168
|
-
conversation_id: str | None = None
|
|
169
|
-
final_state: dict[str, Any] = field(default_factory=dict)
|
|
170
|
-
|
|
171
|
-
# Traces from the chat interface
|
|
172
|
-
traces: list[dict[str, Any]] = field(default_factory=list)
|
|
173
|
-
|
|
174
89
|
def to_dict(self) -> dict[str, Any]:
|
|
175
90
|
"""Convert to JSON-serializable dictionary."""
|
|
176
91
|
return {
|
|
@@ -181,11 +96,8 @@ class SimulationResult:
|
|
|
181
96
|
"agent_model": self.agent_model,
|
|
182
97
|
"simulated_user_model": self.simulated_user_model,
|
|
183
98
|
"checker_model": self.checker_model,
|
|
184
|
-
"
|
|
99
|
+
"personality": self.personality,
|
|
185
100
|
"error": self.error,
|
|
186
|
-
"conversation_id": self.conversation_id,
|
|
187
|
-
"final_state": self.final_state,
|
|
188
|
-
"response_chunks": [c.to_dict() for c in self.response_chunks],
|
|
189
101
|
"turns": [
|
|
190
102
|
{
|
|
191
103
|
"turn_index": t.turn_index,
|
|
@@ -195,10 +107,8 @@ class SimulationResult:
|
|
|
195
107
|
"tool_calls": t.tool_calls,
|
|
196
108
|
"task_completed": t.task_completed,
|
|
197
109
|
"task_completed_reason": t.task_completed_reason,
|
|
198
|
-
"token_usage": t.token_usage
|
|
110
|
+
"token_usage": t.token_usage,
|
|
199
111
|
"latency_ms": t.latency_ms,
|
|
200
|
-
"checkers": [c.to_dict() for c in t.checkers],
|
|
201
|
-
"checker_mode": t.checker_mode,
|
|
202
112
|
}
|
|
203
113
|
for t in self.turns
|
|
204
114
|
],
|
|
@@ -206,16 +116,27 @@ class SimulationResult:
|
|
|
206
116
|
{
|
|
207
117
|
"task_index": t.task_index,
|
|
208
118
|
"description": t.description,
|
|
119
|
+
"expected_result": t.expected_result,
|
|
209
120
|
"completed": t.completed,
|
|
210
121
|
"turns_taken": t.turns_taken,
|
|
211
122
|
"final_reason": t.final_reason,
|
|
212
|
-
"checkers": t.checkers,
|
|
213
|
-
"checker_mode": t.checker_mode,
|
|
214
123
|
}
|
|
215
124
|
for t in self.tasks
|
|
216
125
|
],
|
|
217
|
-
"metrics":
|
|
218
|
-
|
|
126
|
+
"metrics": {
|
|
127
|
+
"total_turns": self.metrics.total_turns,
|
|
128
|
+
"total_tasks": self.metrics.total_tasks,
|
|
129
|
+
"tasks_completed": self.metrics.tasks_completed,
|
|
130
|
+
"success_rate": self.metrics.success_rate,
|
|
131
|
+
"total_tokens": self.metrics.total_tokens,
|
|
132
|
+
"prompt_tokens": self.metrics.prompt_tokens,
|
|
133
|
+
"completion_tokens": self.metrics.completion_tokens,
|
|
134
|
+
"total_cost_usd": self.metrics.total_cost_usd,
|
|
135
|
+
"deepeval_scores": self.metrics.deepeval_scores,
|
|
136
|
+
"custom": self.metrics.custom,
|
|
137
|
+
}
|
|
138
|
+
if self.metrics
|
|
139
|
+
else None,
|
|
219
140
|
}
|
|
220
141
|
|
|
221
142
|
@classmethod
|
|
@@ -232,8 +153,6 @@ class SimulationResult:
|
|
|
232
153
|
task_completed_reason=t.get("task_completed_reason", ""),
|
|
233
154
|
token_usage=t.get("token_usage"),
|
|
234
155
|
latency_ms=t.get("latency_ms"),
|
|
235
|
-
checkers=[CheckerResultItem.from_dict(c) for c in t.get("checkers", [])],
|
|
236
|
-
checker_mode=t.get("checker_mode", "all"),
|
|
237
156
|
)
|
|
238
157
|
for t in data.get("turns", [])
|
|
239
158
|
]
|
|
@@ -242,19 +161,28 @@ class SimulationResult:
|
|
|
242
161
|
TaskResult(
|
|
243
162
|
task_index=t["task_index"],
|
|
244
163
|
description=t["description"],
|
|
164
|
+
expected_result=t.get("expected_result"),
|
|
245
165
|
completed=t["completed"],
|
|
246
166
|
turns_taken=t["turns_taken"],
|
|
247
167
|
final_reason=t["final_reason"],
|
|
248
|
-
checkers=t.get("checkers", []),
|
|
249
|
-
checker_mode=t.get("checker_mode", "all"),
|
|
250
168
|
)
|
|
251
169
|
for t in data.get("tasks", [])
|
|
252
170
|
]
|
|
253
171
|
|
|
254
172
|
metrics_data = data.get("metrics")
|
|
255
|
-
metrics =
|
|
256
|
-
|
|
257
|
-
|
|
173
|
+
metrics = None
|
|
174
|
+
if metrics_data:
|
|
175
|
+
metrics = ConversationMetrics(
|
|
176
|
+
total_turns=metrics_data["total_turns"],
|
|
177
|
+
total_tasks=metrics_data["total_tasks"],
|
|
178
|
+
tasks_completed=metrics_data["tasks_completed"],
|
|
179
|
+
total_tokens=metrics_data.get("total_tokens", 0),
|
|
180
|
+
prompt_tokens=metrics_data.get("prompt_tokens", 0),
|
|
181
|
+
completion_tokens=metrics_data.get("completion_tokens", 0),
|
|
182
|
+
total_cost_usd=metrics_data.get("total_cost_usd", 0.0),
|
|
183
|
+
deepeval_scores=metrics_data.get("deepeval_scores", {}),
|
|
184
|
+
custom=metrics_data.get("custom", {}),
|
|
185
|
+
)
|
|
258
186
|
|
|
259
187
|
return cls(
|
|
260
188
|
scenario_name=data["scenario_name"],
|
|
@@ -264,13 +192,9 @@ class SimulationResult:
|
|
|
264
192
|
agent_model=data.get("agent_model"),
|
|
265
193
|
simulated_user_model=data.get("simulated_user_model"),
|
|
266
194
|
checker_model=data.get("checker_model"),
|
|
267
|
-
|
|
195
|
+
personality=data.get("personality"),
|
|
268
196
|
error=data.get("error"),
|
|
269
|
-
conversation_id=data.get("conversation_id"),
|
|
270
|
-
final_state=data.get("final_state", {}),
|
|
271
197
|
turns=turns,
|
|
272
198
|
tasks=tasks,
|
|
273
199
|
metrics=metrics,
|
|
274
|
-
response_chunks=response_chunks,
|
|
275
|
-
traces=data.get("traces", []),
|
|
276
200
|
)
|
|
@@ -1,52 +1,26 @@
|
|
|
1
1
|
"""Scenario loading functionality for agent simulation."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
|
|
7
6
|
from ragbits.evaluate.agent_simulation.models import Personality, Scenario, Task
|
|
8
7
|
|
|
9
8
|
|
|
10
|
-
@dataclass
|
|
11
|
-
class ScenarioFile:
|
|
12
|
-
"""Represents a loaded scenario file with its metadata."""
|
|
13
|
-
|
|
14
|
-
filename: str
|
|
15
|
-
group: str | None
|
|
16
|
-
scenarios: list[Scenario] = field(default_factory=list)
|
|
17
|
-
|
|
18
|
-
|
|
19
9
|
def load_scenarios(scenarios_file: str = "scenarios.json") -> list[Scenario]:
|
|
20
10
|
"""Load scenarios from a JSON file.
|
|
21
11
|
|
|
22
|
-
Expected JSON format
|
|
23
|
-
{
|
|
24
|
-
"group": "Group Name",
|
|
25
|
-
"scenarios": [
|
|
26
|
-
{
|
|
27
|
-
"name": "Scenario 1",
|
|
28
|
-
"tasks": [
|
|
29
|
-
{
|
|
30
|
-
"task": "task description",
|
|
31
|
-
"checkers": [
|
|
32
|
-
{"type": "llm", "expected_result": "expected result"},
|
|
33
|
-
{"type": "tool_call", "tools": ["tool1", "tool2"]},
|
|
34
|
-
{"type": "state", "checks": [{"key": "user.confirmed", "value": true}]}
|
|
35
|
-
],
|
|
36
|
-
"checker_mode": "all"
|
|
37
|
-
},
|
|
38
|
-
...
|
|
39
|
-
]
|
|
40
|
-
},
|
|
41
|
-
...
|
|
42
|
-
]
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
Legacy format (array of scenarios) is still supported:
|
|
12
|
+
Expected JSON format:
|
|
46
13
|
[
|
|
47
14
|
{
|
|
48
15
|
"name": "Scenario 1",
|
|
49
|
-
"tasks": [
|
|
16
|
+
"tasks": [
|
|
17
|
+
{
|
|
18
|
+
"task": "task description",
|
|
19
|
+
"expected_result": "expected result description",
|
|
20
|
+
"expected_tools": ["tool1", "tool2"] # optional
|
|
21
|
+
},
|
|
22
|
+
...
|
|
23
|
+
]
|
|
50
24
|
},
|
|
51
25
|
...
|
|
52
26
|
]
|
|
@@ -57,35 +31,6 @@ def load_scenarios(scenarios_file: str = "scenarios.json") -> list[Scenario]:
|
|
|
57
31
|
Returns:
|
|
58
32
|
List of Scenario objects
|
|
59
33
|
|
|
60
|
-
Raises:
|
|
61
|
-
FileNotFoundError: If the scenarios file doesn't exist
|
|
62
|
-
ValueError: If the file format is invalid
|
|
63
|
-
"""
|
|
64
|
-
scenario_file = load_scenario_file(scenarios_file)
|
|
65
|
-
return scenario_file.scenarios
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def load_scenario_file(scenarios_file: str = "scenarios.json") -> ScenarioFile:
|
|
69
|
-
"""Load scenarios from a JSON file with file-level metadata.
|
|
70
|
-
|
|
71
|
-
This function supports both the new format with file-level group:
|
|
72
|
-
{
|
|
73
|
-
"group": "Group Name",
|
|
74
|
-
"scenarios": [...]
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
And the legacy format (array of scenarios):
|
|
78
|
-
[
|
|
79
|
-
{"name": "Scenario 1", "tasks": [...]},
|
|
80
|
-
...
|
|
81
|
-
]
|
|
82
|
-
|
|
83
|
-
Args:
|
|
84
|
-
scenarios_file: Path to the JSON file containing scenarios
|
|
85
|
-
|
|
86
|
-
Returns:
|
|
87
|
-
ScenarioFile object containing scenarios and file-level metadata
|
|
88
|
-
|
|
89
34
|
Raises:
|
|
90
35
|
FileNotFoundError: If the scenarios file doesn't exist
|
|
91
36
|
ValueError: If the file format is invalid
|
|
@@ -97,31 +42,16 @@ def load_scenario_file(scenarios_file: str = "scenarios.json") -> ScenarioFile:
|
|
|
97
42
|
with scenarios_path.open("r", encoding="utf-8") as f:
|
|
98
43
|
data = json.load(f)
|
|
99
44
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
scenarios_data: list
|
|
103
|
-
|
|
104
|
-
if isinstance(data, dict):
|
|
105
|
-
# New format: {"group": "...", "scenarios": [...]}
|
|
106
|
-
file_group = data.get("group")
|
|
107
|
-
scenarios_data = data.get("scenarios", [])
|
|
108
|
-
if not isinstance(scenarios_data, list):
|
|
109
|
-
raise ValueError(f"'scenarios' field must be a JSON array, got {type(scenarios_data).__name__}")
|
|
110
|
-
elif isinstance(data, list):
|
|
111
|
-
# Legacy format: [...]
|
|
112
|
-
scenarios_data = data
|
|
113
|
-
else:
|
|
114
|
-
raise ValueError(f"Scenarios file must contain a JSON object or array, got {type(data).__name__}")
|
|
45
|
+
if not isinstance(data, list):
|
|
46
|
+
raise ValueError(f"Scenarios file must contain a JSON array, got {type(data).__name__}")
|
|
115
47
|
|
|
116
48
|
scenarios: list[Scenario] = []
|
|
117
|
-
for scenario_data in
|
|
49
|
+
for scenario_data in data:
|
|
118
50
|
if not isinstance(scenario_data, dict):
|
|
119
51
|
raise ValueError(f"Each scenario must be a JSON object, got {type(scenario_data).__name__}")
|
|
120
52
|
|
|
121
53
|
name = scenario_data.get("name", "")
|
|
122
54
|
tasks_data = scenario_data.get("tasks", [])
|
|
123
|
-
# Scenario can have its own group, or inherit from file-level group
|
|
124
|
-
scenario_group = scenario_data.get("group") or file_group
|
|
125
55
|
|
|
126
56
|
if not isinstance(tasks_data, list):
|
|
127
57
|
raise ValueError(f"Tasks must be a JSON array, got {type(tasks_data).__name__}")
|
|
@@ -132,24 +62,18 @@ def load_scenario_file(scenarios_file: str = "scenarios.json") -> ScenarioFile:
|
|
|
132
62
|
raise ValueError(f"Each task must be a JSON object, got {type(task_data).__name__}")
|
|
133
63
|
|
|
134
64
|
task_desc = task_data.get("task", "")
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
tasks.append(Task(task=task_desc, checkers=checkers, checker_mode=checker_mode))
|
|
65
|
+
expected_result = task_data.get("expected_result", "")
|
|
66
|
+
expected_tools = task_data.get("expected_tools")
|
|
67
|
+
if expected_tools is not None and not isinstance(expected_tools, list):
|
|
68
|
+
raise ValueError(f"expected_tools must be a list or null, got {type(expected_tools).__name__}")
|
|
69
|
+
tasks.append(Task(task=task_desc, expected_result=expected_result, expected_tools=expected_tools))
|
|
142
70
|
|
|
143
|
-
scenarios.append(Scenario(name=name, tasks=tasks
|
|
71
|
+
scenarios.append(Scenario(name=name, tasks=tasks))
|
|
144
72
|
|
|
145
73
|
if not scenarios:
|
|
146
74
|
raise ValueError(f"No scenarios found in {scenarios_path}")
|
|
147
75
|
|
|
148
|
-
return
|
|
149
|
-
filename=scenarios_path.name,
|
|
150
|
-
group=file_group,
|
|
151
|
-
scenarios=scenarios,
|
|
152
|
-
)
|
|
76
|
+
return scenarios
|
|
153
77
|
|
|
154
78
|
|
|
155
79
|
def load_personalities(personalities_file: str = "personalities.json") -> list[Personality]:
|