sandboxy 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandboxy/__init__.py +3 -0
- sandboxy/agents/__init__.py +21 -0
- sandboxy/agents/base.py +66 -0
- sandboxy/agents/llm_prompt.py +308 -0
- sandboxy/agents/loader.py +222 -0
- sandboxy/api/__init__.py +5 -0
- sandboxy/api/app.py +76 -0
- sandboxy/api/routes/__init__.py +1 -0
- sandboxy/api/routes/agents.py +92 -0
- sandboxy/api/routes/local.py +1388 -0
- sandboxy/api/routes/tools.py +106 -0
- sandboxy/cli/__init__.py +1 -0
- sandboxy/cli/main.py +1196 -0
- sandboxy/cli/type_detector.py +48 -0
- sandboxy/config.py +49 -0
- sandboxy/core/__init__.py +1 -0
- sandboxy/core/async_runner.py +824 -0
- sandboxy/core/mdl_parser.py +441 -0
- sandboxy/core/runner.py +599 -0
- sandboxy/core/safe_eval.py +165 -0
- sandboxy/core/state.py +234 -0
- sandboxy/datasets/__init__.py +20 -0
- sandboxy/datasets/loader.py +193 -0
- sandboxy/datasets/runner.py +442 -0
- sandboxy/errors.py +166 -0
- sandboxy/local/context.py +235 -0
- sandboxy/local/results.py +173 -0
- sandboxy/logging.py +31 -0
- sandboxy/mcp/__init__.py +25 -0
- sandboxy/mcp/client.py +360 -0
- sandboxy/mcp/wrapper.py +99 -0
- sandboxy/providers/__init__.py +34 -0
- sandboxy/providers/anthropic_provider.py +271 -0
- sandboxy/providers/base.py +123 -0
- sandboxy/providers/http_client.py +101 -0
- sandboxy/providers/openai_provider.py +282 -0
- sandboxy/providers/openrouter.py +958 -0
- sandboxy/providers/registry.py +199 -0
- sandboxy/scenarios/__init__.py +11 -0
- sandboxy/scenarios/comparison.py +491 -0
- sandboxy/scenarios/loader.py +262 -0
- sandboxy/scenarios/runner.py +468 -0
- sandboxy/scenarios/unified.py +1434 -0
- sandboxy/session/__init__.py +21 -0
- sandboxy/session/manager.py +278 -0
- sandboxy/tools/__init__.py +34 -0
- sandboxy/tools/base.py +127 -0
- sandboxy/tools/loader.py +270 -0
- sandboxy/tools/yaml_tools.py +708 -0
- sandboxy/ui/__init__.py +27 -0
- sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
- sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
- sandboxy/ui/dist/index.html +14 -0
- sandboxy/utils/__init__.py +3 -0
- sandboxy/utils/time.py +20 -0
- sandboxy-0.0.1.dist-info/METADATA +241 -0
- sandboxy-0.0.1.dist-info/RECORD +60 -0
- sandboxy-0.0.1.dist-info/WHEEL +4 -0
- sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
- sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Scenario loader - load scenario definitions from YAML files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GoalSpec(BaseModel):
    """Declarative description of one goal attached to a scenario.

    Attributes:
        id: Identifier of the goal (required).
        name: Display name of the goal (required).
        description: Free-text description; empty string when omitted.
        points: Point value of the goal; 0 when omitted.
        detection: Free-form detection settings; each instance gets its own
            empty dict when omitted.
    """

    id: str
    name: str
    description: str = ""
    points: int = 0
    detection: dict[str, Any] = Field(default_factory=dict)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class StepSpec(BaseModel):
    """Declarative description of one step in a scenario's conversation flow.

    Attributes:
        id: Identifier of the step (required).
        action: Name of the action to perform, e.g. ``inject_user``,
            ``await_user`` or ``await_agent``.
        params: Free-form parameters for the action; each instance gets its
            own empty dict when omitted.
    """

    id: str
    action: str  # inject_user, await_user, await_agent
    params: dict[str, Any] = Field(default_factory=dict)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class McpServerSpec(BaseModel):
    """Connection settings for a single MCP server.

    Two mutually independent groups of fields are supported:

    - Local (stdio): set ``command`` and optionally ``args`` / ``env``.
    - Remote (HTTP): set ``url`` and optionally ``headers``.

    Attributes:
        name: Name of the server (required).
        command: Executable for a local stdio server; ``None`` when unset.
        args: Arguments for ``command``; empty list when omitted.
        env: Environment variables for ``command``; empty dict when omitted.
        url: Address of a remote HTTP server; ``None`` when unset.
        headers: HTTP headers for the remote server; empty dict when omitted.
        transport: HTTP transport selector, ``"auto"`` by default.
    """

    name: str

    # --- Local server (stdio transport) ---
    command: str | None = None
    args: list[str] = Field(default_factory=list)
    env: dict[str, str] = Field(default_factory=dict)

    # --- Remote server (HTTP transport - SSE or Streamable HTTP) ---
    url: str | None = None
    headers: dict[str, str] = Field(default_factory=dict)
    transport: str = "auto"  # "auto", "sse", or "streamable_http"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ScenarioSpec(BaseModel):
    """Full declarative description of a scenario.

    Only ``id`` is required; every other field falls back to an empty
    string, list or dict (a fresh container per instance).
    """

    # --- Identity and metadata ---
    id: str
    name: str = ""
    description: str = ""
    category: str = ""
    tags: list[str] = Field(default_factory=list)

    # --- Tool configuration ---
    tools_from: list[str] = Field(default_factory=list)
    tools: dict[str, Any] = Field(default_factory=dict)

    # --- MCP server connections (real tools) ---
    mcp_servers: list[McpServerSpec] = Field(default_factory=list)

    # --- State and prompts ---
    initial_state: dict[str, Any] = Field(default_factory=dict)
    system_prompt: str = ""

    # --- Conversation flow ---
    steps: list[StepSpec] = Field(default_factory=list)

    # --- Evaluation ---
    goals: list[GoalSpec] = Field(default_factory=list)
    evaluation: list[dict[str, Any]] = Field(default_factory=list)
    scoring: dict[str, Any] = Field(default_factory=dict)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def load_scenario(path: Path) -> ScenarioSpec:
    """Load a scenario from a YAML file.

    Args:
        path: Path to the scenario YAML file.

    Returns:
        The ScenarioSpec built by ``parse_scenario`` from the file's
        top-level mapping.

    Raises:
        ValueError: If the file is missing or otherwise unreadable, is not
            valid UTF-8, is not valid YAML, or its top-level value is not
            a mapping.
    """
    # Keep I/O and parsing in separate try blocks so each failure mode maps
    # to its own message.
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError as e:
        raise ValueError(f"File not found: {path}") from e
    except (OSError, UnicodeDecodeError) as e:
        # Covers permission errors, directories passed as files, and bytes
        # that are not valid UTF-8.
        raise ValueError(f"Cannot read scenario file {path}: {e}") from e

    try:
        raw = yaml.safe_load(text)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML: {e}") from e

    # An empty file parses to None and a bare list/scalar is not a scenario.
    if not isinstance(raw, dict):
        raise ValueError("Scenario must be a YAML mapping")

    return parse_scenario(raw)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _get_non_null(mapping: dict[str, Any], key: str, default: Any) -> Any:
    """Return ``mapping[key]``, or ``default`` when the key is missing or null."""
    value = mapping.get(key)
    return default if value is None else value


def _require_mapping(entry: Any, kind: str, index: int) -> dict[str, Any]:
    """Return ``entry`` if it is a dict, otherwise raise a descriptive ValueError."""
    if not isinstance(entry, dict):
        raise ValueError(
            f"{kind} #{index} must be a mapping, got {type(entry).__name__}"
        )
    return entry


def parse_scenario(raw: dict[str, Any]) -> ScenarioSpec:
    """Parse a raw dictionary into a ScenarioSpec.

    A key that is present with a null value (e.g. an empty ``steps:`` entry
    in YAML) is treated exactly like a missing key and falls back to the
    field's default.

    Args:
        raw: Raw dictionary from YAML parsing.

    Returns:
        Parsed ScenarioSpec. Missing ids default to ``"unnamed"`` for the
        scenario and to ``step_<index>`` / ``goal_<index>`` for steps and
        goals; a missing scenario name falls back to the scenario id.

    Raises:
        ValueError: If an entry of ``steps`` or ``goals`` is not a mapping.
            Field-level validation is delegated to the model classes.
    """
    # tools_from may be written as a single string or as a list.
    tools_from = _get_non_null(raw, "tools_from", [])
    if isinstance(tools_from, str):
        tools_from = [tools_from]

    # MCP servers: entries that are not mappings are skipped, as before.
    mcp_servers: list[McpServerSpec] = [
        McpServerSpec(
            name=_get_non_null(server, "name", "unnamed"),
            # Local (stdio) transport
            command=server.get("command"),
            args=_get_non_null(server, "args", []),
            env=_get_non_null(server, "env", {}),
            # Remote (HTTP) transport
            url=server.get("url"),
            headers=_get_non_null(server, "headers", {}),
            transport=_get_non_null(server, "transport", "auto"),
        )
        for server in _get_non_null(raw, "mcp_servers", [])
        if isinstance(server, dict)
    ]

    # Steps: the positional index doubles as the fallback id suffix.
    steps: list[StepSpec] = []
    for index, entry in enumerate(_get_non_null(raw, "steps", [])):
        step = _require_mapping(entry, "Step", index)
        steps.append(
            StepSpec(
                id=_get_non_null(step, "id", f"step_{index}"),
                action=_get_non_null(step, "action", "await_agent"),
                params=_get_non_null(step, "params", {}),
            )
        )

    # Goals: same fallback-id scheme as steps.
    goals: list[GoalSpec] = []
    for index, entry in enumerate(_get_non_null(raw, "goals", [])):
        goal = _require_mapping(entry, "Goal", index)
        goals.append(
            GoalSpec(
                id=_get_non_null(goal, "id", f"goal_{index}"),
                name=_get_non_null(goal, "name", ""),
                description=_get_non_null(goal, "description", ""),
                points=_get_non_null(goal, "points", 0),
                detection=_get_non_null(goal, "detection", {}),
            )
        )

    return ScenarioSpec(
        id=_get_non_null(raw, "id", "unnamed"),
        # The display name falls back to the raw id, then to a fixed label.
        name=_get_non_null(
            raw, "name", _get_non_null(raw, "id", "Unnamed Scenario")
        ),
        description=_get_non_null(raw, "description", ""),
        category=_get_non_null(raw, "category", ""),
        tags=_get_non_null(raw, "tags", []),
        tools_from=tools_from,
        tools=_get_non_null(raw, "tools", {}),
        mcp_servers=mcp_servers,
        initial_state=_get_non_null(raw, "initial_state", {}),
        system_prompt=_get_non_null(raw, "system_prompt", ""),
        steps=steps,
        goals=goals,
        evaluation=_get_non_null(raw, "evaluation", []),
        scoring=_get_non_null(raw, "scoring", {}),
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# -----------------------------------------------------------------------------
|
|
181
|
+
# Variable Interpolation
|
|
182
|
+
# -----------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _interpolate_string(text: str, variables: dict[str, Any]) -> str:
|
|
186
|
+
"""Interpolate {variable} placeholders in a string."""
|
|
187
|
+
if not isinstance(text, str):
|
|
188
|
+
return text
|
|
189
|
+
|
|
190
|
+
def replace(match: re.Match[str]) -> str:
|
|
191
|
+
key = match.group(1)
|
|
192
|
+
if key in variables:
|
|
193
|
+
return str(variables[key])
|
|
194
|
+
# Keep original placeholder if not found
|
|
195
|
+
return match.group(0)
|
|
196
|
+
|
|
197
|
+
return re.sub(r"\{(\w+)\}", replace, text)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _interpolate_value(value: Any, variables: dict[str, Any]) -> Any:
    """Interpolate variables throughout a nested value.

    Strings are passed to ``_interpolate_string``; dicts and lists are
    rebuilt with every contained value processed recursively (dict keys are
    kept as they are). Any other value is returned unchanged.
    """
    if isinstance(value, dict):
        return {
            key: _interpolate_value(item, variables)
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [_interpolate_value(element, variables) for element in value]
    if isinstance(value, str):
        return _interpolate_string(value, variables)
    return value
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def apply_scenario_variables(spec: ScenarioSpec, variables: dict[str, Any]) -> ScenarioSpec:
    """Build a copy of a scenario with ``{variable}`` placeholders filled in.

    Placeholders are interpolated in:
    - description
    - system_prompt
    - initial_state values
    - step params (especially content)

    All other fields are carried over from ``spec`` as they are.

    Args:
        spec: Original scenario specification
        variables: Dictionary of variable name to value

    Returns:
        New ScenarioSpec with interpolated values
    """
    # Each step is rebuilt around an interpolated shallow copy of its params.
    interpolated_steps: list[StepSpec] = [
        StepSpec(
            id=original.id,
            action=original.action,
            params=_interpolate_value(dict(original.params), variables),
        )
        for original in spec.steps
    ]

    return ScenarioSpec(
        id=spec.id,
        name=spec.name,
        description=_interpolate_string(spec.description, variables),
        category=spec.category,
        tags=spec.tags,
        tools_from=spec.tools_from,
        tools=spec.tools,  # Tools are not interpolated - they have their own param system
        mcp_servers=spec.mcp_servers,
        initial_state=_interpolate_value(spec.initial_state, variables),
        system_prompt=_interpolate_string(spec.system_prompt, variables),
        steps=interpolated_steps,
        goals=spec.goals,
        evaluation=spec.evaluation,
        scoring=spec.scoring,
    )
|