sandboxy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. sandboxy/__init__.py +3 -0
  2. sandboxy/agents/__init__.py +21 -0
  3. sandboxy/agents/base.py +66 -0
  4. sandboxy/agents/llm_prompt.py +308 -0
  5. sandboxy/agents/loader.py +222 -0
  6. sandboxy/api/__init__.py +5 -0
  7. sandboxy/api/app.py +76 -0
  8. sandboxy/api/routes/__init__.py +1 -0
  9. sandboxy/api/routes/agents.py +92 -0
  10. sandboxy/api/routes/local.py +1388 -0
  11. sandboxy/api/routes/tools.py +106 -0
  12. sandboxy/cli/__init__.py +1 -0
  13. sandboxy/cli/main.py +1196 -0
  14. sandboxy/cli/type_detector.py +48 -0
  15. sandboxy/config.py +49 -0
  16. sandboxy/core/__init__.py +1 -0
  17. sandboxy/core/async_runner.py +824 -0
  18. sandboxy/core/mdl_parser.py +441 -0
  19. sandboxy/core/runner.py +599 -0
  20. sandboxy/core/safe_eval.py +165 -0
  21. sandboxy/core/state.py +234 -0
  22. sandboxy/datasets/__init__.py +20 -0
  23. sandboxy/datasets/loader.py +193 -0
  24. sandboxy/datasets/runner.py +442 -0
  25. sandboxy/errors.py +166 -0
  26. sandboxy/local/context.py +235 -0
  27. sandboxy/local/results.py +173 -0
  28. sandboxy/logging.py +31 -0
  29. sandboxy/mcp/__init__.py +25 -0
  30. sandboxy/mcp/client.py +360 -0
  31. sandboxy/mcp/wrapper.py +99 -0
  32. sandboxy/providers/__init__.py +34 -0
  33. sandboxy/providers/anthropic_provider.py +271 -0
  34. sandboxy/providers/base.py +123 -0
  35. sandboxy/providers/http_client.py +101 -0
  36. sandboxy/providers/openai_provider.py +282 -0
  37. sandboxy/providers/openrouter.py +958 -0
  38. sandboxy/providers/registry.py +199 -0
  39. sandboxy/scenarios/__init__.py +11 -0
  40. sandboxy/scenarios/comparison.py +491 -0
  41. sandboxy/scenarios/loader.py +262 -0
  42. sandboxy/scenarios/runner.py +468 -0
  43. sandboxy/scenarios/unified.py +1434 -0
  44. sandboxy/session/__init__.py +21 -0
  45. sandboxy/session/manager.py +278 -0
  46. sandboxy/tools/__init__.py +34 -0
  47. sandboxy/tools/base.py +127 -0
  48. sandboxy/tools/loader.py +270 -0
  49. sandboxy/tools/yaml_tools.py +708 -0
  50. sandboxy/ui/__init__.py +27 -0
  51. sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
  52. sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
  53. sandboxy/ui/dist/index.html +14 -0
  54. sandboxy/utils/__init__.py +3 -0
  55. sandboxy/utils/time.py +20 -0
  56. sandboxy-0.0.1.dist-info/METADATA +241 -0
  57. sandboxy-0.0.1.dist-info/RECORD +60 -0
  58. sandboxy-0.0.1.dist-info/WHEEL +4 -0
  59. sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
  60. sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,441 @@
1
+ """MDL (Module Definition Language) parser - YAML to ModuleSpec."""
2
+
3
+ import logging
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import yaml
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ from sandboxy.core.state import (
13
+ EnvConfig,
14
+ EvaluationCheck,
15
+ ModuleSpec,
16
+ ModuleVariable,
17
+ ScoringConfig,
18
+ Step,
19
+ ToolRef,
20
+ VariableOption,
21
+ )
22
+
23
+
24
class MDLParseError(Exception):
    """Raised when an MDL module cannot be loaded or parsed."""
28
+
29
+
30
def load_module(path: Path) -> ModuleSpec:
    """Load and parse an MDL module from a YAML file.

    The file is read as UTF-8 (the encoding YAML assumes when no BOM is
    present) rather than the platform's locale encoding, so the same module
    file parses identically on every platform.

    Args:
        path: Path to the YAML module file.

    Returns:
        Parsed ModuleSpec.

    Raises:
        MDLParseError: If the file is missing, cannot be read or decoded,
            is not valid YAML, or its top level is not a mapping.
    """
    # Read first, parse second: each try body contains only the call that can
    # raise the exceptions it handles.
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError as e:
        raise MDLParseError(f"File not found: {path}") from e
    except (OSError, UnicodeDecodeError) as e:
        # Directories, permission problems and undecodable bytes are reported
        # through the same exception type the docstring promises, so callers
        # that only catch MDLParseError (e.g. validate_module) do not crash.
        raise MDLParseError(f"Cannot read module file {path}: {e}") from e

    try:
        raw: Any = yaml.safe_load(text)
    except yaml.YAMLError as e:
        raise MDLParseError(f"Invalid YAML: {e}") from e

    # An empty file yields None; scalars and lists are equally unusable here.
    if not isinstance(raw, dict):
        raise MDLParseError("Module must be a YAML mapping")

    return parse_module(raw)
53
+
54
+
55
def _require_field(entry: Any, key: str, context: str) -> Any:
    """Return ``entry[key]``, raising MDLParseError if it cannot be read."""
    if not isinstance(entry, dict):
        raise MDLParseError(f"{context} must be a mapping, got {type(entry).__name__}")
    if key not in entry:
        raise MDLParseError(f"{context} is missing required field '{key}'")
    return entry[key]


def _mapping_section(raw: dict[str, Any], key: str) -> dict[str, Any]:
    """Return the mapping stored under ``key``; absent or null counts as empty."""
    section = raw.get(key) or {}
    if not isinstance(section, dict):
        raise MDLParseError(f"'{key}' must be a mapping")
    return section


def _list_section(raw: dict[str, Any], key: str) -> list[Any]:
    """Return the sequence stored under ``key``; absent or null counts as empty."""
    section = raw.get(key) or []
    if not isinstance(section, (list, tuple)):
        raise MDLParseError(f"'{key}' must be a list")
    return list(section)


def _parse_step(raw_step: Any, context: str) -> Step:
    """Build a Step from one raw entry of ``steps`` or of a branch."""
    return Step(
        id=_require_field(raw_step, "id", context),
        action=_require_field(raw_step, "action", context),
        params=raw_step.get("params", {}),
        condition=raw_step.get("condition"),
    )


def parse_module(raw: dict[str, Any]) -> ModuleSpec:
    """Parse a raw dictionary into a ModuleSpec.

    Sections that are absent or explicitly null (``variables``,
    ``environment``, ``steps``, ``branches``, ``evaluation``, ``scoring``,
    ``agent``) are treated as empty. Required per-entry fields are checked
    explicitly so that malformed input is reported as MDLParseError instead
    of leaking KeyError/TypeError/AttributeError to the caller.

    Args:
        raw: Raw dictionary from YAML parsing.

    Returns:
        Parsed ModuleSpec.

    Raises:
        MDLParseError: If required fields are missing or invalid.
    """
    if "id" not in raw:
        raise MDLParseError("Module must have an 'id' field")

    # Parse variables
    variables = []
    for index, v in enumerate(_list_section(raw, "variables")):
        context = f"variables[{index}]"
        name = _require_field(v, "name", context)
        options = None
        if v.get("options"):
            options = [
                VariableOption(
                    value=_require_field(o, "value", f"{context}.options[{opt_index}]"),
                    label=_require_field(o, "label", f"{context}.options[{opt_index}]"),
                )
                for opt_index, o in enumerate(v["options"])
            ]
        variables.append(
            ModuleVariable(
                name=name,
                label=v.get("label", name),
                description=v.get("description", ""),
                type=v.get("type", "string"),
                default=v.get("default"),
                options=options,
                min=v.get("min"),
                max=v.get("max"),
                step=v.get("step"),
            )
        )

    # Parse environment
    env_raw = _mapping_section(raw, "environment")
    tools = []
    for index, t in enumerate(_list_section(env_raw, "tools")):
        context = f"environment.tools[{index}]"
        tools.append(
            ToolRef(
                name=_require_field(t, "name", context),
                type=_require_field(t, "type", context),
                description=t.get("description", ""),
                config=t.get("config", {}),
            )
        )
    environment = EnvConfig(
        sandbox_type=env_raw.get("sandbox_type", "local"),
        tools=tools,
        initial_state=env_raw.get("initial_state", {}),
    )

    # Parse steps (with condition support)
    steps = [
        _parse_step(s, f"steps[{index}]")
        for index, s in enumerate(_list_section(raw, "steps"))
    ]

    # Parse branches; a branch declared with a null body is an empty branch.
    branches: dict[str, list[Step]] = {}
    for name, branch_steps in _mapping_section(raw, "branches").items():
        branches[name] = [
            _parse_step(s, f"branches.{name}[{index}]")
            for index, s in enumerate(branch_steps or [])
        ]

    # Parse evaluation
    evaluation = []
    for index, e in enumerate(_list_section(raw, "evaluation")):
        context = f"evaluation[{index}]"
        check = EvaluationCheck(
            name=_require_field(e, "name", context),
            kind=_require_field(e, "kind", context),
            # Common fields
            target=e.get("target"),
            value=e.get("value"),
            expected=e.get("expected", True),
            # Type-specific fields
            pattern=e.get("pattern"),
            case_sensitive=e.get("case_sensitive", False),
            min=e.get("min"),
            max=e.get("max"),
            tool=e.get("tool"),
            action=e.get("action"),
            key=e.get("key"),
            # Legacy support
            config=e.get("config", {}),
        )
        evaluation.append(check)

    # Parse agent_config (support both keys). "agent_config" wins when it is
    # non-empty, unless it has no system_prompt and the "agent" block does.
    agent_raw = raw.get("agent") or {}
    agent_config = raw.get("agent_config") or agent_raw
    if (
        isinstance(agent_config, dict)
        and "system_prompt" not in agent_config
        # Guard the membership test: on a non-mapping it would either fail or
        # silently become a substring check.
        and isinstance(agent_raw, dict)
        and "system_prompt" in agent_raw
    ):
        agent_config = agent_raw

    # Parse scoring config
    scoring_raw = _mapping_section(raw, "scoring")
    scoring = ScoringConfig(
        formula=scoring_raw.get("formula"),
        weights=scoring_raw.get("weights", {}),
        normalize=scoring_raw.get("normalize", False),
        min_score=scoring_raw.get("min_score", 0.0),
        max_score=scoring_raw.get("max_score", 100.0),
    )

    return ModuleSpec(
        id=raw["id"],
        description=raw.get("description", ""),
        variables=variables,
        agent_config=agent_config,
        environment=environment,
        steps=steps,
        branches=branches,
        evaluation=evaluation,
        scoring=scoring,
    )
182
+
183
+
184
def interpolate_template(text: str, variables: dict[str, Any]) -> str:
    """Render a template string against a set of variable values.

    Two constructs are recognised, processed in this order:
    - {{#if condition}}...{{else if condition}}...{{else}}...{{/if}} blocks,
      replaced by the stripped content of the first clause whose condition
      holds (or of the {{else}} clause), or by "" when nothing matches.
    - {{variable}} placeholders, replaced by ``str(value)``; placeholders
      whose name is not in ``variables`` are left in the text unchanged.

    Args:
        text: Template string with {{variable}} placeholders.
        variables: Dictionary of variable values.

    Returns:
        Interpolated string. A falsy ``text`` is returned as-is.
    """
    if not text:
        return text

    def resolve_conditional(found: re.Match) -> str:
        opening_condition = found.group(1).strip()
        block_body = found.group(2) or ""

        # Splitting on a pattern with one capture group yields
        # [content, marker, content, marker, content, ...] where each marker
        # is the else-if condition text, or None for a bare {{else}}.
        pieces = re.split(r"\{\{else if\s+(.+?)\}\}|\{\{else\}\}", block_body)

        clauses: list[tuple[str | None, str]] = [(opening_condition, pieces[0])]
        for marker, clause_text in zip(pieces[1::2], pieces[2::2]):
            clause_condition = None if marker is None else marker.strip()
            clauses.append((clause_condition, clause_text))

        # First clause that holds wins; a clause without a condition is the
        # {{else}} and always matches.
        for clause_condition, clause_text in clauses:
            if clause_condition is None:
                return clause_text.strip()
            try:
                holds = _eval_condition(clause_condition, variables)
            except Exception:
                continue
            if holds:
                return clause_text.strip()

        return ""

    conditional_re = re.compile(r"\{\{#if\s+(.+?)\}\}(.*?)\{\{/if\}\}", re.DOTALL)
    rendered = conditional_re.sub(resolve_conditional, text)

    def substitute(found: re.Match) -> str:
        key = found.group(1).strip()
        # Unknown names are written back in their original {{name}} form.
        return str(variables.get(key, f"{{{{{key}}}}}"))

    placeholder_re = re.compile(r"\{\{(\w+)\}\}")
    return placeholder_re.sub(substitute, rendered)
262
+
263
+
264
+ def _eval_condition(condition: str, variables: dict[str, Any]) -> bool:
265
+ """Safely evaluate a condition expression.
266
+
267
+ Args:
268
+ condition: Condition expression (e.g., "sophistication >= 7").
269
+ variables: Dictionary of variable values.
270
+
271
+ Returns:
272
+ Boolean result of condition evaluation.
273
+ """
274
+ # Safe builtins for condition evaluation
275
+ safe_builtins = {
276
+ "True": True,
277
+ "False": False,
278
+ "None": None,
279
+ "len": len,
280
+ "str": str,
281
+ "int": int,
282
+ "float": float,
283
+ "bool": bool,
284
+ }
285
+
286
+ # Create evaluation context
287
+ safe_globals = {"__builtins__": safe_builtins}
288
+ safe_globals.update(variables)
289
+
290
+ try:
291
+ return bool(eval(condition, safe_globals, {}))
292
+ except Exception:
293
+ return False
294
+
295
+
296
+ def _interpolate_value(value: Any, var_dict: dict[str, Any]) -> Any:
297
+ """Recursively interpolate variables in a value.
298
+
299
+ Handles strings, dicts, and lists. For strings that look like
300
+ pure variable references (e.g., "{{starting_cash}}"), attempts
301
+ to return the actual typed value instead of a string.
302
+ """
303
+ if isinstance(value, str):
304
+ # Check if it's a pure variable reference like "{{var_name}}"
305
+ pure_var_match = re.match(r"^\{\{(\w+)\}\}$", value.strip())
306
+ if pure_var_match:
307
+ var_name = pure_var_match.group(1)
308
+ if var_name in var_dict:
309
+ return var_dict[var_name]
310
+ # Otherwise do string interpolation
311
+ return interpolate_template(value, var_dict)
312
+ if isinstance(value, dict):
313
+ return {k: _interpolate_value(v, var_dict) for k, v in value.items()}
314
+ if isinstance(value, list):
315
+ return [_interpolate_value(item, var_dict) for item in value]
316
+ return value
317
+
318
+
319
def apply_variables(module: ModuleSpec, variables: dict[str, Any]) -> ModuleSpec:
    """Produce a copy of a module with variable values substituted in.

    Declared defaults are used for every variable the caller does not
    supply. The system prompt, tool configs, initial state and step params
    are interpolated; steps whose condition evaluates false are dropped and
    the remaining steps carry no condition. Branches, evaluation checks and
    scoring are passed through untouched.

    Args:
        module: Module specification.
        variables: Dictionary of variable values (from user or defaults).

    Returns:
        New ModuleSpec with interpolated values.
    """
    # Defaults first, then caller-supplied values on top.
    resolved: dict[str, Any] = {declared.name: declared.default for declared in module.variables}
    resolved.update(variables)

    # Only the system prompt of the agent config is templated.
    rendered_agent_config = dict(module.agent_config)
    if "system_prompt" in rendered_agent_config:
        rendered_agent_config["system_prompt"] = interpolate_template(
            rendered_agent_config["system_prompt"], resolved
        )

    # Environment: rebuild each tool with an interpolated config.
    rendered_tools = [
        ToolRef(
            name=tool.name,
            type=tool.type,
            description=tool.description,
            config=_interpolate_value(tool.config, resolved),
        )
        for tool in module.environment.tools
    ]
    rendered_state = _interpolate_value(dict(module.environment.initial_state), resolved)
    rendered_environment = EnvConfig(
        sandbox_type=module.environment.sandbox_type,
        tools=rendered_tools,
        initial_state=rendered_state,
    )

    # Steps: keep unconditional steps and those whose condition holds. The
    # condition has been consumed here, so the new step carries none.
    kept_steps: list[Step] = [
        Step(
            id=step.id,
            action=step.action,
            params=_interpolate_value(dict(step.params), resolved),
            condition=None,
        )
        for step in module.steps
        if not step.condition or _eval_condition(step.condition, resolved)
    ]

    return ModuleSpec(
        id=module.id,
        description=module.description,
        variables=module.variables,
        agent_config=rendered_agent_config,
        environment=rendered_environment,
        steps=kept_steps,
        branches=module.branches,  # TODO: interpolate branches too if needed
        evaluation=module.evaluation,
        scoring=module.scoring,
    )
395
+
396
+
397
def validate_module(path: Path) -> list[str]:
    """Check an MDL module file and collect every problem found.

    A module that fails to load yields a single-element list holding the
    load error. Otherwise the result lists, in this order: steps with an
    unknown action, branch steps naming an undefined branch, and evaluation
    checks with an unknown kind.

    Args:
        path: Path to the YAML module file.

    Returns:
        List of validation error messages (empty if valid).
    """
    try:
        module = load_module(path)
    except MDLParseError as e:
        return [str(e)]

    known_actions = {"inject_user", "await_user", "await_agent", "branch", "tool_call"}
    known_kinds = {
        "contains",
        "regex",
        "count",
        "tool_called",
        "equals",
        "env_state",
        "deterministic",
        "llm",
    }

    # Steps whose action is not one of the recognised ones.
    problems: list[str] = [
        f"Step '{step.id}' has invalid action: {step.action}"
        for step in module.steps
        if step.action not in known_actions
    ]

    # Branch steps must point at a branch the module defines.
    for step in module.steps:
        if step.action != "branch":
            continue
        target = step.params.get("branch_name")
        if target and target not in module.branches:
            problems.append(f"Step '{step.id}' references unknown branch: {target}")

    # Evaluation checks whose kind is not one of the recognised ones.
    problems.extend(
        f"Evaluation '{check.name}' has invalid kind: {check.kind}"
        for check in module.evaluation
        if check.kind not in known_kinds
    )

    return problems